Browse Source

[fix] use html result page in google images (previous endpoint stopped working)

Adam Tauber 6 years ago
parent
commit
57e7e9da98
1 changed file with 16 additions and 25 deletions
  1. 16
    25
      searx/engines/google_images.py

+ 16
- 25
searx/engines/google_images.py View File

13
 from datetime import date, timedelta
13
 from datetime import date, timedelta
14
 from json import loads
14
 from json import loads
15
 from lxml import html
15
 from lxml import html
16
-from searx.url_utils import urlencode
16
+from searx.url_utils import urlencode, urlparse, parse_qs
17
 
17
 
18
 
18
 
19
 # engine dependent config
19
 # engine dependent config
25
 
25
 
26
 search_url = 'https://www.google.com/search'\
26
 search_url = 'https://www.google.com/search'\
27
     '?{query}'\
27
     '?{query}'\
28
-    '&asearch=ichunk'\
29
-    '&async=_id:rg_s,_pms:s'\
30
     '&tbm=isch'\
28
     '&tbm=isch'\
31
-    '&yv=2'\
29
+    '&gbv=1'\
30
+    '&sa=G'\
32
     '&{search_options}'
31
     '&{search_options}'
33
 time_range_attr = "qdr:{range}"
32
 time_range_attr = "qdr:{range}"
34
 time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}"
33
 time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}"
66
 def response(resp):
65
 def response(resp):
67
     results = []
66
     results = []
68
 
67
 
69
-    g_result = loads(resp.text)
70
-
71
-    dom = html.fromstring(g_result[1][1])
68
+    dom = html.fromstring(resp.text)
72
 
69
 
73
     # parse results
70
     # parse results
74
-    for result in dom.xpath('//div[@data-ved]'):
75
-
76
-        try:
77
-            metadata = loads(''.join(result.xpath('./div[contains(@class, "rg_meta")]/text()')))
78
-        except:
79
-            continue
80
-
81
-        thumbnail_src = metadata['tu']
82
-
83
-        # http to https
84
-        thumbnail_src = thumbnail_src.replace("http://", "https://")
85
-
71
+    for img in dom.xpath('//a'):
72
+        r = {
73
+            'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')),
74
+            'content': '',
75
+            'template': 'images.html',
76
+        }
77
+        url = urlparse(img.xpath('.//@href')[0])
78
+        query = parse_qs(url.query)
79
+        r['url'] = query['imgrefurl'][0]
80
+        r['img_src'] = query['imgurl'][0]
81
+        r['thumbnail_src'] = r['img_src']
86
         # append result
82
         # append result
87
-        results.append({'url': metadata['ru'],
88
-                        'title': metadata['pt'],
89
-                        'content': metadata['s'],
90
-                        'thumbnail_src': thumbnail_src,
91
-                        'img_src': metadata['ou'],
92
-                        'template': 'images.html'})
83
+        results.append(r)
93
 
84
 
94
     # return results
85
     # return results
95
     return results
86
     return results