Browse Source

Merge branch 'master' of github.com:nicholasks/searx

Nicholas Kegler 6 years ago
parent
commit
f34733c3fe
4 changed files with 18 additions and 41 deletions
  1. 1
    1
      searx/engines/base.py
  2. 16
    25
      searx/engines/google_images.py
  3. 1
    0
      searx/utils.py
  4. 0
    15
      tests/unit/engines/test_google_images.py

+ 1
- 1
searx/engines/base.py View File

55
 def request(query, params):
55
 def request(query, params):
56
     # replace shortcuts with API advanced search keywords
56
     # replace shortcuts with API advanced search keywords
57
     for key in shorcut_dict.keys():
57
     for key in shorcut_dict.keys():
58
-        query = re.sub(str(key), str(shorcut_dict[key]), query)
58
+        query = re.sub(key, shorcut_dict[key], str(query))
59
 
59
 
60
     # basic search
60
     # basic search
61
     offset = (params['pageno'] - 1) * number_of_results
61
     offset = (params['pageno'] - 1) * number_of_results

+ 16
- 25
searx/engines/google_images.py View File

13
 from datetime import date, timedelta
13
 from datetime import date, timedelta
14
 from json import loads
14
 from json import loads
15
 from lxml import html
15
 from lxml import html
16
-from searx.url_utils import urlencode
16
+from searx.url_utils import urlencode, urlparse, parse_qs
17
 
17
 
18
 
18
 
19
 # engine dependent config
19
 # engine dependent config
25
 
25
 
26
 search_url = 'https://www.google.com/search'\
26
 search_url = 'https://www.google.com/search'\
27
     '?{query}'\
27
     '?{query}'\
28
-    '&asearch=ichunk'\
29
-    '&async=_id:rg_s,_pms:s'\
30
     '&tbm=isch'\
28
     '&tbm=isch'\
31
-    '&yv=2'\
29
+    '&gbv=1'\
30
+    '&sa=G'\
32
     '&{search_options}'
31
     '&{search_options}'
33
 time_range_attr = "qdr:{range}"
32
 time_range_attr = "qdr:{range}"
34
 time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}"
33
 time_range_custom_attr = "cdr:1,cd_min:{start},cd_max{end}"
66
 def response(resp):
65
 def response(resp):
67
     results = []
66
     results = []
68
 
67
 
69
-    g_result = loads(resp.text)
70
-
71
-    dom = html.fromstring(g_result[1][1])
68
+    dom = html.fromstring(resp.text)
72
 
69
 
73
     # parse results
70
     # parse results
74
-    for result in dom.xpath('//div[@data-ved]'):
75
-
76
-        try:
77
-            metadata = loads(''.join(result.xpath('./div[contains(@class, "rg_meta")]/text()')))
78
-        except:
79
-            continue
80
-
81
-        thumbnail_src = metadata['tu']
82
-
83
-        # http to https
84
-        thumbnail_src = thumbnail_src.replace("http://", "https://")
85
-
71
+    for img in dom.xpath('//a'):
72
+        r = {
73
+            'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')),
74
+            'content': '',
75
+            'template': 'images.html',
76
+        }
77
+        url = urlparse(img.xpath('.//@href')[0])
78
+        query = parse_qs(url.query)
79
+        r['url'] = query['imgrefurl'][0]
80
+        r['img_src'] = query['imgurl'][0]
81
+        r['thumbnail_src'] = r['img_src']
86
         # append result
82
         # append result
87
-        results.append({'url': metadata['ru'],
88
-                        'title': metadata['pt'],
89
-                        'content': metadata['s'],
90
-                        'thumbnail_src': thumbnail_src,
91
-                        'img_src': metadata['ou'],
92
-                        'template': 'images.html'})
83
+        results.append(r)
93
 
84
 
94
     # return results
85
     # return results
95
     return results
86
     return results

+ 1
- 0
searx/utils.py View File

33
     unichr = chr
33
     unichr = chr
34
     unicode = str
34
     unicode = str
35
     IS_PY2 = False
35
     IS_PY2 = False
36
+    basestring = str
36
 else:
37
 else:
37
     IS_PY2 = True
38
     IS_PY2 = True
38
 
39
 

+ 0
- 15
tests/unit/engines/test_google_images.py View File

25
         self.assertRaises(AttributeError, google_images.response, [])
25
         self.assertRaises(AttributeError, google_images.response, [])
26
         self.assertRaises(AttributeError, google_images.response, '')
26
         self.assertRaises(AttributeError, google_images.response, '')
27
         self.assertRaises(AttributeError, google_images.response, '[]')
27
         self.assertRaises(AttributeError, google_images.response, '[]')
28
-
29
-        html = r"""
30
-["rg_s",["dom","\u003Cstyle\u003E.rg_kn,.rg_s{}.rg_bx{display:-moz-inline-box;display:inline-block;margin-top:0;margin-right:12px;margin-bottom:12px;margin-left:0;overflow:hidden;position:relative;vertical-align:top;z-index:1}.rg_meta{display:none}.rg_l{display:inline-block;height:100%;position:absolute;text-decoration:none;width:100%}.rg_l:focus{outline:0}.rg_i{border:0;color:rgba(0,0,0,0);display:block;-webkit-touch-callout:none;}.rg_an,.rg_anbg,.rg_ilm,.rg_ilmbg{right:0;bottom:0;box-sizing:border-box;-moz-box-sizing:border-box;color:#fff;font:normal 11px arial,sans-serif;line-height:100%;white-space:nowrap;width:100%}.rg_anbg,.rg_ilmbg{background:rgba(51,51,51,0.8);margin-left:0;padding:2px 4px;position:absolute}.rg_ilmn{bottom:0;display:block;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.rg_ilm{display:none}#rg_s.rg_kn .rg_l:focus .rg_ilm{display:block}.rg_kn .rg_bx:hover .rg_ilm,.rg_bx:hover .rg_anbg{display:none}.rg_bx:hover .rg_ilm,.rg_anbg,.rg_kn .rg_bx:hover .rg_anbg{display:block}\u003C\/style\u003E\u003Cdiv eid=\"qlKuV-T3BoqksAHMnaroAw\" id=\"isr_scm_0\" style=\"display:none\"\u003E\u003C\/div\u003E\u003Cdiv data-cei=\"qlKuV-T3BoqksAHMnaroAw\" class=\"rg_add_chunk\"\u003E\u003C!--m--\u003E\u003Cdiv class=\"rg_di rg_bx rg_el ivg-i\" data-ved=\"0ahUKEwjk9PCm-7zOAhUKEiwKHcyOCj0QMwgCKAAwAA\"\u003E\u003Ca jsaction=\"fire.ivg_o;mouseover:str.hmov;mouseout:str.hmou\" class=\"rg_l\" style=\"background:rgb(170,205,240)\"\u003E\u003Cimg data-sz=\"f\" name=\"5eykIeMjmCk7xM:\" src=\"https:\/\/encrypted-tbn0.gstatic.com\/images?q=tbn\" class=\"rg_i rg_ic\" alt=\"Image result for south\" jsaction=\"load:str.tbn\" onload=\"google.aft\u0026\u0026google.aft(this)\"\u003E\u003Cdiv class=\"_aOd rg_ilm\"\u003E\u003Cdiv class=\"rg_ilmbg\"\u003E\u003Cspan class=\"rg_ilmn\"\u003E 566\u0026nbsp;\u0026#215;\u0026nbsp;365 - en.wikipedia.org \u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/a\u003E\u003Cdiv class=\"rg_meta\"\u003E{\"id\":\"5eykIeMjmCk7xM:\",\"isu\":\"en.wikipedia.org\",\"itg\":false,\"ity\":\"png\",\"oh\":365,\"ou\":\"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/e\/e4\/Us_south_census.png\",\"ow\":566,\"pt\":\"Southern United States - Wikipedia, the free encyclopedia\",\"rid\":\"cErfE02-v-VcAM\",\"ru\":\"https:\/\/en.wikipedia.org\/wiki\/Southern_United_States\",\"s\":\"The Southern United States as defined by the United States Census Bureau.\",\"sc\":1,\"th\":180,\"tu\":\"https:\/\/encrypted-tbn0.gstatic.com\/images?q\\u003dtbn\",\"tw\":280}\u003C\/div\u003E\u003C\/div\u003E\u003C!--n--\u003E\u003C!--m--\u003E\u003Cdiv class=\"rg_di rg_bx rg_el ivg-i\" data-ved=\"0ahUKEwjk9PCm-7zOAhUKEiwKHcyOCj0QMwgDKAEwAQ\"\u003E\u003Ca jsaction=\"fire.ivg_o;mouseover:str.hmov;mouseout:str.hmou\" class=\"rg_l\" style=\"background:rgb(249,252,249)\"\u003E\u003Cimg data-sz=\"f\" name=\"eRjGCc0cFyVkKM:\" src=\"https:\/\/encrypted-tbn2.gstatic.com\/images?q=tbn:ANd9GcSI7SZlbDwdMCgGXzJkpwgdn9uL41xUJ1IiIcKs0qW43_Yp0EhEsg\" class=\"rg_i rg_ic\" alt=\"Image result for south\" jsaction=\"load:str.tbn\" onload=\"google.aft\u0026\u0026google.aft(this)\"\u003E\u003Cdiv class=\"_aOd rg_ilm\"\u003E\u003Cdiv class=\"rg_ilmbg\"\u003E\u003Cspan class=\"rg_ilmn\"\u003E 2000\u0026nbsp;\u0026#215;\u0026nbsp;1002 - commons.wikimedia.org \u003C\/span\u003E\u003C\/div\u003E\u003C\/div\u003E\u003C\/a\u003E\u003Cdiv class=\"rg_meta\"\u003E{\"id\":\"eRjGCc0cFyVkKM:\",\"isu\":\"commons.wikimedia.org\",\"itg\":false,\"ity\":\"png\",\"oh\":1002,\"ou\":\"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/8\/84\/South_plate.svg\/2000px-South_plate.svg.png\",\"ow\":2000,\"pt\":\"File:South plate.svg - Wikimedia Commons\",\"rid\":\"F8TVsT2GBLb6RM\",\"ru\":\"https:\/\/commons.wikimedia.org\/wiki\/File:South_plate.svg\",\"s\":\"This image rendered as PNG in other widths: 200px, 500px, 1000px, 2000px.\",\"sc\":1,\"th\":159,\"tu\":\"https:\/\/encrypted-tbn2.gstatic.com\/images?q\\u003dtbn:ANd9GcSI7SZlbDwdMCgGXzJkpwgdn9uL41xUJ1IiIcKs0qW43_Yp0EhEsg\",\"tw\":317}\u003C\/div\u003E\u003C\/div\u003E\u003C!--n--\u003E\u003C\/div\u003E"]]"""  # noqa
31
-        response = mock.Mock(text=html)
32
-        results = google_images.response(response)
33
-        self.assertEqual(type(results), list)
34
-        self.assertEqual(len(results), 2)
35
-        self.assertEqual(results[0]['title'], u'Southern United States - Wikipedia, the free encyclopedia')
36
-        self.assertEqual(results[0]['url'], 'https://en.wikipedia.org/wiki/Southern_United_States')
37
-        self.assertEqual(results[0]['img_src'],
38
-                         'https://upload.wikimedia.org/wikipedia/commons/e/e4/Us_south_census.png')
39
-        self.assertEqual(results[0]['content'],
40
-                         'The Southern United States as defined by the United States Census Bureau.')
41
-        self.assertEqual(results[0]['thumbnail_src'],
42
-                         'https://encrypted-tbn0.gstatic.com/images?q=tbn')