Browse Source

add google videos

marc 7 years ago
parent
commit
856dfc3018
3 changed files with 147 additions and 0 deletions
  1. 83
    0
      searx/engines/google_videos.py
  2. 4
    0
      searx/settings.yml
  3. 60
    0
      tests/unit/engines/test_google_videos.py

+ 83
- 0
searx/engines/google_videos.py View File

@@ -0,0 +1,83 @@
1
+"""
2
+ Google (Videos)
3
+
4
+ @website     https://www.google.com
5
+ @provide-api yes (https://developers.google.com/custom-search/)
6
+
7
+ @using-api   no
8
+ @results     HTML
9
+ @stable      no
10
+ @parse       url, title, content
11
+"""
12
+
13
+from datetime import date, timedelta
14
+from json import loads
15
+from lxml import html
16
+from searx.engines.xpath import extract_text
17
+from searx.url_utils import urlencode
18
+
19
+
20
# engine dependent config
categories = ['videos']
paging = True
safesearch = True
time_range_support = True
number_of_results = 10

# URL template; `{query}` and `{search_options}` are filled with
# already url-encoded `key=value` pairs by request().
search_url = 'https://www.google.com/search'\
    '?{query}'\
    '&tbm=vid'\
    '&{search_options}'
# `tbs` value for the relative ranges listed in time_range_dict.
time_range_attr = "qdr:{range}"
# `tbs` value for an explicit start/end date range (used for 'year').
# Every field is a `key:value` pair, so `cd_max` needs its colon as well.
time_range_custom_attr = "cdr:1,cd_min:{start},cd_max:{end}"
# maps searx time-range names to the single-letter codes used in `qdr:`
time_range_dict = {'day': 'd',
                   'week': 'w',
                   'month': 'm'}
36
+
37
+
38
+# do search-request
39
def request(query, params):
    """Fill ``params['url']`` with the search URL for ``query``.

    Reads ``params['pageno']``, ``params['time_range']`` and
    ``params['safesearch']``; returns the same ``params`` mapping with the
    ``'url'`` key set.
    """
    page_index = params['pageno'] - 1
    search_options = {
        'ijn': page_index,
        'start': page_index * number_of_results
    }

    time_range = params['time_range']
    if time_range in time_range_dict:
        # relative range (day / week / month)
        range_code = time_range_dict[time_range]
        search_options['tbs'] = time_range_attr.format(range=range_code)
    elif time_range == 'year':
        # 'year' has no entry in time_range_dict, so it is sent as an
        # explicit range covering the last 365 days
        date_format = '%m/%d/%Y'
        today = date.today()
        year_ago = today - timedelta(days=365)
        search_options['tbs'] = time_range_custom_attr.format(
            start=year_ago.strftime(date_format),
            end=today.strftime(date_format))

    if safesearch and params['safesearch']:
        search_options['safe'] = 'on'

    encoded_query = urlencode({'q': query})
    encoded_options = urlencode(search_options)
    params['url'] = search_url.format(query=encoded_query,
                                      search_options=encoded_options)

    return params
61
+
62
+
63
+# get response from search-request
64
def response(resp):
    """Parse the HTML result page in ``resp.text`` into a list of results.

    Each ``<div class="g">`` element that contains an ``h3/a`` link yields
    one dict with the keys ``url``, ``title``, ``content``, ``thumbnail``
    (always an empty string) and ``template`` (``'videos.html'``).

    Elements without such a link are skipped, so a single unexpected block
    no longer discards the whole page with an IndexError.

    Raises AttributeError if ``resp`` has no ``text`` attribute.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath('//div[@class="g"]'):
        hrefs = result.xpath('.//h3/a/@href')
        if not hrefs:
            # no link to point the result at; ignore this block
            continue

        title = extract_text(result.xpath('.//h3/a'))
        content = extract_text(result.xpath('.//span[@class="st"]'))

        # append result
        results.append({'url': hrefs[0],
                        'title': title,
                        'content': content,
                        'thumbnail': '',
                        'template': 'videos.html'})

    return results

+ 4
- 0
searx/settings.yml View File

@@ -266,6 +266,10 @@ engines:
266 266
     engine : google_news
267 267
     shortcut : gon
268 268
 
269
+  - name : google videos
270
+    engine : google_videos
271
+    shortcut : gov
272
+
269 273
   - name : google scholar
270 274
     engine : xpath
271 275
     paging : True

+ 60
- 0
tests/unit/engines/test_google_videos.py View File

@@ -0,0 +1,60 @@
1
+from collections import defaultdict
2
+import mock
3
+from searx.engines import google_videos
4
+from searx.testing import SearxTestCase
5
+
6
+
7
class TestGoogleVideosEngine(SearxTestCase):
    """Unit tests for the google_videos engine's request() and response()."""

    @staticmethod
    def _make_params(pageno=1, safesearch=1, time_range=''):
        # Build a fresh params mapping so scenarios do not leak into each other.
        params = defaultdict(dict)
        params['pageno'] = pageno
        params['safesearch'] = safesearch
        params['time_range'] = time_range
        return params

    def test_request(self):
        query = 'test_query'

        # first page, safesearch enabled
        params = google_videos.request(query, self._make_params())
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('safe=on', params['url'])
        self.assertIn('start=0', params['url'])

        # safesearch disabled
        params = google_videos.request(query, self._make_params(safesearch=0))
        self.assertNotIn('safe', params['url'])

        # second page starts after one page worth of results
        params = google_videos.request(query, self._make_params(pageno=2))
        self.assertIn('start={0}'.format(google_videos.number_of_results),
                      params['url'])

        # relative time range; ':' is percent-encoded by urlencode
        params = google_videos.request(query, self._make_params(time_range='week'))
        self.assertIn('tbs=qdr%3Aw', params['url'])

        # 'year' is sent as a custom date range
        params = google_videos.request(query, self._make_params(time_range='year'))
        self.assertIn('cdr%3A1', params['url'])

    def test_response(self):
        # inputs without a `text` attribute are rejected
        self.assertRaises(AttributeError, google_videos.response, None)
        self.assertRaises(AttributeError, google_videos.response, [])
        self.assertRaises(AttributeError, google_videos.response, '')
        self.assertRaises(AttributeError, google_videos.response, '[]')

        html = r"""
        <div>
            <div>
                <div class="g">
                    <div>
                        <h3><a href="url_1">Title 1</a></h3>
                    </div>
                    <div>
                        <span class="st">Content 1</span>
                    </div>
                </div>
                <div class="g">
                    <div>
                        <h3><a href="url_2">Title 2</a></h3>
                    </div>
                    <div>
                        <span class="st">Content 2</span>
                    </div>
                </div>
            </div>
        </div>
        """
        response = mock.Mock(text=html)
        results = google_videos.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['url'], u'url_1')
        self.assertEqual(results[0]['title'], u'Title 1')
        self.assertEqual(results[0]['content'], u'Content 1')
        self.assertEqual(results[1]['url'], u'url_2')
        self.assertEqual(results[1]['title'], u'Title 2')
        self.assertEqual(results[1]['content'], u'Content 2')