Browse Source

add findx engine for general, images and videos

Noémi Ványi 6 years ago
parent
commit
d04e471ce5
2 changed files with 137 additions and 0 deletions
  1. 119
    0
      searx/engines/findx.py
  2. 18
    0
      searx/settings.yml

+ 119
- 0
searx/engines/findx.py View File

@@ -0,0 +1,119 @@
1
+"""
2
+FindX (General, Images, Videos)
3
+
4
+@website     https://www.findx.com
5
+@provide-api no
6
+@using-api   no
7
+@results     HTML
8
+@stable      no
9
+@parse       url, title, content, embedded, img_src, thumbnail_src, thumbnail, publishedDate
10
+"""
11
+
12
+from dateutil import parser
13
+from json import loads
14
+import re
15
+
16
+from lxml import html
17
+
18
+from searx import logger
19
+from searx.engines.xpath import extract_text
20
+from searx.engines.youtube_noapi import base_youtube_url, embedded_url
21
+from searx.url_utils import urlencode
22
+
23
+
24
# A page number is sent with every request (see the 'page' query argument).
paging = True

# XPath selecting the <script id="initial-state"> element whose text is
# decoded as JSON to obtain the results.
results_xpath = '//script[@id="initial-state"]'

# URL template: {category} is a findx section, {q} the encoded query string.
search_url = 'https://www.findx.com/{category}?{q}'

# Category name -> findx URL path segment.
type_map = dict(
    none='web',
    general='web',
    images='images',
    videos='videos',
)
33
+
34
+
35
def request(query, params):
    """Build the findx search URL and store it in ``params['url']``.

    The findx section is chosen from ``params['category']``, which may be
    either a category name (e.g. ``'images'``) or a sequence holding exactly
    one category name.  A missing, ambiguous or unknown category falls back
    to the general web search instead of raising ``KeyError``.

    :param query: search terms, sent as the ``q`` URL argument.
    :param params: request parameters; ``params['pageno']`` is sent as the
        ``page`` URL argument.
    :returns: the same ``params`` dict with ``'url'`` filled in.
    """
    selected = params.get('category')
    if isinstance(selected, (list, tuple)):
        # Only a single entry identifies the section unambiguously.
        selected = selected[0] if len(selected) == 1 else None

    # Anything that is not a known category is served by the web section.
    section = type_map.get(selected, type_map['general'])

    params['url'] = search_url.format(
        category=section,
        q=urlencode({
            'q': query,
            'page': params['pageno'],
        }),
    )
    return params
48
+
49
+
50
def response(resp):
    """Parse a findx result page into a list of result dicts.

    The page is parsed as HTML, the element(s) matched by ``results_xpath``
    are reduced to text and decoded as JSON.  The first of the ``web``,
    ``images`` and ``video`` sections holding a non-empty ``results`` list is
    converted by its matching helper.

    :param resp: HTTP response; only ``resp.text`` is read.
    :returns: list of result dicts, or ``[]`` when no section has results.
        A section that is absent from the JSON is treated as empty rather
        than raising ``KeyError``.
    """
    dom = html.fromstring(resp.text)
    results_raw_json = dom.xpath(results_xpath)
    results_json = loads(extract_text(results_raw_json))

    # Order matters: it is the precedence in which the sections are checked.
    section_parsers = (
        ('web', _general_results),
        ('images', _images_results),
        ('video', _videos_results),
    )
    for section, parse in section_parsers:
        section_results = (results_json.get(section) or {}).get('results')
        if section_results:
            return parse(section_results)

    return []
65
+
66
+
67
+def _general_results(general_results):
68
+    results = []
69
+    for result in general_results:
70
+        results.append({
71
+            'url': result['url'],
72
+            'title': result['title'],
73
+            'content': result['sum'],
74
+        })
75
+    return results
76
+
77
+
78
def _images_results(image_results):
    """Convert image hits into result dicts rendered with ``images.html``.

    ``url`` comes from ``sourceURL`` and ``content`` from ``source``; the
    thumbnail and full-size image addresses are passed through
    ``_extract_url`` before being stored.
    """
    return [
        {
            'url': image['sourceURL'],
            'title': image['title'],
            'content': image['source'],
            'thumbnail_src': _extract_url(image['assets']['thumb']['url']),
            'img_src': _extract_url(image['assets']['file']['url']),
            'template': 'images.html',
        }
        for image in image_results
    ]
90
+
91
+
92
def _videos_results(video_results):
    """Convert video hits into result dicts rendered with ``videos.html``.

    Only hits whose ``kind`` starts with ``'youtube'`` are converted; any
    other kind is logged as a warning and skipped.  Descriptions longer than
    300 characters are cut to 300 characters followed by ``'...'``.

    :param video_results: iterable of video hit dicts.
    :returns: list of result dicts.
    """
    results = []
    for result in video_results:
        kind = result['kind']
        if not kind.startswith('youtube'):
            # Logger.warn is a deprecated alias; warning() with lazy
            # arguments emits the same message.
            logger.warning('Unknown video kind in findx: %s', kind)
            continue

        snippet = result['snippet']
        description = snippet['description']
        if len(description) > 300:
            description = description[:300] + '...'

        results.append({
            'url': base_youtube_url + result['id'],
            'title': snippet['title'],
            'content': description,
            'thumbnail': _extract_url(snippet['thumbnails']['default']['url']),
            'publishedDate': parser.parse(snippet['publishedAt']),
            'embedded': embedded_url.format(videoid=result['id']),
            'template': 'videos.html',
        })
    return results
113
+
114
+
115
+def _extract_url(url):
116
+    matching = re.search('(/https?://[^)]+)', url)
117
+    if matching:
118
+        return matching.group(0)[1:]
119
+    return ''

+ 18
- 0
searx/settings.yml View File

@@ -218,6 +218,24 @@ engines:
218 218
     shortcut : fd
219 219
     disabled : True
220 220
 
221
+  - name : findx
222
+    engine : findx
223
+    shortcut : fx
224
+    categories : general
225
+    disabled : True
226
+
227
+  - name : findx images
228
+    engine : findx
229
+    shortcut : fxi
230
+    categories : images
231
+    disabled : True
232
+
233
+  - name : findx videos
234
+    engine : findx
235
+    shortcut : fxv
236
+    categories : videos
237
+    disabled : True
238
+
221 239
   - name : flickr
222 240
     categories : images
223 241
     shortcut : fl