Explorar el Código

Merge pull request #1199 from kvch/fix-microsoft-academic

Fix Microsoft Academic engine
Adam Tauber hace 6 años
padre
commit
f5be8206c8
Ninguna cuenta está vinculada al correo electrónico del colaborador
Se han modificado 2 ficheros con 76 adiciones y 9 borrados
  1. 75
    0
      searx/engines/microsoft_academic.py
  2. 1
    9
      searx/settings.yml

+ 75
- 0
searx/engines/microsoft_academic.py Ver fichero

@@ -0,0 +1,75 @@
1
+"""
2
+Microsoft Academic (Science)
3
+
4
+@website     https://academic.microsoft.com
5
+@provide-api yes
6
+@using-api   no
7
+@results     JSON
8
+@stable      no
9
+@parse       url, title, content
10
+"""
11
+
12
+from datetime import datetime
13
+from json import loads
14
+from uuid import uuid4
15
+
16
+from searx.url_utils import urlencode
17
+from searx.utils import html_to_text
18
+
19
# Default category for this engine. The module describes itself as
# "Microsoft Academic (Science)", so results belong under 'science'
# rather than 'images'.
categories = ['science']
# The engine reads params['pageno'] when building a request.
paging = True
# Search endpoint; ``{query}`` is replaced with a URL-encoded query string.
result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'
22
+
23
+
24
def request(query, params):
    """Fill *params* with the POST request for one page of search results.

    Args:
        query: search terms; sent as ``Query`` wrapped in ``@`` characters.
        params: request parameter dict. ``params['pageno']`` is read (treated
            as a 1-based page number) and ``params['cookies']`` must already
            be a dict. ``url``, ``method``, ``data`` and two cookies are
            written.

    Returns:
        The same *params* dict, updated in place.
    """
    # Number of results requested per page; also the stride for the offset.
    page_size = 10

    # Fresh random identifiers are generated for every request.
    correlation_id = uuid4()
    msacademic = uuid4()
    time_now = datetime.now()

    params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
    params['cookies']['msacademic'] = str(msacademic)
    params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
    params['method'] = 'POST'
    params['data'] = {
        'Query': '@{query}@'.format(query=query),
        'Limit': page_size,
        # The offset counts results, not pages: with 10 results per page,
        # page 2 has to start at result 10. Sending ``pageno - 1`` directly
        # made consecutive pages overlap by nine results.
        'Offset': (params['pageno'] - 1) * page_size,
        'Filters': '',
        'OrderBy': '',
        'SortAscending': False,
    }

    return params
43
+
44
+
45
def response(resp):
    """Convert the JSON reply in ``resp.text`` into a list of result dicts.

    Every entry of the reply's ``results`` list becomes a dict with the keys
    ``url``, ``title`` and ``content``. Title and content are passed through
    ``html_to_text`` before being stored.
    """
    parsed = []

    for entry in loads(resp.text)['results']:
        link = _get_url(entry)
        heading = entry['e']['dn']
        summary = _get_content(entry)
        parsed.append(dict(
            url=link,
            title=html_to_text(heading),
            content=html_to_text(summary),
        ))

    return parsed
60
+
61
+
62
+def _get_url(result):
63
+    if 's' in result['e']:
64
+        return result['e']['s'][0]['u']
65
+    return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id'])
66
+
67
+
68
+def _get_content(result):
69
+    if 'd' in result['e']:
70
+        content = result['e']['d']
71
+        if len(content) > 300:
72
+            return content[:300] + '...'
73
+        return content
74
+
75
+    return ''

+ 1
- 9
searx/settings.yml Ver fichero

@@ -398,15 +398,7 @@ engines:
398 398
     shortcut : lo
399 399
 
400 400
   - name : microsoft academic
401
-    engine : json_engine
402
-    paging : True
403
-    search_url : https://academic.microsoft.com/api/search/GetEntityResults?query=%40{query}%40&filters=&offset={pageno}&limit=8&correlationId=undefined
404
-    results_query : results
405
-    url_query : u
406
-    title_query : dn
407
-    content_query : d
408
-    page_size : 8
409
-    first_page_num : 0
401
+    engine : microsoft_academic
410 402
     categories : science
411 403
     shortcut : ma
412 404