Browse Source

[fix] make search requests on wikidata more accurate

Marc Abonce Seguin 6 years ago
parent
commit
b12857a70d
2 changed files with 7 additions and 7 deletions
  1. searx/engines/wikidata.py (6 additions, 7 deletions)
  2. searx/settings.yml (1 addition, 0 deletions)

searx/engines/wikidata.py (+6, -7) View File

27
 # urls
27
 # urls
28
 wikidata_host = 'https://www.wikidata.org'
28
 wikidata_host = 'https://www.wikidata.org'
29
 url_search = wikidata_host \
29
 url_search = wikidata_host \
30
-    + '/wiki/Special:ItemDisambiguation?{query}'
30
+    + '/w/index.php?{query}'
31
 
31
 
32
 wikidata_api = wikidata_host + '/w/api.php'
32
 wikidata_api = wikidata_host + '/w/api.php'
33
 url_detail = wikidata_api\
33
 url_detail = wikidata_api\
40
 url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
40
 url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
41
 
41
 
42
 # xpaths
42
 # xpaths
43
-wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
43
+wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
44
 title_xpath = '//*[contains(@class,"wikibase-title-label")]'
44
 title_xpath = '//*[contains(@class,"wikibase-title-label")]'
45
 description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
45
 description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
46
 property_xpath = '//div[@id="{propertyid}"]'
46
 property_xpath = '//div[@id="{propertyid}"]'
57
 
57
 
58
 
58
 
59
 def request(query, params):
59
 def request(query, params):
60
-    language = match_language(params['language'], supported_languages).split('-')[0]
61
-
62
     params['url'] = url_search.format(
60
     params['url'] = url_search.format(
63
-        query=urlencode({'label': query, 'language': language}))
61
+        query=urlencode({'search': query}))
64
     return params
62
     return params
65
 
63
 
66
 
64
 
67
 def response(resp):
65
 def response(resp):
68
     results = []
66
     results = []
69
     html = fromstring(resp.text)
67
     html = fromstring(resp.text)
70
-    wikidata_ids = html.xpath(wikidata_ids_xpath)
68
+    search_results = html.xpath(wikidata_ids_xpath)
71
 
69
 
72
     language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
70
     language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
73
 
71
 
74
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
72
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
75
-    for wikidata_id in wikidata_ids[:result_count]:
73
+    for search_result in search_results[:result_count]:
74
+        wikidata_id = search_result.split('/')[-1]
76
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
75
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
77
         htmlresponse = get(url)
76
         htmlresponse = get(url)
78
         jsonresponse = loads(htmlresponse.text)
77
         jsonresponse = loads(htmlresponse.text)

searx/settings.yml (+1, -0) View File

174
   - name : wikidata
174
   - name : wikidata
175
     engine : wikidata
175
     engine : wikidata
176
     shortcut : wd
176
     shortcut : wd
177
+    timeout : 3.0
177
     weight : 2
178
     weight : 2
178
 
179
 
179
   - name : duckduckgo
180
   - name : duckduckgo