|
|
|
|
27
|
# urls
|
27
|
# urls
|
28
|
wikidata_host = 'https://www.wikidata.org'
|
28
|
wikidata_host = 'https://www.wikidata.org'
|
29
|
url_search = wikidata_host \
|
29
|
url_search = wikidata_host \
|
30
|
- + '/wiki/Special:ItemDisambiguation?{query}'
|
|
|
|
|
30
|
+ + '/w/index.php?{query}'
|
31
|
|
31
|
|
32
|
wikidata_api = wikidata_host + '/w/api.php'
|
32
|
wikidata_api = wikidata_host + '/w/api.php'
|
33
|
url_detail = wikidata_api\
|
33
|
url_detail = wikidata_api\
|
|
|
|
|
40
|
url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
|
40
|
url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
|
41
|
|
41
|
|
42
|
# xpaths
|
42
|
# xpaths
|
43
|
-wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
|
|
|
|
|
43
|
+wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
|
44
|
title_xpath = '//*[contains(@class,"wikibase-title-label")]'
|
44
|
title_xpath = '//*[contains(@class,"wikibase-title-label")]'
|
45
|
description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
|
45
|
description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
|
46
|
property_xpath = '//div[@id="{propertyid}"]'
|
46
|
property_xpath = '//div[@id="{propertyid}"]'
|
|
|
|
|
57
|
|
57
|
|
58
|
|
58
|
|
59
|
def request(query, params):
|
59
|
def request(query, params):
|
60
|
- language = match_language(params['language'], supported_languages).split('-')[0]
|
|
|
61
|
-
|
|
|
62
|
params['url'] = url_search.format(
|
60
|
params['url'] = url_search.format(
|
63
|
- query=urlencode({'label': query, 'language': language}))
|
|
|
|
|
61
|
+ query=urlencode({'search': query}))
|
64
|
return params
|
62
|
return params
|
65
|
|
63
|
|
66
|
|
64
|
|
67
|
def response(resp):
|
65
|
def response(resp):
|
68
|
results = []
|
66
|
results = []
|
69
|
html = fromstring(resp.text)
|
67
|
html = fromstring(resp.text)
|
70
|
- wikidata_ids = html.xpath(wikidata_ids_xpath)
|
|
|
|
|
68
|
+ search_results = html.xpath(wikidata_ids_xpath)
|
71
|
|
69
|
|
72
|
language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
|
70
|
language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
|
73
|
|
71
|
|
74
|
# TODO: make requests asynchronous to avoid timeout when result_count > 1
|
72
|
# TODO: make requests asynchronous to avoid timeout when result_count > 1
|
75
|
- for wikidata_id in wikidata_ids[:result_count]:
|
|
|
|
|
73
|
+ for search_result in search_results[:result_count]:
|
|
|
74
|
+ wikidata_id = search_result.split('/')[-1]
|
76
|
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
|
75
|
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
|
77
|
htmlresponse = get(url)
|
76
|
htmlresponse = get(url)
|
78
|
jsonresponse = loads(htmlresponse.text)
|
77
|
jsonresponse = loads(htmlresponse.text)
|