123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. # -*- coding: utf-8 -*-
  2. from lxml.html import fromstring
  3. from collections import defaultdict
  4. import mock
  5. from searx.engines import wikidata
  6. from searx.testing import SearxTestCase
  7. class TestWikidataEngine(SearxTestCase):
  8. def test_request(self):
  9. query = 'test_query'
  10. dicto = defaultdict(dict)
  11. params = wikidata.request(query, dicto)
  12. self.assertIn('url', params)
  13. self.assertIn(query, params['url'])
  14. self.assertIn('wikidata.org', params['url'])
  15. params = wikidata.request(query, dicto)
  16. self.assertIn(query, params['url'])
  17. # successful cases are not tested here to avoid sending additional requests
  18. def test_response(self):
  19. self.assertRaises(AttributeError, wikidata.response, None)
  20. self.assertRaises(AttributeError, wikidata.response, [])
  21. self.assertRaises(AttributeError, wikidata.response, '')
  22. self.assertRaises(AttributeError, wikidata.response, '[]')
  23. wikidata.supported_languages = ['en', 'es']
  24. response = mock.Mock(text='<html></html>', search_params={"language": "en"})
  25. self.assertEqual(wikidata.response(response), [])
  26. def test_getDetail(self):
  27. response = {}
  28. results = wikidata.getDetail(response, "Q123", "en", "en-US")
  29. self.assertEqual(results, [])
  30. title_html = '<div><div class="wikibase-title-label">Test</div></div>'
  31. html = """
  32. <div>
  33. <div class="wikibase-entitytermsview-heading-description">
  34. </div>
  35. <div>
  36. <ul class="wikibase-sitelinklistview-listview">
  37. <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
  38. </ul>
  39. </div>
  40. </div>
  41. """
  42. response = {"parse": {"displaytitle": title_html, "text": html}}
  43. results = wikidata.getDetail(response, "Q123", "en", "en-US")
  44. self.assertEqual(len(results), 1)
  45. self.assertEqual(results[0]['url'], 'https://en.wikipedia.org/wiki/Test')
  46. title_html = """
  47. <div>
  48. <div class="wikibase-title-label">
  49. <span lang="en">Test</span>
  50. <sup class="wb-language-fallback-indicator">English</sup>
  51. </div>
  52. </div>
  53. """
  54. html = """
  55. <div>
  56. <div class="wikibase-entitytermsview-heading-description">
  57. <span lang="en">Description</span>
  58. <sup class="wb-language-fallback-indicator">English</sup>
  59. </div>
  60. <div id="P856">
  61. <div class="wikibase-statementgroupview-property-label">
  62. <a href="/wiki/Property:P856">
  63. <span lang="en">official website</span>
  64. <sup class="wb-language-fallback-indicator">English</sup>
  65. </a>
  66. </div>
  67. <div class="wikibase-statementview-mainsnak">
  68. <a class="external free" href="https://officialsite.com">
  69. https://officialsite.com
  70. </a>
  71. </div>
  72. </div>
  73. <div>
  74. <ul class="wikibase-sitelinklistview-listview">
  75. <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
  76. </ul>
  77. </div>
  78. </div>
  79. """
  80. response = {"parse": {"displaytitle": title_html, "text": html}}
  81. results = wikidata.getDetail(response, "Q123", "yua", "yua_MX")
  82. self.assertEqual(len(results), 2)
  83. self.assertEqual(results[0]['title'], 'Official website')
  84. self.assertEqual(results[0]['url'], 'https://officialsite.com')
  85. self.assertEqual(results[1]['infobox'], 'Test')
  86. self.assertEqual(results[1]['id'], None)
  87. self.assertEqual(results[1]['content'], 'Description')
  88. self.assertEqual(results[1]['attributes'], [])
  89. self.assertEqual(results[1]['urls'][0]['title'], 'Official website')
  90. self.assertEqual(results[1]['urls'][0]['url'], 'https://officialsite.com')
  91. self.assertEqual(results[1]['urls'][1]['title'], 'Wikipedia (en)')
  92. self.assertEqual(results[1]['urls'][1]['url'], 'https://en.wikipedia.org/wiki/Test')
  93. def test_add_image(self):
  94. image_src = wikidata.add_image(fromstring("<div></div>"))
  95. self.assertEqual(image_src, None)
  96. html = u"""
  97. <div>
  98. <div id="P18">
  99. <div class="wikibase-statementgroupview-property-label">
  100. <a href="/wiki/Property:P18">
  101. image
  102. </a>
  103. </div>
  104. <div class="wikibase-statementlistview">
  105. <div class="wikibase-statementview listview-item">
  106. <div class="wikibase-statementview-rankselector">
  107. <span class="wikibase-rankselector-normal"></span>
  108. </div>
  109. <div class="wikibase-statementview-mainsnak">
  110. <div>
  111. <div class="wikibase-snakview-value">
  112. <a href="https://commons.wikimedia.org/wiki/File:image.png">
  113. image.png
  114. </a>
  115. </div>
  116. </div>
  117. </div>
  118. </div>
  119. </div>
  120. </div>
  121. </div>
  122. """
  123. html_etree = fromstring(html)
  124. image_src = wikidata.add_image(html_etree)
  125. self.assertEqual(image_src,
  126. "https://commons.wikimedia.org/wiki/Special:FilePath/image.png?width=500&height=400")
  127. html = u"""
  128. <div>
  129. <div id="P2910">
  130. <div class="wikibase-statementgroupview-property-label">
  131. <a href="/wiki/Property:P2910">
  132. icon
  133. </a>
  134. </div>
  135. <div class="wikibase-statementlistview">
  136. <div class="wikibase-statementview listview-item">
  137. <div class="wikibase-statementview-rankselector">
  138. <span class="wikibase-rankselector-normal"></span>
  139. </div>
  140. <div class="wikibase-statementview-mainsnak">
  141. <div>
  142. <div class="wikibase-snakview-value">
  143. <a href="https://commons.wikimedia.org/wiki/File:icon.png">
  144. icon.png
  145. </a>
  146. </div>
  147. </div>
  148. </div>
  149. </div>
  150. </div>
  151. </div>
  152. <div id="P154">
  153. <div class="wikibase-statementgroupview-property-label">
  154. <a href="/wiki/Property:P154">
  155. logo
  156. </a>
  157. </div>
  158. <div class="wikibase-statementlistview">
  159. <div class="wikibase-statementview listview-item">
  160. <div class="wikibase-statementview-rankselector">
  161. <span class="wikibase-rankselector-normal"></span>
  162. </div>
  163. <div class="wikibase-statementview-mainsnak">
  164. <div>
  165. <div class="wikibase-snakview-value">
  166. <a href="https://commons.wikimedia.org/wiki/File:logo.png">
  167. logo.png
  168. </a>
  169. </div>
  170. </div>
  171. </div>
  172. </div>
  173. </div>
  174. </div>
  175. </div>
  176. """
  177. html_etree = fromstring(html)
  178. image_src = wikidata.add_image(html_etree)
  179. self.assertEqual(image_src,
  180. "https://commons.wikimedia.org/wiki/Special:FilePath/logo.png?width=500&height=400")
  181. def test_add_attribute(self):
  182. html = u"""
  183. <div>
  184. <div id="P27">
  185. <div class="wikibase-statementgroupview-property-label">
  186. <a href="/wiki/Property:P27">
  187. country of citizenship
  188. </a>
  189. </div>
  190. <div class="wikibase-statementlistview">
  191. <div class="wikibase-statementview listview-item">
  192. <div class="wikibase-statementview-rankselector">
  193. <span class="wikibase-rankselector-normal"></span>
  194. </div>
  195. <div class="wikibase-statementview-mainsnak">
  196. <div>
  197. <div class="wikibase-snakview-value">
  198. <a href="/wiki/Q145">
  199. United Kingdom
  200. </a>
  201. </div>
  202. </div>
  203. </div>
  204. </div>
  205. </div>
  206. </div>
  207. </div>
  208. """
  209. attributes = []
  210. html_etree = fromstring(html)
  211. wikidata.add_attribute(attributes, html_etree, "Fail")
  212. self.assertEqual(attributes, [])
  213. wikidata.add_attribute(attributes, html_etree, "P27")
  214. self.assertEqual(len(attributes), 1)
  215. self.assertEqual(attributes[0]["label"], "Country of citizenship")
  216. self.assertEqual(attributes[0]["value"], "United Kingdom")
  217. html = u"""
  218. <div>
  219. <div id="P569">
  220. <div class="wikibase-statementgroupview-property-label">
  221. <a href="/wiki/Property:P569">
  222. date of birth
  223. </a>
  224. </div>
  225. <div class="wikibase-statementlistview">
  226. <div class="wikibase-statementview listview-item">
  227. <div class="wikibase-statementview-rankselector">
  228. <span class="wikibase-rankselector-normal"></span>
  229. </div>
  230. <div class="wikibase-statementview-mainsnak">
  231. <div>
  232. <div class="wikibase-snakview-value">
  233. 27 January 1832
  234. <sup class="wb-calendar-name">
  235. Gregorian
  236. </sup>
  237. </div>
  238. </div>
  239. </div>
  240. </div>
  241. </div>
  242. </div>
  243. </div>
  244. """
  245. attributes = []
  246. html_etree = fromstring(html)
  247. wikidata.add_attribute(attributes, html_etree, "P569", date=True)
  248. self.assertEqual(len(attributes), 1)
  249. self.assertEqual(attributes[0]["label"], "Date of birth")
  250. self.assertEqual(attributes[0]["value"], "27 January 1832")
  251. html = u"""
  252. <div>
  253. <div id="P6">
  254. <div class="wikibase-statementgroupview-property-label">
  255. <a href="/wiki/Property:P27">
  256. head of government
  257. </a>
  258. </div>
  259. <div class="wikibase-statementlistview">
  260. <div class="wikibase-statementview listview-item">
  261. <div class="wikibase-statementview-rankselector">
  262. <span class="wikibase-rankselector-normal"></span>
  263. </div>
  264. <div class="wikibase-statementview-mainsnak">
  265. <div>
  266. <div class="wikibase-snakview-value">
  267. <a href="/wiki/Q206">
  268. Old Prime Minister
  269. </a>
  270. </div>
  271. </div>
  272. </div>
  273. </div>
  274. <div class="wikibase-statementview listview-item">
  275. <div class="wikibase-statementview-rankselector">
  276. <span class="wikibase-rankselector-preferred"></span>
  277. </div>
  278. <div class="wikibase-statementview-mainsnak">
  279. <div>
  280. <div class="wikibase-snakview-value">
  281. <a href="/wiki/Q3099714">
  282. Actual Prime Minister
  283. </a>
  284. </div>
  285. </div>
  286. </div>
  287. </div>
  288. </div>
  289. </div>
  290. </div>
  291. """
  292. attributes = []
  293. html_etree = fromstring(html)
  294. wikidata.add_attribute(attributes, html_etree, "P6")
  295. self.assertEqual(len(attributes), 1)
  296. self.assertEqual(attributes[0]["label"], "Head of government")
  297. self.assertEqual(attributes[0]["value"], "Old Prime Minister, Actual Prime Minister")
  298. attributes = []
  299. html_etree = fromstring(html)
  300. wikidata.add_attribute(attributes, html_etree, "P6", trim=True)
  301. self.assertEqual(len(attributes), 1)
  302. self.assertEqual(attributes[0]["value"], "Actual Prime Minister")
  303. def test_add_url(self):
  304. html = u"""
  305. <div>
  306. <div id="P856">
  307. <div class="wikibase-statementgroupview-property-label">
  308. <a href="/wiki/Property:P856">
  309. official website
  310. </a>
  311. </div>
  312. <div class="wikibase-statementlistview">
  313. <div class="wikibase-statementview listview-item">
  314. <div class="wikibase-statementview-mainsnak">
  315. <div>
  316. <div class="wikibase-snakview-value">
  317. <a class="external free" href="https://searx.me">
  318. https://searx.me/
  319. </a>
  320. </div>
  321. </div>
  322. </div>
  323. </div>
  324. </div>
  325. </div>
  326. </div>
  327. """
  328. urls = []
  329. html_etree = fromstring(html)
  330. wikidata.add_url(urls, html_etree, 'P856')
  331. self.assertEquals(len(urls), 1)
  332. self.assertIn({'title': 'Official website', 'url': 'https://searx.me/'}, urls)
  333. urls = []
  334. results = []
  335. wikidata.add_url(urls, html_etree, 'P856', 'custom label', results=results)
  336. self.assertEquals(len(urls), 1)
  337. self.assertEquals(len(results), 1)
  338. self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, urls)
  339. self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, results)
  340. html = u"""
  341. <div>
  342. <div id="P856">
  343. <div class="wikibase-statementgroupview-property-label">
  344. <a href="/wiki/Property:P856">
  345. official website
  346. </a>
  347. </div>
  348. <div class="wikibase-statementlistview">
  349. <div class="wikibase-statementview listview-item">
  350. <div class="wikibase-statementview-mainsnak">
  351. <div>
  352. <div class="wikibase-snakview-value">
  353. <a class="external free" href="http://www.worldofwarcraft.com">
  354. http://www.worldofwarcraft.com
  355. </a>
  356. </div>
  357. </div>
  358. </div>
  359. </div>
  360. <div class="wikibase-statementview listview-item">
  361. <div class="wikibase-statementview-mainsnak">
  362. <div>
  363. <div class="wikibase-snakview-value">
  364. <a class="external free" href="http://eu.battle.net/wow/en/">
  365. http://eu.battle.net/wow/en/
  366. </a>
  367. </div>
  368. </div>
  369. </div>
  370. </div>
  371. </div>
  372. </div>
  373. </div>
  374. """
  375. urls = []
  376. html_etree = fromstring(html)
  377. wikidata.add_url(urls, html_etree, 'P856')
  378. self.assertEquals(len(urls), 2)
  379. self.assertIn({'title': 'Official website', 'url': 'http://www.worldofwarcraft.com'}, urls)
  380. self.assertIn({'title': 'Official website', 'url': 'http://eu.battle.net/wow/en/'}, urls)
  381. def test_get_imdblink(self):
  382. html = u"""
  383. <div>
  384. <div class="wikibase-statementview-mainsnak">
  385. <div>
  386. <div class="wikibase-snakview-value">
  387. <a class="wb-external-id" href="http://www.imdb.com/tt0433664">
  388. tt0433664
  389. </a>
  390. </div>
  391. </div>
  392. </div>
  393. </div>
  394. """
  395. html_etree = fromstring(html)
  396. imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/')
  397. html = u"""
  398. <div>
  399. <div class="wikibase-statementview-mainsnak">
  400. <div>
  401. <div class="wikibase-snakview-value">
  402. <a class="wb-external-id"
  403. href="href="http://tools.wmflabs.org/...http://www.imdb.com/&id=nm4915994"">
  404. nm4915994
  405. </a>
  406. </div>
  407. </div>
  408. </div>
  409. </div>
  410. """
  411. html_etree = fromstring(html)
  412. imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/')
  413. self.assertIn('https://www.imdb.com/name/nm4915994', imdblink)
  414. def test_get_geolink(self):
  415. html = u"""
  416. <div>
  417. <div class="wikibase-statementview-mainsnak">
  418. <div>
  419. <div class="wikibase-snakview-value">
  420. 60°N, 40°E
  421. </div>
  422. </div>
  423. </div>
  424. </div>
  425. """
  426. html_etree = fromstring(html)
  427. geolink = wikidata.get_geolink(html_etree)
  428. self.assertIn('https://www.openstreetmap.org/', geolink)
  429. self.assertIn('lat=60&lon=40', geolink)
  430. html = u"""
  431. <div>
  432. <div class="wikibase-statementview-mainsnak">
  433. <div>
  434. <div class="wikibase-snakview-value">
  435. 34°35'59"S, 58°22'55"W
  436. </div>
  437. </div>
  438. </div>
  439. </div>
  440. """
  441. html_etree = fromstring(html)
  442. geolink = wikidata.get_geolink(html_etree)
  443. self.assertIn('https://www.openstreetmap.org/', geolink)
  444. self.assertIn('lat=-34.59', geolink)
  445. self.assertIn('lon=-58.38', geolink)
  446. def test_get_wikilink(self):
  447. html = """
  448. <div>
  449. <div>
  450. <ul class="wikibase-sitelinklistview-listview">
  451. <li data-wb-siteid="arwiki"><a href="http://ar.wikipedia.org/wiki/Test">Test</a></li>
  452. <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
  453. </ul>
  454. </div>
  455. <div>
  456. <ul class="wikibase-sitelinklistview-listview">
  457. <li data-wb-siteid="enwikiquote"><a href="https://en.wikiquote.org/wiki/Test">Test</a></li>
  458. </ul>
  459. </div>
  460. </div>
  461. """
  462. html_etree = fromstring(html)
  463. wikilink = wikidata.get_wikilink(html_etree, 'nowiki')
  464. self.assertEqual(wikilink, None)
  465. wikilink = wikidata.get_wikilink(html_etree, 'enwiki')
  466. self.assertEqual(wikilink, 'https://en.wikipedia.org/wiki/Test')
  467. wikilink = wikidata.get_wikilink(html_etree, 'arwiki')
  468. self.assertEqual(wikilink, 'https://ar.wikipedia.org/wiki/Test')
  469. wikilink = wikidata.get_wikilink(html_etree, 'enwikiquote')
  470. self.assertEqual(wikilink, 'https://en.wikiquote.org/wiki/Test')