# test_wikipedia.py — unit tests for the searx wikipedia engine
# -*- coding: utf-8 -*-
from collections import defaultdict

import mock

from searx.engines import wikipedia
from searx.testing import SearxTestCase
  6. class TestWikipediaEngine(SearxTestCase):
  7. def test_request(self):
  8. query = 'test_query'
  9. dicto = defaultdict(dict)
  10. dicto['language'] = 'fr_FR'
  11. params = wikipedia.request(query, dicto)
  12. self.assertIn('url', params)
  13. self.assertIn(query, params['url'])
  14. self.assertIn('test_query', params['url'])
  15. self.assertIn('Test_Query', params['url'])
  16. self.assertIn('fr.wikipedia.org', params['url'])
  17. query = 'Test_Query'
  18. params = wikipedia.request(query, dicto)
  19. self.assertIn('Test_Query', params['url'])
  20. self.assertNotIn('test_query', params['url'])
  21. dicto['language'] = 'all'
  22. params = wikipedia.request(query, dicto)
  23. self.assertIn('en', params['url'])
  24. def test_response(self):
  25. dicto = defaultdict(dict)
  26. dicto['language'] = 'fr'
  27. self.assertRaises(AttributeError, wikipedia.response, None)
  28. self.assertRaises(AttributeError, wikipedia.response, [])
  29. self.assertRaises(AttributeError, wikipedia.response, '')
  30. self.assertRaises(AttributeError, wikipedia.response, '[]')
  31. # page not found
  32. json = """
  33. {
  34. "batchcomplete": "",
  35. "query": {
  36. "normalized": [],
  37. "pages": {
  38. "-1": {
  39. "ns": 0,
  40. "title": "",
  41. "missing": ""
  42. }
  43. }
  44. }
  45. }"""
  46. response = mock.Mock(content=json, search_params=dicto)
  47. self.assertEqual(wikipedia.response(response), [])
  48. # normal case
  49. json = """
  50. {
  51. "batchcomplete": "",
  52. "query": {
  53. "normalized": [],
  54. "pages": {
  55. "12345": {
  56. "pageid": 12345,
  57. "ns": 0,
  58. "title": "The Title",
  59. "extract": "The Title is...",
  60. "thumbnail": {
  61. "source": "img_src.jpg"
  62. },
  63. "pageimage": "img_name.jpg"
  64. }
  65. }
  66. }
  67. }"""
  68. response = mock.Mock(content=json, search_params=dicto)
  69. results = wikipedia.response(response)
  70. self.assertEqual(type(results), list)
  71. self.assertEqual(len(results), 2)
  72. self.assertEqual(results[0]['title'], u'The Title')
  73. self.assertIn('fr.wikipedia.org/wiki/The_Title', results[0]['url'])
  74. self.assertEqual(results[1]['infobox'], u'The Title')
  75. self.assertIn('fr.wikipedia.org/wiki/The_Title', results[1]['id'])
  76. self.assertIn('The Title is...', results[1]['content'])
  77. self.assertEqual(results[1]['img_src'], 'img_src.jpg')
  78. # disambiguation page
  79. json = """
  80. {
  81. "batchcomplete": "",
  82. "query": {
  83. "normalized": [],
  84. "pages": {
  85. "12345": {
  86. "pageid": 12345,
  87. "ns": 0,
  88. "title": "The Title",
  89. "extract": "The Title can be:\\nThe Title 1\\nThe Title 2\\nThe Title 3\\nThe Title 4......................................................................................................................................." """ # noqa
  90. json += """
  91. }
  92. }
  93. }
  94. }"""
  95. response = mock.Mock(content=json, search_params=dicto)
  96. results = wikipedia.response(response)
  97. self.assertEqual(type(results), list)
  98. self.assertEqual(len(results), 0)
  99. # no image
  100. json = """
  101. {
  102. "batchcomplete": "",
  103. "query": {
  104. "normalized": [],
  105. "pages": {
  106. "12345": {
  107. "pageid": 12345,
  108. "ns": 0,
  109. "title": "The Title",
  110. "extract": "The Title is......................................................................................................................................................................................." """ # noqa
  111. json += """
  112. }
  113. }
  114. }
  115. }"""
  116. response = mock.Mock(content=json, search_params=dicto)
  117. results = wikipedia.response(response)
  118. self.assertEqual(type(results), list)
  119. self.assertEqual(len(results), 2)
  120. self.assertIn('The Title is...', results[1]['content'])
  121. self.assertEqual(results[1]['img_src'], None)
  122. # title not in first paragraph
  123. json = u"""
  124. {
  125. "batchcomplete": "",
  126. "query": {
  127. "normalized": [],
  128. "pages": {
  129. "12345": {
  130. "pageid": 12345,
  131. "ns": 0,
  132. "title": "披頭四樂隊",
  133. "extract": "披头士乐队....................................................................................................................................................................................................\\n披頭四樂隊...", """ # noqa
  134. json += """
  135. "thumbnail": {
  136. "source": "img_src.jpg"
  137. },
  138. "pageimage": "img_name.jpg"
  139. }
  140. }
  141. }
  142. }"""
  143. response = mock.Mock(content=json, search_params=dicto)
  144. results = wikipedia.response(response)
  145. self.assertEqual(type(results), list)
  146. self.assertEqual(len(results), 2)
  147. self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
  148. self.assertIn(u'披头士乐队...', results[1]['content'])