Ver código fonte

[fix][mod] wikidata date handling refactor - fixes #387

Adam Tauber 9 anos atrás
pai
commit
362c849797
2 arquivos alterados com 36 adições e 26 exclusões
  1. 26
    8
      searx/engines/wikidata.py
  2. 10
    18
      searx/utils.py

+ 26
- 8
searx/engines/wikidata.py Ver arquivo

@@ -1,8 +1,15 @@
1 1
 import json
2
-from urllib import urlencode
2
+
3
+from searx import logger
3 4
 from searx.poolrequests import get
4 5
 from searx.utils import format_date_by_locale
5 6
 
7
+from datetime import datetime
8
+from dateutil.parser import parse as dateutil_parse
9
+from urllib import urlencode
10
+
11
+
12
+logger = logger.getChild('wikidata')
6 13
 result_count = 1
7 14
 wikidata_host = 'https://www.wikidata.org'
8 15
 wikidata_api = wikidata_host + '/w/api.php'
@@ -164,14 +171,12 @@ def getDetail(jsonresponse, wikidata_id, language, locale):
164 171
     if postal_code is not None:
165 172
         attributes.append({'label': 'Postal code(s)', 'value': postal_code})
166 173
 
167
-    date_of_birth = get_time(claims, 'P569', None)
174
+    date_of_birth = get_time(claims, 'P569', locale, None)
168 175
     if date_of_birth is not None:
169
-        date_of_birth = format_date_by_locale(date_of_birth[8:], locale)
170 176
         attributes.append({'label': 'Date of birth', 'value': date_of_birth})
171 177
 
172
-    date_of_death = get_time(claims, 'P570', None)
178
+    date_of_death = get_time(claims, 'P570', locale, None)
173 179
     if date_of_death is not None:
174
-        date_of_death = format_date_by_locale(date_of_death[8:], locale)
175 180
         attributes.append({'label': 'Date of death', 'value': date_of_death})
176 181
 
177 182
     if len(attributes) == 0 and len(urls) == 2 and len(description) == 0:
@@ -229,7 +234,7 @@ def get_string(claims, propertyName, defaultValue=None):
229 234
         return result[0]
230 235
 
231 236
 
232
-def get_time(claims, propertyName, defaultValue=None):
237
+def get_time(claims, propertyName, locale, defaultValue=None):
233 238
     propValue = claims.get(propertyName, {})
234 239
     if len(propValue) == 0:
235 240
         return defaultValue
@@ -244,9 +249,22 @@ def get_time(claims, propertyName, defaultValue=None):
244 249
             result.append(value.get('time', ''))
245 250
 
246 251
     if len(result) == 0:
247
-        return defaultValue
252
+        date_string = defaultValue
248 253
     else:
249
-        return ', '.join(result)
254
+        date_string = ', '.join(result)
255
+
256
+    try:
257
+        parsed_date = datetime.strptime(date_string, "+%Y-%m-%dT%H:%M:%SZ")
258
+    except:
259
+        if date_string.startswith('-'):
260
+            return date_string.split('T')[0]
261
+        try:
262
+            parsed_date = dateutil_parse(date_string, fuzzy=False, default=False)
263
+        except:
264
+            logger.debug('could not parse date %s', date_string)
265
+            return date_string.split('T')[0]
266
+
267
+    return format_date_by_locale(parsed_date, locale)
250 268
 
251 269
 
252 270
 def get_geolink(claims, propertyName, defaultValue=''):

+ 10
- 18
searx/utils.py Ver arquivo

@@ -1,11 +1,10 @@
1 1
 # import htmlentitydefs
2
-import locale
3
-import dateutil.parser
4 2
 import cStringIO
5 3
 import csv
6 4
 import os
7 5
 import re
8 6
 
7
+from babel.dates import format_date
9 8
 from codecs import getincrementalencoder
10 9
 from HTMLParser import HTMLParser
11 10
 from random import choice
@@ -195,23 +194,16 @@ def get_result_templates(base_path):
195 194
     return result_templates
196 195
 
197 196
 
198
-def format_date_by_locale(date_string, locale_string):
197
+def format_date_by_locale(date, locale_string):
199 198
     # strftime works only on dates after 1900
200
-    parsed_date = dateutil.parser.parse(date_string)
201
-    if parsed_date.year <= 1900:
202
-        return parsed_date.isoformat().split('T')[0]
203
-
204
-    orig_locale = locale.getlocale()[0]
205
-    try:
206
-        locale.setlocale(locale.LC_ALL, locale_string)
207
-    except:
208
-        logger.warning('cannot set locale: {0}'.format(locale_string))
209
-    formatted_date = parsed_date.strftime(locale.nl_langinfo(locale.D_FMT))
210
-    try:
211
-        locale.setlocale(locale.LC_ALL, orig_locale)
212
-    except:
213
-        logger.warning('cannot set original locale: {0}'.format(orig_locale))
214
-    return formatted_date
199
+
200
+    if date.year <= 1900:
201
+        return date.isoformat().split('T')[0]
202
+
203
+    if locale_string == 'all':
204
+        locale_string = settings['ui']['default_locale'] or 'en_US'
205
+
206
+    return format_date(date, locale=locale_string)
215 207
 
216 208
 
217 209
 def dict_subset(d, properties):