Browse Source

[mod] fetch firefox versions in a standalone script

Alexandre Flament 7 years ago
parent
commit
0f4ac029e5
3 changed files with 92 additions and 19 deletions
  1. 15
    0
      searx/data/useragents.json
  2. 8
    19
      searx/utils.py
  3. 69
    0
      utils/fetch_firefox_version.py

+ 15
- 0
searx/data/useragents.json View File

1
+{
2
+    "os": [
3
+        "Windows NT 10; WOW64", 
4
+        "X11; Linux x86_64"
5
+    ], 
6
+    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}", 
7
+    "versions": [
8
+        "59.0.2", 
9
+        "59.0.1", 
10
+        "59.0", 
11
+        "58.0.2", 
12
+        "58.0.1", 
13
+        "58.0"
14
+    ]
15
+}

+ 8
- 19
searx/utils.py View File

9
 from imp import load_source
9
 from imp import load_source
10
 from numbers import Number
10
 from numbers import Number
11
 from os.path import splitext, join
11
 from os.path import splitext, join
12
+from io import open
12
 from random import choice
13
 from random import choice
13
 import sys
14
 import sys
15
+import json
14
 
16
 
15
 from searx.version import VERSION_STRING
17
 from searx.version import VERSION_STRING
16
 from searx.languages import language_codes
18
 from searx.languages import language_codes
36
 
38
 
37
 logger = logger.getChild('utils')
39
 logger = logger.getChild('utils')
38
 
40
 
39
-ua_versions = ('40.0',
40
-               '41.0',
41
-               '42.0',
42
-               '43.0',
43
-               '44.0',
44
-               '45.0',
45
-               '46.0',
46
-               '47.0')
47
-
48
-ua_os = ('Windows NT 6.3; WOW64',
49
-         'X11; Linux x86_64',
50
-         'X11; Linux x86')
51
-
52
-ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
53
-
54
 blocked_tags = ('script',
41
 blocked_tags = ('script',
55
                 'style')
42
                 'style')
56
 
43
 
57
-
58
-def gen_useragent():
59
-    # TODO
60
-    return ua.format(os=choice(ua_os), version=choice(ua_versions))
44
# Load the user-agent template, the OS list and the Firefox version list from
# data/useragents.json, located relative to this module's real path.
# The context manager closes the file handle as soon as the JSON is parsed
# (the previous one-liner left the handle open until garbage collection).
with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'useragents.json'),
          'r', encoding='utf-8') as _useragents_file:
    useragents = json.load(_useragents_file)
61
 
46
 
62
 
47
 
63
 def searx_useragent():
48
 def searx_useragent():
66
            suffix=settings['outgoing'].get('useragent_suffix', ''))
51
            suffix=settings['outgoing'].get('useragent_suffix', ''))
67
 
52
 
68
 
53
 
54
def gen_useragent():
    """Build a random Firefox user-agent string.

    One OS token and one version are picked at random from the module-level
    ``useragents`` mapping and substituted into its ``ua`` template.

    Returns:
        str: the formatted user-agent string.
    """
    template = useragents['ua']
    os_token = choice(useragents['os'])
    firefox_version = choice(useragents['versions'])
    return str(template.format(os=os_token, version=firefox_version))
56
+
57
+
69
 def highlight_content(content, query):
58
 def highlight_content(content, query):
70
 
59
 
71
     if not content:
60
     if not content:

+ 69
- 0
utils/fetch_firefox_version.py View File

1
+#!/usr/bin/env python
2
+
3
+# set path
4
+from sys import path
5
+from os.path import realpath, dirname
6
+path.append(realpath(dirname(realpath(__file__)) + '/../'))
7
+
8
+#
9
+import json
10
+import requests
11
+import re
12
+from distutils.version import LooseVersion, StrictVersion
13
+from lxml import html
14
+from searx.url_utils import urlparse, urljoin
15
+
16
# Directory listing that is fetched to discover Firefox releases.
URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
# Path prefix a link must have to be considered a release entry.
RELEASE_PATH = '/pub/firefox/releases/'

# Matches final release names such as "59.0", "59.0.2" or "52.9.0esr".
# Raw string so that "\." is a regex escape and not a string escape; every
# numeric component uses "+" so multi-digit minor/patch numbers
# (e.g. "3.6.28", "115.10.0esr") are matched as well.
NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9]+(\.[0-9]+)?(esr)?$')
# Unused alternative patterns, kept for reference:
# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')

# Skeleton of the data serialized to useragents.json; "versions" is empty
# here and is replaced with the fetched versions before the file is written.
useragent = {
    "versions": (),
    "os": ('Windows NT 10; WOW64',
           'X11; Linux x86_64'),
    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
}
30
+
31
+
32
def fetch_firefox_versions():
    """Fetch the Firefox release listing and return the versions it contains.

    Every link on the page at ``URL`` is resolved against ``URL``; a link is
    kept when its path starts with ``RELEASE_PATH`` and the remainder matches
    ``NORMAL_REGEX``.

    Returns:
        list of ``LooseVersion``, sorted from highest to lowest.

    Raises:
        Exception: if the server answers with an HTTP status other than 200.
    """
    resp = requests.get(URL, timeout=2.0)
    if resp.status_code != 200:
        # Format the status code instead of concatenating it with "+":
        # adding a non-string to a str raises TypeError and hides the real error.
        raise Exception("Error fetching firefox versions, HTTP code {0}".format(resp.status_code))

    dom = html.fromstring(resp.text)
    versions = []
    for link in dom.xpath('//a/@href'):
        # Named link_path so the module-level "path" (imported from sys) is not shadowed.
        link_path = urlparse(urljoin(URL, link)).path
        if not link_path.startswith(RELEASE_PATH):
            continue
        # Strip the prefix and the last character (presumably the trailing "/"
        # of a directory link - confirm against the listing markup).
        version = link_path[len(RELEASE_PATH):-1]
        if NORMAL_REGEX.match(version):
            versions.append(LooseVersion(version))

    # NOTE(review): on Python 3, ordering LooseVersion objects whose components
    # mix ints and strings (e.g. "esr") may raise TypeError - confirm.
    versions.sort(reverse=True)
    return versions
50
+
51
+
52
def fetch_firefox_last_versions():
    """Return the version strings of the two most recent Firefox majors.

    The newest major number is read from the first (highest) entry returned
    by ``fetch_firefox_versions()``. Every non-ESR version whose major number
    equals that one or the one just below it is kept, in the same order.

    Returns:
        list of str: version strings such as ``"59.0.2"``.

    Raises:
        IndexError: if ``fetch_firefox_versions()`` returned no version.
    """
    versions = fetch_firefox_versions()
    if not versions:
        # Same exception type that indexing the empty list would raise,
        # but with a message that says what actually went wrong.
        raise IndexError("No Firefox version found at " + URL)

    major_last = versions[0].version[0]
    majors = (major_last, major_last - 1)
    return [version.vstring
            for version in versions
            if version.version[0] in majors and 'esr' not in version.version]
64
+
65
+
66
def main():
    """Fetch the latest Firefox versions and write searx/data/useragents.json."""
    # Local import: io is only needed here.
    import io

    useragent["versions"] = fetch_firefox_last_versions()

    # Resolve the target from this script's location so the result does not
    # depend on the current working directory.
    target = realpath(dirname(realpath(__file__)) + '/../searx/data/useragents.json')

    # No "encoding" keyword here: Python 3's json module does not accept it.
    content = json.dumps(useragent, sort_keys=True, indent=4, ensure_ascii=False)
    if isinstance(content, bytes):
        # Python 2 can return a byte string; the text-mode file needs unicode.
        content = content.decode('utf-8')

    # The context manager closes the file even if the write fails.
    with io.open(target, 'w', encoding='utf-8') as f:
        f.write(content)


if __name__ == '__main__':
    main()