Ver código fonte

[mod] fetch firefox versions in a standalone script

Alexandre Flament 7 anos atrás
pai
commit
0f4ac029e5
3 arquivos alterados com 92 adições e 19 exclusões
  1. 15
    0
      searx/data/useragents.json
  2. 8
    19
      searx/utils.py
  3. 69
    0
      utils/fetch_firefox_version.py

+ 15
- 0
searx/data/useragents.json Ver arquivo

@@ -0,0 +1,15 @@
1
+{
2
+    "os": [
3
+        "Windows NT 10; WOW64", 
4
+        "X11; Linux x86_64"
5
+    ], 
6
+    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}", 
7
+    "versions": [
8
+        "59.0.2", 
9
+        "59.0.1", 
10
+        "59.0", 
11
+        "58.0.2", 
12
+        "58.0.1", 
13
+        "58.0"
14
+    ]
15
+}

+ 8
- 19
searx/utils.py Ver arquivo

@@ -9,8 +9,10 @@ from codecs import getincrementalencoder
9 9
 from imp import load_source
10 10
 from numbers import Number
11 11
 from os.path import splitext, join
12
+from io import open
12 13
 from random import choice
13 14
 import sys
15
+import json
14 16
 
15 17
 from searx.version import VERSION_STRING
16 18
 from searx.languages import language_codes
@@ -36,28 +38,11 @@ else:
36 38
 
37 39
 logger = logger.getChild('utils')
38 40
 
39
-ua_versions = ('40.0',
40
-               '41.0',
41
-               '42.0',
42
-               '43.0',
43
-               '44.0',
44
-               '45.0',
45
-               '46.0',
46
-               '47.0')
47
-
48
-ua_os = ('Windows NT 6.3; WOW64',
49
-         'X11; Linux x86_64',
50
-         'X11; Linux x86')
51
-
52
-ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
53
-
54 41
 blocked_tags = ('script',
55 42
                 'style')
56 43
 
57
-
58
-def gen_useragent():
59
-    # TODO
60
-    return ua.format(os=choice(ua_os), version=choice(ua_versions))
44
# Load the user-agent building blocks (keys used below: 'ua', 'os',
# 'versions') from data/useragents.json located next to this module.
# The file is opened in a `with` block so the handle is closed right after
# parsing instead of being left open until garbage collection.
with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'useragents.json'),
          'r', encoding='utf-8') as _useragents_file:
    useragents = json.load(_useragents_file)
61 46
 
62 47
 
63 48
 def searx_useragent():
@@ -66,6 +51,10 @@ def searx_useragent():
66 51
            suffix=settings['outgoing'].get('useragent_suffix', ''))
67 52
 
68 53
 
54
def gen_useragent():
    """Return a Firefox User-Agent string built from random entries.

    The 'ua' template of ``useragents`` is filled with one randomly chosen
    'os' entry and one randomly chosen 'versions' entry; the result is
    converted with ``str()`` before being returned.
    """
    # Draw the OS first and the version second, one after the other.
    os_name = choice(useragents['os'])
    firefox_version = choice(useragents['versions'])
    filled = useragents['ua'].format(os=os_name, version=firefox_version)
    return str(filled)
56
+
57
+
69 58
 def highlight_content(content, query):
70 59
 
71 60
     if not content:

+ 69
- 0
utils/fetch_firefox_version.py Ver arquivo

@@ -0,0 +1,69 @@
1
+#!/usr/bin/env python
2
+
3
+# set path
4
+from sys import path
5
+from os.path import realpath, dirname
6
+path.append(realpath(dirname(realpath(__file__)) + '/../'))
7
+
8
+#
9
+import json
10
+import requests
11
+import re
12
+from distutils.version import LooseVersion, StrictVersion
13
+from lxml import html
14
+from searx.url_utils import urlparse, urljoin
15
+
16
+URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
17
+RELEASE_PATH = '/pub/firefox/releases/'
18
+
19
# Matches plain release names such as '59.0', '59.0.2' or '52.0esr'; names
# with a beta-style suffix such as '60.0b3' are not matched.
# Written as a raw string so that '\.' is a regex escape rather than an
# invalid string escape, and with '[0-9]+' so that multi-digit minor/patch
# numbers (e.g. '3.6.10') are accepted as well.
NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9]+(\.[0-9]+)?(esr)?$')
20
+# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
21
+# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')
22
+
23
+# 
24
+useragent = {
25
+    "versions": (),
26
+    "os": ('Windows NT 10; WOW64',
27
+           'X11; Linux x86_64'),
28
+    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
29
+}
30
+
31
+
32
def _version_sort_key(version):
    """Return a sort key for a LooseVersion that never compares int with str."""
    # LooseVersion parses e.g. '52.0esr' into [52, 0, 'esr'].  Comparing such a
    # list with [52, 0, 1] raises TypeError on Python 3, so every component is
    # tagged with its kind: numbers (tag 0) sort before strings (tag 1).
    return [(0, part) if isinstance(part, int) else (1, part)
            for part in version.version]


def fetch_firefox_versions():
    """Fetch the Firefox release listing at URL and return the versions found.

    Every ``href`` of the page is resolved against URL; links whose path
    starts with RELEASE_PATH and whose remaining name matches NORMAL_REGEX
    are kept.

    Returns:
        list of LooseVersion, sorted from newest to oldest.

    Raises:
        Exception: if the listing is not answered with HTTP status 200.
    """
    resp = requests.get(URL, timeout=2.0)
    if resp.status_code != 200:
        # status_code is an integer: convert it explicitly, otherwise the
        # concatenation itself fails with TypeError and hides the real error.
        raise Exception("Error fetching firefox versions, HTTP code " + str(resp.status_code))

    dom = html.fromstring(resp.text)
    versions = []

    for link in dom.xpath('//a/@href'):
        link_path = urlparse(urljoin(URL, link)).path
        if not link_path.startswith(RELEASE_PATH):
            continue
        # Keep what follows RELEASE_PATH minus the final character
        # (presumably the trailing '/' of a directory link).
        version = link_path[len(RELEASE_PATH):-1]
        if NORMAL_REGEX.match(version):
            versions.append(LooseVersion(version))

    versions.sort(key=_version_sort_key, reverse=True)
    return versions
50
+
51
+
52
def fetch_firefox_last_versions():
    """Return the version strings of the two most recent Firefox majors.

    The first entry returned by fetch_firefox_versions() defines the latest
    major number; every version whose major equals that number or the one
    before it is kept, except versions containing an 'esr' component.

    Returns:
        list of str: the matching version strings, in the order delivered
        by fetch_firefox_versions().

    Raises:
        IndexError: if fetch_firefox_versions() returned no version at all.
    """
    versions = fetch_firefox_versions()
    if not versions:
        # Same exception type as indexing the empty list would raise, but
        # with a message that explains what went wrong.
        raise IndexError("no Firefox release version found")

    major_last = versions[0].version[0]
    wanted_majors = (major_last, major_last - 1)
    return [version.vstring
            for version in versions
            if version.version[0] in wanted_majors and 'esr' not in version.version]
64
+
65
+
66
# Refresh the version list, then write the whole user-agent description to
# searx/data/useragents.json.
from io import open  # text-mode open() with an `encoding` argument on Python 2 and 3

useragent["versions"] = fetch_firefox_last_versions()

# Resolve the target relative to this script rather than to the current
# working directory, so the script can be started from anywhere.
useragents_path = realpath(dirname(realpath(__file__)) + '/../searx/data/useragents.json')

# json.dump() accepts no `encoding` argument on Python 3, so serialize to a
# string first and let the file object do the UTF-8 encoding.
content = json.dumps(useragent, sort_keys=True, indent=4, ensure_ascii=False)
if isinstance(content, bytes):
    # Python 2 may hand back a byte string; the text-mode file needs unicode.
    content = content.decode('utf-8')

# The `with` block closes the file even if writing fails.
with open(useragents_path, "w", encoding="utf-8") as f:
    f.write(content)