rinpatch před 6 roky
rodič
revize
3ec35a06f7
No account linked to committer's email
2 změnil soubory, kde provedl 91 přidání a 0 odebrání
  1. 85
    0
      searx/engines/acgsou.py
  2. 6
    0
      searx/settings.yml

+ 85
- 0
searx/engines/acgsou.py Zobrazit soubor

@@ -0,0 +1,85 @@
1
+"""
2
+ Acgsou (Japanese Animation/Music/Comics Bittorrent tracker)
3
+
4
+ @website      https://www.acgsou.com/
5
+ @provide-api  no
6
+ @using-api    no
7
+ @results      HTML
8
+ @stable       no (HTML can change)
9
+ @parse        url, title, content, seed, leech, torrentfile
10
+"""
11
+
12
+from lxml import html
13
+from searx.engines.xpath import extract_text
14
+from searx.url_utils import urlencode
15
+from searx.utils import get_torrent_size, int_or_zero
16
+
17
+# engine dependent config: categories this engine is listed under, and paging support flag
18
+categories = ['files', 'images', 'videos', 'music']
19
+paging = True  # request() puts params['pageno'] into the search URL
20
+
21
+# search-url: {query} receives the urlencoded "keyword=..." pair, {offset} the page number (see request())
22
+base_url = 'https://www.acgsou.com/'  # also prepended to each result's href in response()
23
+search_url = base_url + 'search.php?{query}&page={offset}'
24
+# xpath queries; all but xpath_results are relative (".//") and evaluated once per result row
25
+xpath_results = '//table[contains(@class, "list_style table_fixed")]//tr[not(th)]'  # table rows that contain no <th> cell
26
+xpath_category = './/td[2]/a[1]'  # first link in the 2nd cell; its text is used as the category
27
+xpath_title = './/td[3]/a[last()]'  # last link in the 3rd cell; supplies both title text and page href
28
+xpath_torrent_links = './/td[3]/a'  # NOTE(review): not referenced by the code visible in this file - confirm whether it is still needed
29
+xpath_filesize = './/td[4]/text()'  # text of the 4th cell; response() splits it into number and 2-char unit
30
+
31
+# do search-request
32
def request(query, params):
    """Build the search URL for the given query and store it in ``params``.

    The query is urlencoded as the ``keyword`` parameter and the requested
    page number is taken from ``params['pageno']``. The same ``params``
    dict is returned after its ``'url'`` entry has been set.
    """
    encoded_keyword = urlencode({'keyword': query})
    page_number = params['pageno']
    params['url'] = search_url.format(query=encoded_keyword, offset=page_number)
    return params
36
+
37
+
38
+# get response from search-request
39
def response(resp):
    """Parse a search result page into a list of torrent result dicts.

    Each row matched by ``xpath_results`` yields one dict with the keys
    ``url``, ``title``, ``content``, ``filesize``, ``magnetlink`` and
    ``template`` (always ``'torrent.html'``).

    Rows without a title link, or whose title link has no ``href``, are
    skipped instead of aborting the whole page. A row without a category
    link gets an empty category, and a row whose size cannot be read keeps
    the default ``filesize`` of 0.

    :param resp: HTTP response object; only ``resp.text`` is read.
    :returns: list of result dicts (possibly empty).
    """
    results = []
    dom = html.fromstring(resp.text)

    # The placeholder receives the identifier cut out of the detail-page href.
    magnet_template = "magnet:?xt=urn:btih:{}&tr=http://tracker.acgsou.com:2710/announce"

    for result in dom.xpath(xpath_results):
        # torrent title and link to the detail page; both come from one anchor
        title_links = result.xpath(xpath_title)
        if not title_links:
            continue
        page_a = title_links[0]
        page_href = page_a.attrib.get('href')
        if not page_href:
            continue

        title = extract_text(page_a)
        href = base_url + page_href

        # magnet link: drop the first and last five characters of the href
        # NOTE(review): presumably hrefs look like "show-<hash>.html" - confirm
        magnet_link = magnet_template.format(page_href[5:-5])

        # category in which our torrent belongs; empty when the cell has no link,
        # so a previous row's category is never reused by accident
        category_links = result.xpath(xpath_category)
        category = extract_text(category_links[0]) if category_links else ''

        # let's try to calculate the torrent size: the cell text is split into a
        # number and a two-character unit suffix
        filesize = 0
        size_texts = result.xpath(xpath_filesize)
        if size_texts:
            size_text = size_texts[0]
            try:
                filesize = get_torrent_size(size_text[:-2], size_text[-2:])
            except (ValueError, TypeError):
                # size is best-effort only; keep the default on malformed text
                filesize = 0

        # content string contains all information not included into template
        content = 'Category: "{category}".'.format(category=category)

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnet_link,
                        'template': 'torrent.html'})

    return results

+ 6
- 0
searx/settings.yml Zobrazit soubor

@@ -433,6 +433,12 @@ engines:
433 433
     engine : nyaa
434 434
     shortcut : nt
435 435
     disabled : True
436
+  
437
+  - name : acgsou
438
+    engine : acgsou
439
+    shortcut : acg
440
+    disabled : True
441
+    timeout: 5.0
436 442
 
437 443
   - name : openairedatasets
438 444
     engine : json_engine