瀏覽代碼

Merge pull request #639 from kvch/digbt-engine

add digbt engine - fixes #638
Adam Tauber 8 年之前
父節點
當前提交
13bed1f872
共有 5 個文件被更改,包括 144 次插入15 次删除
  1. 2
    14
      searx/engines/btdigg.py
  2. 58
    0
      searx/engines/digbt.py
  3. 7
    1
      searx/settings.yml
  4. 18
    0
      searx/utils.py
  5. 59
    0
      tests/unit/engines/test_digbt.py

+ 2
- 14
searx/engines/btdigg.py 查看文件

@@ -16,6 +16,7 @@ from urllib import quote
16 16
 from lxml import html
17 17
 from operator import itemgetter
18 18
 from searx.engines.xpath import extract_text
19
+from searx.utils import get_torrent_size
19 20
 
20 21
 # engine dependent config
21 22
 categories = ['videos', 'music', 'files']
@@ -68,20 +69,7 @@ def response(resp):
68 69
         leech = 0
69 70
 
70 71
         # convert filesize to byte if possible
71
-        try:
72
-            filesize = float(filesize)
73
-
74
-            # convert filesize to byte
75
-            if filesize_multiplier == 'TB':
76
-                filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
77
-            elif filesize_multiplier == 'GB':
78
-                filesize = int(filesize * 1024 * 1024 * 1024)
79
-            elif filesize_multiplier == 'MB':
80
-                filesize = int(filesize * 1024 * 1024)
81
-            elif filesize_multiplier == 'KB':
82
-                filesize = int(filesize * 1024)
83
-        except:
84
-            filesize = None
72
+        filesize = get_torrent_size(filesize, filesize_multiplier)
85 73
 
86 74
         # convert files to int if possible
87 75
         if files.isdigit():

+ 58
- 0
searx/engines/digbt.py 查看文件

@@ -0,0 +1,58 @@
1
+"""
2
+ DigBT (Videos, Music, Files)
3
+
4
+ @website     https://digbt.org
5
+ @provide-api no
6
+
7
+ @using-api   no
8
+ @results     HTML (using search portal)
9
+ @stable      no (HTML can change)
10
+ @parse       url, title, content, filesize, magnetlink
11
+"""
12
+
13
+from urlparse import urljoin
14
+from lxml import html
15
+from searx.engines.xpath import extract_text
16
+from searx.utils import get_torrent_size
17
+
18
# searx categories this engine is registered under
categories = ['videos', 'music', 'files']
# the search URL carries a page number (see SEARCH_URL)
paging = True

# base address of the site; also used to make relative result links absolute
URL = 'https://digbt.org'
# search URL template, filled with the query and the page number by request()
SEARCH_URL = URL + '/search/{query}-time-{pageno}'
# positions of the size value and of its unit inside the whitespace-split
# text of a result's "tail" div (e.g. "Files: 1 Size: 2.9 GB ...")
FILESIZE = 3
FILESIZE_MULTIPLIER = 4
25
+
26
+
27
def request(query, params):
    """Fill ``params['url']`` with the DigBT search URL for ``query``.

    The query is percent-encoded before being placed into the URL path, so
    that spaces and reserved characters such as ``?`` or ``#`` cannot
    truncate or alter the request (the btdigg engine quotes its query the
    same way).

    :param query: search terms as passed in by searx
    :param params: request parameters; ``params['pageno']`` is read and
                   ``params['url']`` is written
    :returns: the same ``params`` mapping
    """
    # function-scope import keeps this block self-contained; the fallback
    # covers interpreters where ``quote`` lives in ``urllib.parse``
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    params['url'] = SEARCH_URL.format(query=quote(query),
                                      pageno=params['pageno'])

    return params
31
+
32
+
33
def response(resp):
    """Parse a DigBT result page into a list of searx torrent results.

    Every ``<td class="x-item">`` cell yields one result. Cells lacking the
    detail link, its title text or the magnet link are skipped instead of
    aborting the whole page with an ``IndexError``. When the "tail" text is
    too short to contain a size, ``filesize`` is reported as ``None``.

    :param resp: response object; only ``resp.content`` (the HTML) is read
    :returns: list of result dicts rendered with the ``torrent.html`` template
    """
    dom = html.fromstring(resp.content)

    results = []
    for result in dom.xpath('.//td[@class="x-item"]'):
        hrefs = result.xpath('.//a[@title]/@href')
        titles = result.xpath('.//a[@title]/text()')
        magnets = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')

        # a result without link, title or magnet is unusable: skip only this
        # row rather than losing every other result on the page
        if not (hrefs and titles and magnets):
            continue

        content = extract_text(result.xpath('.//div[@class="files"]'))

        # tail text looks like "Files: 1 Size: 2.9 GB Downloads: ..."; the
        # size value and unit are picked by position after splitting
        files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
        if len(files_data) > max(FILESIZE, FILESIZE_MULTIPLIER):
            filesize = get_torrent_size(files_data[FILESIZE],
                                        files_data[FILESIZE_MULTIPLIER])
        else:
            filesize = None

        results.append({'url': urljoin(URL, hrefs[0]),
                        'title': titles[0],
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnets[0],
                        # the result page exposes no seeder/leecher counts
                        'seed': 'N/A',
                        'leech': 'N/A',
                        'template': 'torrent.html'})

    return results

+ 7
- 1
searx/settings.yml 查看文件

@@ -87,7 +87,7 @@ engines:
87 87
   - name : btdigg
88 88
     engine : btdigg
89 89
     shortcut : bt
90
-    
90
+
91 91
   - name : crossref
92 92
     engine : json_engine
93 93
     paging : True
@@ -118,6 +118,12 @@ engines:
118 118
     weight : 2
119 119
     disabled : True
120 120
 
121
+  - name : digbt
122
+    engine : digbt
123
+    shortcut : dbt
124
+    timeout : 6.0
125
+    disabled : True
126
+
121 127
   - name : digg
122 128
     engine : digg
123 129
     shortcut : dg

+ 18
- 0
searx/utils.py 查看文件

@@ -237,3 +237,21 @@ def list_get(a_list, index, default=None):
237 237
         return a_list[index]
238 238
     else:
239 239
         return default
240
+
241
+
242
def get_torrent_size(filesize, filesize_multiplier):
    """Convert a human-readable torrent size into a number of bytes.

    :param filesize: numeric part of the size, e.g. ``'2.9'``
    :param filesize_multiplier: unit of the size; ``'KB'``, ``'MB'``,
                                ``'GB'`` and ``'TB'`` are recognised
                                (binary multiples of 1024)
    :returns: the size in bytes as ``int`` for a recognised unit; the plain
              ``float`` value when the unit is not recognised; ``None`` when
              the size cannot be converted at all
    """
    multipliers = {
        'KB': 1024,
        'MB': 1024 ** 2,
        'GB': 1024 ** 3,
        'TB': 1024 ** 4,
    }

    # Only conversion failures mean "size unknown". A bare ``except`` here
    # would also swallow KeyboardInterrupt / SystemExit.
    try:
        filesize = float(filesize)
        factor = multipliers.get(filesize_multiplier)
        if factor is not None:
            # int() raises for 'nan' / 'inf' inputs, handled below
            filesize = int(filesize * factor)
    except (TypeError, ValueError, OverflowError):
        filesize = None

    return filesize

+ 59
- 0
tests/unit/engines/test_digbt.py 查看文件

@@ -0,0 +1,59 @@
1
+from collections import defaultdict
2
+import mock
3
+from searx.engines import digbt
4
+from searx.testing import SearxTestCase
5
+
6
+
7
class TestDigBTEngine(SearxTestCase):
    """Unit tests for the DigBT engine's request builder and HTML parser."""

    def test_request(self):
        """The built URL targets digbt.org and carries query and page number."""
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 0
        params = digbt.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('digbt.org', params['url'])
        # the requested page number must make it into the URL
        self.assertIn('-time-0', params['url'])

    def test_response(self):
        """Invalid inputs raise, an empty page gives [], one item is parsed."""
        # anything without a ``content`` attribute is rejected
        self.assertRaises(AttributeError, digbt.response, None)
        self.assertRaises(AttributeError, digbt.response, [])
        self.assertRaises(AttributeError, digbt.response, '')
        self.assertRaises(AttributeError, digbt.response, '[]')

        # a page without result cells yields no results
        response = mock.Mock(content='<html></html>')
        self.assertEqual(digbt.response(response), [])

        html = """
        <table class="table">
            <tr><td class="x-item">
            <div>
                <a title="The Big Bang Theory" class="title" href="/The-Big-Bang-Theory-d2.html">The Big Bang Theory</a>
                <span class="ctime"><span style="color:red;">4 hours ago</span></span>
            </div>
            <div class="files">
                <ul>
                    <li>The Big Bang Theory  2.9 GB</li>
                    <li>....</li>
                </ul>
            </div>
            <div class="tail">
                Files: 1 Size: 2.9 GB  Downloads: 1 Updated: <span style="color:red;">4 hours ago</span>
                &nbsp; &nbsp;
                <a class="title" href="magnet:?xt=urn:btih:a&amp;dn=The+Big+Bang+Theory">
                    <span class="glyphicon glyphicon-magnet"></span> magnet-link
                </a>
                &nbsp; &nbsp;
            </div>
            </td></tr>
        </table>
        """
        response = mock.Mock(content=html)
        results = digbt.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'The Big Bang Theory')
        self.assertEqual(results[0]['url'], 'https://digbt.org/The-Big-Bang-Theory-d2.html')
        self.assertEqual(results[0]['content'], 'The Big Bang Theory 2.9 GB ....')
        # 2.9 GB expressed in bytes (binary multiples), truncated to int
        self.assertEqual(results[0]['filesize'], 3113851289)
        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory')