Browse Source

- recoll engine

   recoll is a local search engine based on Xapian:
   http://www.lesbonscomptes.com/recoll/

   By itself recoll does not offer web or API access,
   this can be achieved using recoll-webui:
   https://github.com/koniu/recoll-webui

   As recoll-webui does not support paged JSON results by default,
   it is advisable to use a patched version which does:
   https://github.com/Yetangitu/recoll-webui/tree/jsonpage
   A pull request was sent upstream; once it is merged, the patched
   version is no longer needed.

   This engine uses a custom 'files' result template

   set base_url to the location where recoll-webui can be reached
   set dl_prefix to a location where the file hierarchy as indexed by recoll can be reached
   set search_dir to the part of the indexed file hierarchy to be searched, use an empty string to search the entire search domain
frankdelange 6 years ago
parent
commit
ddcb619b97

+ 75
- 0
searx/engines/recoll.py View File

@@ -0,0 +1,75 @@
1
+"""
2
+ Recoll (local search engine)
3
+
4
+ @using-api   yes
5
+ @results     JSON
6
+ @stable      yes
7
+ @parse       url, content, size, abstract, author, mtype, time, filename, label
8
+"""
9
+
10
+from json import loads
11
+from searx.url_utils import urlencode
12
+from datetime import date, timedelta
13
+
14
# engine dependent config
paging = True  # request() forwards params['pageno'] to the backend
time_range_support = True  # request() turns params['time_range'] into an 'after' date

# parameters from settings.yml
base_url = None  # location where recoll-webui can be reached; request() prepends it to the query path
search_dir = ''  # part of the indexed hierarchy to search; '' searches the entire search domain
dl_prefix = None  # response() substitutes this for the 'file:///export' prefix of result URLs
22
+
23
def get_time_range(time_range):
    """Translate a time-range keyword into an ISO 8601 start date.

    :param time_range: ``'day'``, ``'week'``, ``'month'`` or ``'year'``.
        Any other value (including ``None`` and ``''``) means that no
        time restriction is requested.
    :returns: the date lying that many days before today, formatted as
        ``YYYY-MM-DD``, or an empty string when no restriction applies.
    """
    # a month and a year are approximated as 30 and 365 days
    days_back = {
        'day': 1,
        'week': 7,
        'month': 30,
        'year': 365,
    }

    offset = days_back.get(time_range, 0)
    if not offset:
        return ''

    return (date.today() - timedelta(days=offset)).isoformat()
36
+
37
+# do search-request
38
def request(query, params):
    """Fill in the recoll-webui request URL for a search.

    :param query: the search terms.
    :param params: request parameters; ``params['time_range']`` and
        ``params['pageno']`` are read, ``params['url']`` is written.
    :returns: the same ``params`` dict with ``'url'`` set.
    """
    search_after = get_time_range(params['time_range'])

    # Encode all arguments together so that the search terms become the
    # value of ``query`` itself and ``after``/``dir`` are escaped as well.
    # A list of pairs keeps the argument order fixed.
    args = urlencode([
        ('query', query),
        ('page', params['pageno']),
        ('after', search_after),
        ('dir', search_dir),
    ])
    params['url'] = base_url + 'pagedjson?' + args

    return params
45
+
46
+# get response from search-request
47
def response(resp):
    """Convert the JSON reply of recoll-webui into a list of result dicts.

    :param resp: the HTTP response; ``resp.text`` must hold a JSON document
        whose ``'results'`` entry is a list of result objects.
    :returns: a list of dicts for the ``files.html`` result template. Each
        has ``url``, ``title``, ``content`` and ``template``; ``size``,
        ``filename``, ``abstract``, ``author``, ``mtype`` and ``time`` are
        added only when the backend supplied a non-empty value.
    """
    results = []

    raw_search_results = loads(resp.text)
    if not raw_search_results:
        return results

    for result in raw_search_results.get('results', []):
        url = result['url']
        # NOTE(review): the 'file:///export' prefix is hard-coded; it looks
        # like the index root of the original deployment -- confirm.
        # Without a configured dl_prefix the URL is passed through as-is.
        if dl_prefix is not None:
            url = url.replace('file:///export', dl_prefix)

        item = {'url': url,
                'title': result['label'],
                'content': u'{}'.format(result.get('snippet', '')),
                'template': 'files.html'}

        # optional fields: absent or empty values are simply left out
        size = result.get('size')
        if size:
            item['size'] = int(size)

        for parameter in ('filename', 'abstract', 'author', 'mtype', 'time'):
            value = result.get(parameter)
            if value:
                item[parameter] = value

        results.append(item)

    return results

+ 41
- 0
searx/settings.yml View File

@@ -517,6 +517,47 @@ engines:
517 517
     shortcut : qws
518 518
     categories : social media
519 519
 
520
+    # recoll is a local search engine based on Xapian:
521
+    # http://www.lesbonscomptes.com/recoll/
522
+    #
523
+    # By itself recoll does not offer web or API access,
524
+    # this can be achieved using recoll-webui:
525
+    # https://github.com/koniu/recoll-webui
526
+    #
527
+    # As recoll-webui by default does not support paged JSON
528
+    # results it is advisable to use a patched version which does:
529
+    # https://github.com/Yetangitu/recoll-webui/tree/jsonpage
530
+    # A pull request was sent upstream, if this is merged the patched
531
+    # version is no longer needed
532
+    #
533
+    # This engine uses a custom 'files' result template
534
+    #
535
+    # set base_url to the location where recoll-webui can be reached
536
+    # set dl_prefix to a location where the file hierarchy as indexed by recoll can be reached
537
+    # set search_dir to the part of the indexed file hierarchy to be searched, use an empty string
538
+    #     to search the entire search domain
539
+
540
+    # this entry (with search_dir set to an empty string) covers the entire recoll search domain
541
+  - name : library
542
+    engine : recoll
543
+    shortcut : lib
544
+    base_url: 'https://recoll.example.org/'
545
+    search_dir : ''
546
+    dl_prefix : 'https://download.example.org'
547
+    timeout : 30.0
548
+    categories : files
549
+
550
+    # this entry only searches the 'reference' directory
551
+  - name : library reference
552
+    engine : recoll
553
+    base_url: 'https://recoll.example.org/'
554
+    search_dir : reference
555
+    dl_prefix : 'https://download.example.org'
556
+    shortcut : libr
557
+    timeout : 30.0
558
+    categories : files
559
+    disabled : True
560
+
520 561
   - name : reddit
521 562
     engine : reddit
522 563
     shortcut : re

+ 19
- 0
searx/static/themes/oscar/less/logicodev/results.less View File

@@ -41,6 +41,11 @@
41 41
 
42 42
 }
43 43
 
44
// Abstract paragraph of a result (the files result template renders
// result.abstract with this class): sets vertical spacing only.
.result-abstract {
    margin-top: 0.5em;
    margin-bottom: 0.8em;
}
48
+
44 49
 .external-link {
45 50
     color: @dark-green;
46 51
     font-size: 12px;
@@ -114,6 +119,20 @@
114 119
     }
115 120
 }
116 121
 
122
// Metadata table of a result (used by the files result template).
.result-metadata {
    clear: both;
    margin: 1em;

    // every cell
    td {
        padding-right: 1em;
        color: @gray;
    }

    // first cell of a row (the field label in the files template)
    td:first-of-type {
        color: @dark-gray;
    }
}
135
+
117 136
 // map formating of results
118 137
 .result-map {
119 138
     clear: both;

+ 26
- 0
searx/templates/oscar/macros.html View File

@@ -39,6 +39,20 @@
39 39
 <div class="external-link">{{ result.pretty_url }}</div>
40 40
 {%- endmacro %}
41 41
 
42
+<!-- Draw result footer without cache link -->
43
{# Result footer without the cache link: a right-aligned group holding one
   label per engine in result.engines plus a "proxied" link when `proxify`
   is set, followed by the result's pretty URL. #}
{% macro result_footer_nocache(result) -%}
    <div class="clearfix"></div>
    <div class="pull-right">
    {% for engine in result.engines %}
        <span class="label label-default">{{ engine }}</span>
    {% endfor %}
    {% if proxify %}
    <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }}</small>
    {% endif %}
</div>
<div class="external-link">{{ result.pretty_url }}</div>
{%- endmacro %}
55
+
42 56
 <!-- Draw result footer -->
43 57
 {% macro result_footer_rtl(result) -%}
44 58
     <div class="clearfix"></div>
@@ -52,6 +66,18 @@
52 66
     <div class="external-link">{{ result.pretty_url }}</div>
53 67
 {%- endmacro %}
54 68
 
69
+<!-- Draw result footer without cache link -->
70
{# Right-to-left result footer without the cache link: one label per engine
   in result.engines, a "proxied" link when `proxify` is set, then the
   result's pretty URL. Unlike the left-to-right macro, the labels are not
   wrapped in a pull-right container. #}
{% macro result_footer_nocache_rtl(result) -%}
    <div class="clearfix"></div>
    {% for engine in result.engines %}
        <span class="label label-default">{{ engine }}</span>
    {% endfor %}
    {% if proxify %}
    <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }}</small>
    {% endif %}
    <div class="external-link">{{ result.pretty_url }}</div>
{%- endmacro %}
80
+
55 81
 {% macro preferences_item_header(info, label, rtl) -%}
56 82
     {% if rtl %}
57 83
     <div class="row form-group">

+ 52
- 0
searx/templates/oscar/result_templates/files.html View File

@@ -0,0 +1,52 @@
1
{# Result template for file results: header, optional embedded media,
   abstract, optional thumbnail next to the content, a metadata table
   (author, filename, size, date, type) and a footer without cache link. #}
{% from 'oscar/macros.html' import result_header, result_sub_header, result_footer_nocache, result_footer_nocache_rtl, icon with context %}

{{ result_header(result, favicons) }}
{{ result_sub_header(result) }}

{% if result.embedded %}
    <small> &bull; <a class="text-info btn-collapse collapsed cursor-pointer media-loader disabled_if_nojs" data-toggle="collapse" data-target="#result-media-{{ index }}" data-btn-text-collapsed="{{ _('show media') }}" data-btn-text-not-collapsed="{{ _('hide media') }}">{{ icon('music') }} {{ _('show media') }}</a></small>

<div id="result-media-{{ index }}" class="collapse">
   {{ result.embedded|safe }}
</div>
{% endif %}

{# NOTE(review): abstract is rendered unescaped; confirm that the backend
   only delivers trusted markup in this field. #}
{% if result.abstract %}<p class="result-content result-abstract">{{ result.abstract|safe }}</p>{% endif %}

{% if result.img_src %}
<div class="container-fluid">
    <div class="row">
<img src="{{ image_proxify(result.img_src) }}" alt="{{ result.title|striptags }}" title="{{ result.title|striptags }}" style="width: auto; max-height: 60px; min-height: 60px;" class="col-xs-2 col-sm-4 col-md-4 result-content">
{% if result.content %}<p class="result-content col-xs-8 col-sm-8 col-md-8">{{ result.content|safe }}</p>{% endif %}
    </div>
</div>
{% else %}
{% if result.content %}<p class="result-content">{{ result.content|safe }}</p>{% endif %}
{% endif %}

{# Author, filename, date and type are plain-text values taken from the
   indexed document, so they are not marked safe and are left to the
   template engine's escaping (assumes autoescaping is enabled for this
   template; TODO confirm). #}
<table class="result-metadata result-content">
{% if result.author %}<tr><td>{{ _('Author') }}</td><td>{{ result.author }}</td></tr>{% endif %}

{% if result.filename %}<tr><td>{{ _('Filename') }}</td><td>{{ result.filename }}</td></tr>{% endif %}

{# size is a byte count; show it in the largest binary unit below 1024 #}
{% if result.size %}<tr><td>{{ _('Filesize') }}</td><td>
        {% if result.size < 1024 %}{{ result.size }} {{ _('Bytes') }}
        {% elif result.size < 1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024) }} {{ _('kiB') }}
        {% elif result.size < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024) }} {{ _('MiB') }}
        {% elif result.size < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024) }} {{ _('GiB') }}
        {% else %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif %}
    </td></tr>
{% endif %}

{% if result.time %}<tr><td>{{ _('Date') }}</td><td>{{ result.time }}</td></tr>{% endif %}

{% if result.mtype %}<tr><td>{{ _('Type') }}</td><td>{{ result.mtype }}</td></tr>{% endif %}
</table>

{% if rtl %}
{{ result_footer_nocache_rtl(result) }}
{% else %}
{{ result_footer_nocache(result) }}
{% endif %}