Browse Source

add generated files of parent

Noémi Ványi 8 years ago
parent
commit
5a1f928155
4 changed files with 290 additions and 0 deletions
  1. 114
    0
      _sources/admin/filtron.txt
  2. 1
    0
      _sources/index.txt
  3. 174
    0
      admin/filtron.html
  4. 1
    0
      index.html

+ 114
- 0
_sources/admin/filtron.txt View File

@@ -0,0 +1,114 @@
1
+How to protect an instance
2
+==========================
3
+
4
+Searx depends on external search services. To avoid the abuse of these services it is advised to limit the number of requests processed by searx.
5
+
6
+An application firewall, ``filtron`` solves exactly this problem. Information on how to install it can be found at the `project page of filtron <https://github.com/asciimoo/filtron>`__.
7
+
8
+Sample configuration of filtron
9
+-------------------------------
10
+
11
+An example configuration can be found below. This configuration limits the access of
12
+
13
+ * scripts or applications (roboagent limit)
14
+
15
+ * webcrawlers (botlimit)
16
+
17
+ * IPs which send too many requests (IP limit)
18
+
19
+ * too many json, csv, etc. requests (rss/json limit)
20
+
21
+ * the same UserAgent if it sends too many requests (useragent limit)
22
+
23
+
24
+.. code:: json
25
+
26
+    [
27
+        {
28
+            "name": "search request",
29
+            "filters": ["Param:q", "Path=^(/|/search)$"],
30
+            "interval": <time-interval-in-sec>,
31
+            "limit": <max-request-number-in-interval>,
32
+            "subrules": [
33
+                {
34
+                    "name": "roboagent limit",
35
+                    "interval": <time-interval-in-sec>,
36
+                    "limit": <max-request-number-in-interval>,
37
+                    "filters": ["Header:User-Agent=(curl|cURL|Wget|python-requests|Scrapy|FeedFetcher|Go-http-client)"],
38
+                    "actions": [
39
+                        {"name": "block",
40
+                         "params": {"message": "Rate limit exceeded"}}
41
+                    ]
42
+                },
43
+                {
44
+                    "name": "botlimit",
45
+                    "limit": 0,
46
+                    "stop": true,
47
+                    "filters": ["Header:User-Agent=(Googlebot|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT)"],
48
+                    "actions": [
49
+                        {"name": "block",
50
+                         "params": {"message": "Rate limit exceeded"}}
51
+                    ]
52
+                },
53
+                {
54
+                    "name": "IP limit",
55
+                    "interval": <time-interval-in-sec>,
56
+                    "limit": <max-request-number-in-interval>,
57
+                    "stop": true,
58
+                    "aggregations": ["Header:X-Forwarded-For"],
59
+                    "actions": [
60
+                        {"name": "block",
61
+                         "params": {"message": "Rate limit exceeded"}}
62
+                    ]
63
+                },
64
+                {
65
+                    "name": "rss/json limit",
66
+                    "interval": <time-interval-in-sec>,
67
+                    "limit": <max-request-number-in-interval>,
68
+                    "stop": true,
69
+                    "filters": ["Param:format=(csv|json|rss)"],
70
+                    "actions": [
71
+                        {"name": "block",
72
+                         "params": {"message": "Rate limit exceeded"}}
73
+                    ]
74
+                },
75
+                {
76
+                    "name": "useragent limit",
77
+                    "interval": <time-interval-in-sec>,
78
+                    "limit": <max-request-number-in-interval>,
79
+                    "aggregations": ["Header:User-Agent"],
80
+                    "actions": [
81
+                        {"name": "block",
82
+                         "params": {"message": "Rate limit exceeded"}}
83
+                    ]
84
+                }
85
+            ]
86
+        }
87
+    ]
88
+
89
+
90
+
91
+Route request through filtron
92
+-----------------------------
93
+
94
+Filtron can be started using the following command:
95
+
96
+.. code:: bash
97
+
98
+    $ filtron -rules rules.json
99
+
100
+It listens on 127.0.0.1:4004 and forwards filtered requests to 127.0.0.1:8888 by default.
101
+
102
+Use it along with ``nginx`` with the following example configuration.
103
+
104
+.. code:: bash
105
+
106
+    location / {
107
+        proxy_set_header        Host    $http_host;
108
+        proxy_set_header        X-Real-IP $remote_addr;
109
+        proxy_set_header        X-Forwarded-For $proxy_add_x_forwarded_for;
110
+        proxy_set_header        X-Scheme $scheme;
111
+        proxy_pass http://127.0.0.1:4004/;
112
+    }
113
+
114
+Requests arrive on port 4004, pass through filtron, and are then forwarded to port 8888, where searx is running.

+ 1
- 0
_sources/index.txt View File

@@ -38,6 +38,7 @@ Administrator documentation
38 38
 
39 39
    dev/install/installation
40 40
    admin/api
41
+   admin/filtron
41 42
 
42 43
 Developer documentation
43 44
 -----------------------

+ 174
- 0
admin/filtron.html View File

@@ -0,0 +1,174 @@
1
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2
+  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+
4
+
5
+<html xmlns="http://www.w3.org/1999/xhtml">
6
+  <head>
7
+    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
8
+    
9
+    <title>How to protect an instance &#8212; searx 0.9.0 documentation</title>
10
+    
11
+    <link rel="stylesheet" href="../_static/style.css" type="text/css" />
12
+    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
13
+    
14
+    <script type="text/javascript">
15
+      var DOCUMENTATION_OPTIONS = {
16
+        URL_ROOT:    '../',
17
+        VERSION:     '0.9.0',
18
+        COLLAPSE_INDEX: false,
19
+        FILE_SUFFIX: '.html',
20
+        HAS_SOURCE:  true
21
+      };
22
+    </script>
23
+    <script type="text/javascript" src="../_static/jquery.js"></script>
24
+    <script type="text/javascript" src="../_static/underscore.js"></script>
25
+    <script type="text/javascript" src="../_static/doctools.js"></script>
26
+    <link rel="index" title="Index" href="../genindex.html" />
27
+    <link rel="search" title="Search" href="../search.html" />
28
+    <link rel="top" title="searx 0.9.0 documentation" href="../index.html" />
29
+    <link rel="next" title="Development Quickstart" href="../dev/quickstart.html" />
30
+    <link rel="prev" title="Administration API" href="api.html" />
31
+   
32
+  
33
+  <link media="only screen and (max-device-width: 480px)" href="../_static/small_flask.css" type= "text/css" rel="stylesheet" />
34
+  <meta name="viewport" content="width=device-width, initial-scale=0.9, maximum-scale=0.9" />
35
+
36
+  </head>
37
+  <body role="document">  
38
+
39
+    <div class="document">
40
+      <div class="documentwrapper">
41
+        <div class="bodywrapper">
42
+          <div class="body" role="main">
43
+            
44
+  <div class="section" id="how-to-protect-an-instance">
45
+<h1>How to protect an instance<a class="headerlink" href="#how-to-protect-an-instance" title="Permalink to this headline">¶</a></h1>
46
+<p>Searx depends on external search services. To avoid the abuse of these services it is advised to limit the number of requests processed by searx.</p>
47
+<p>An application firewall, <code class="docutils literal"><span class="pre">filtron</span></code> solves exactly this problem. Information on how to install it can be found at the <a class="reference external" href="https://github.com/asciimoo/filtron">project page of filtron</a>.</p>
48
+<div class="section" id="sample-configuration-of-filtron">
49
+<h2>Sample configuration of filtron<a class="headerlink" href="#sample-configuration-of-filtron" title="Permalink to this headline">¶</a></h2>
50
+<p>An example configuration can be found below. This configuration limits the access of</p>
51
+<blockquote>
52
+<div><ul class="simple">
53
+<li>scripts or applications (roboagent limit)</li>
54
+<li>webcrawlers (botlimit)</li>
55
+<li>IPs which send too many requests (IP limit)</li>
56
+<li>too many json, csv, etc. requests (rss/json limit)</li>
57
+<li>the same UserAgent if it sends too many requests (useragent limit)</li>
58
+</ul>
59
+</div></blockquote>
60
+<div class="code json highlight-default"><div class="highlight"><pre><span></span><span class="p">[</span>
61
+    <span class="p">{</span>
62
+        <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;search request&quot;</span><span class="p">,</span>
63
+        <span class="s2">&quot;filters&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;Param:q&quot;</span><span class="p">,</span> <span class="s2">&quot;Path=^(/|/search)$&quot;</span><span class="p">],</span>
64
+        <span class="s2">&quot;interval&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">time</span><span class="o">-</span><span class="n">interval</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">sec</span><span class="o">&gt;</span><span class="p">,</span>
65
+        <span class="s2">&quot;limit&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="nb">max</span><span class="o">-</span><span class="n">request</span><span class="o">-</span><span class="n">number</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">interval</span><span class="o">&gt;</span><span class="p">,</span>
66
+        <span class="s2">&quot;subrules&quot;</span><span class="p">:</span> <span class="p">[</span>
67
+            <span class="p">{</span>
68
+                <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;roboagent limit&quot;</span><span class="p">,</span>
69
+                <span class="s2">&quot;interval&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">time</span><span class="o">-</span><span class="n">interval</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">sec</span><span class="o">&gt;</span><span class="p">,</span>
70
+                <span class="s2">&quot;limit&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="nb">max</span><span class="o">-</span><span class="n">request</span><span class="o">-</span><span class="n">number</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">interval</span><span class="o">&gt;</span><span class="p">,</span>
71
+                <span class="s2">&quot;filters&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;Header:User-Agent=(curl|cURL|Wget|python-requests|Scrapy|FeedFetcher|Go-http-client)&quot;</span><span class="p">],</span>
72
+                <span class="s2">&quot;actions&quot;</span><span class="p">:</span> <span class="p">[</span>
73
+                    <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;block&quot;</span><span class="p">,</span>
74
+                     <span class="s2">&quot;params&quot;</span><span class="p">:</span> <span class="p">{</span><span class="s2">&quot;message&quot;</span><span class="p">:</span> <span class="s2">&quot;Rate limit exceeded&quot;</span><span class="p">}}</span>
75
+                <span class="p">]</span>
76
+            <span class="p">},</span>
77
+            <span class="p">{</span>
78
+                <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;botlimit&quot;</span><span class="p">,</span>
79
+                <span class="s2">&quot;limit&quot;</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span>
80
+                <span class="s2">&quot;stop&quot;</span><span class="p">:</span> <span class="n">true</span><span class="p">,</span>
81
+                <span class="s2">&quot;filters&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;Header:User-Agent=(Googlebot|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT)&quot;</span><span class="p">],</span>
82
+                <span class="s2">&quot;actions&quot;</span><span class="p">:</span> <span class="p">[</span>
83
+                    <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;block&quot;</span><span class="p">,</span>
84
+                     <span class="s2">&quot;params&quot;</span><span class="p">:</span> <span class="p">{</span><span class="s2">&quot;message&quot;</span><span class="p">:</span> <span class="s2">&quot;Rate limit exceeded&quot;</span><span class="p">}}</span>
85
+                <span class="p">]</span>
86
+            <span class="p">},</span>
87
+            <span class="p">{</span>
88
+                <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;IP limit&quot;</span><span class="p">,</span>
89
+                <span class="s2">&quot;interval&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">time</span><span class="o">-</span><span class="n">interval</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">sec</span><span class="o">&gt;</span><span class="p">,</span>
90
+                <span class="s2">&quot;limit&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="nb">max</span><span class="o">-</span><span class="n">request</span><span class="o">-</span><span class="n">number</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">interval</span><span class="o">&gt;</span><span class="p">,</span>
91
+                <span class="s2">&quot;stop&quot;</span><span class="p">:</span> <span class="n">true</span><span class="p">,</span>
92
+                <span class="s2">&quot;aggregations&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;Header:X-Forwarded-For&quot;</span><span class="p">],</span>
93
+                <span class="s2">&quot;actions&quot;</span><span class="p">:</span> <span class="p">[</span>
94
+                    <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;block&quot;</span><span class="p">,</span>
95
+                     <span class="s2">&quot;params&quot;</span><span class="p">:</span> <span class="p">{</span><span class="s2">&quot;message&quot;</span><span class="p">:</span> <span class="s2">&quot;Rate limit exceeded&quot;</span><span class="p">}}</span>
96
+                <span class="p">]</span>
97
+            <span class="p">},</span>
98
+            <span class="p">{</span>
99
+                <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;rss/json limit&quot;</span><span class="p">,</span>
100
+                <span class="s2">&quot;interval&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">time</span><span class="o">-</span><span class="n">interval</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">sec</span><span class="o">&gt;</span><span class="p">,</span>
101
+                <span class="s2">&quot;limit&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="nb">max</span><span class="o">-</span><span class="n">request</span><span class="o">-</span><span class="n">number</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">interval</span><span class="o">&gt;</span><span class="p">,</span>
102
+                <span class="s2">&quot;stop&quot;</span><span class="p">:</span> <span class="n">true</span><span class="p">,</span>
103
+                <span class="s2">&quot;filters&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;Param:format=(csv|json|rss)&quot;</span><span class="p">],</span>
104
+                <span class="s2">&quot;actions&quot;</span><span class="p">:</span> <span class="p">[</span>
105
+                    <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;block&quot;</span><span class="p">,</span>
106
+                     <span class="s2">&quot;params&quot;</span><span class="p">:</span> <span class="p">{</span><span class="s2">&quot;message&quot;</span><span class="p">:</span> <span class="s2">&quot;Rate limit exceeded&quot;</span><span class="p">}}</span>
107
+                <span class="p">]</span>
108
+            <span class="p">},</span>
109
+            <span class="p">{</span>
110
+                <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;useragent limit&quot;</span><span class="p">,</span>
111
+                <span class="s2">&quot;interval&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="n">time</span><span class="o">-</span><span class="n">interval</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">sec</span><span class="o">&gt;</span><span class="p">,</span>
112
+                <span class="s2">&quot;limit&quot;</span><span class="p">:</span> <span class="o">&lt;</span><span class="nb">max</span><span class="o">-</span><span class="n">request</span><span class="o">-</span><span class="n">number</span><span class="o">-</span><span class="ow">in</span><span class="o">-</span><span class="n">interval</span><span class="o">&gt;</span><span class="p">,</span>
113
+                <span class="s2">&quot;aggregations&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;Header:User-Agent&quot;</span><span class="p">],</span>
114
+                <span class="s2">&quot;actions&quot;</span><span class="p">:</span> <span class="p">[</span>
115
+                    <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;block&quot;</span><span class="p">,</span>
116
+                     <span class="s2">&quot;params&quot;</span><span class="p">:</span> <span class="p">{</span><span class="s2">&quot;message&quot;</span><span class="p">:</span> <span class="s2">&quot;Rate limit exceeded&quot;</span><span class="p">}}</span>
117
+                <span class="p">]</span>
118
+            <span class="p">}</span>
119
+        <span class="p">]</span>
120
+    <span class="p">}</span>
121
+<span class="p">]</span>
122
+</pre></div>
123
+</div>
124
+</div>
125
+<div class="section" id="route-request-through-filtron">
126
+<h2>Route request through filtron<a class="headerlink" href="#route-request-through-filtron" title="Permalink to this headline">¶</a></h2>
127
+<p>Filtron can be started using the following command:</p>
128
+<div class="code bash highlight-default"><div class="highlight"><pre><span></span>$ filtron -rules rules.json
129
+</pre></div>
130
+</div>
131
+<p>It listens on 127.0.0.1:4004 and forwards filtered requests to 127.0.0.1:8888 by default.</p>
132
+<p>Use it along with <code class="docutils literal"><span class="pre">nginx</span></code> with the following example configuration.</p>
133
+<div class="code bash highlight-default"><div class="highlight"><pre><span></span>location / {
134
+    proxy_set_header        Host    $http_host;
135
+    proxy_set_header        X-Real-IP $remote_addr;
136
+    proxy_set_header        X-Forwarded-For $proxy_add_x_forwarded_for;
137
+    proxy_set_header        X-Scheme $scheme;
138
+    proxy_pass http://127.0.0.1:4004/;
139
+}
140
+</pre></div>
141
+</div>
142
+<p>Requests arrive on port 4004, pass through filtron, and are then forwarded to port 8888, where searx is running.</p>
143
+</div>
144
+</div>
145
+
146
+
147
+          </div>
148
+        </div>
149
+      </div>
150
+      <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
151
+        <div class="sphinxsidebarwrapper"><div class="sidebar_container body">
152
+<h1>Searx</h1>
153
+<ul>
154
+    <li><a href="../index.html">Home</a></li>
155
+    <li><a href="https://github.com/asciimoo/searx">Source</a></li>
156
+    <li><a href="https://github.com/asciimoo/searx/wiki">Wiki</a></li>
157
+    <li><a href="https://github.com/asciimoo/searx/wiki/Searx-instances">Public instances</a></li>
158
+</ul>
159
+<hr />
160
+<ul>
161
+    <li><a href="https://twitter.com/Searx_engine">Twitter</a></li>
162
+    <li><a href="https://flattr.com/submit/auto?user_id=asciimoo&amp;url=https://github.com/asciimoo/searx&amp;title=searx&amp;language=&amp;tags=github&amp;category=software">Flattr</a></li>
163
+    <li><a href="https://gratipay.com/searx">Gratipay</a></li>
164
+</ul>
165
+</div>
166
+        </div>
167
+      </div>
168
+      <div class="clearer"></div>
169
+    </div>
170
+    <div class="footer">
171
+      &copy; Copyright 2015-2016, Adam Tauber.
172
+    </div>
173
+  </body>
174
+</html>

+ 1
- 0
index.html View File

@@ -72,6 +72,7 @@
72 72
 <ul>
73 73
 <li class="toctree-l1"><a class="reference internal" href="dev/install/installation.html">Installation</a></li>
74 74
 <li class="toctree-l1"><a class="reference internal" href="admin/api.html">Administration API</a></li>
75
+<li class="toctree-l1"><a class="reference internal" href="admin/filtron.html">How to protect an instance</a></li>
75 76
 </ul>
76 77
 </div>
77 78
 </div>