|
@@ -66,7 +66,15 @@ def response(resp):
|
66
|
66
|
url = link.attrib.get('href')
|
67
|
67
|
|
68
|
68
|
# block google-ad URLs
|
69
|
|
- if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
|
|
69
|
+ if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
|
|
70
|
+ continue
|
|
71
|
+
|
|
72
|
+ # block startpage search URLs
|
|
73
|
+ if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
|
|
74
|
+ continue
|
|
75
|
+
|
|
76
|
+ # block ixquick search URLs
|
|
77
|
+ if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
|
70
|
78
|
continue
|
71
|
79
|
|
72
|
80
|
title = escape(extract_text(link))
|