|
@@ -69,22 +69,11 @@ def response(resp):
|
69
|
69
|
dom = html.fromstring(resp.text)
|
70
|
70
|
|
71
|
71
|
for result in dom.xpath('//div[@class="dg_u"]'):
|
72
|
|
-
|
73
|
|
- # try to extract the url
|
74
|
|
- url_container = result.xpath('.//div[@class="sa_wrapper"]/@data-eventpayload')
|
75
|
|
- if len(url_container) > 0:
|
76
|
|
- url = loads(url_container[0])['purl']
|
77
|
|
- else:
|
78
|
|
- url = result.xpath('./a/@href')[0]
|
79
|
|
-
|
80
|
|
- # discard results that do not return an external url
|
81
|
|
- # very recent results sometimes don't return the video's url
|
82
|
|
- if url.startswith('/videos/search?'):
|
83
|
|
- continue
|
84
|
|
-
|
85
|
|
- title = extract_text(result.xpath('./a//div[@class="tl"]'))
|
86
|
|
- content = extract_text(result.xpath('.//div[@class="pubInfo"]'))
|
87
|
|
- thumbnail = result.xpath('.//div[@class="vthumb"]/img/@src')[0]
|
|
72
|
+ url = result.xpath('./div[@class="mc_vtvc"]/a/@href')[0]
|
|
73
|
+ url = 'https://bing.com' + url
|
|
74
|
+ title = extract_text(result.xpath('./div/a/div/div[@class="mc_vtvc_title"]/@title'))
|
|
75
|
+ content = extract_text(result.xpath('./div/a/div/div/div/div/text()'))
|
|
76
|
+ thumbnail = result.xpath('./div/a/div/div/img/@src')[0]
|
88
|
77
|
|
89
|
78
|
results.append({'url': url,
|
90
|
79
|
'title': title,
|
|
@@ -92,7 +81,6 @@ def response(resp):
|
92
|
81
|
'thumbnail': thumbnail,
|
93
|
82
|
'template': 'videos.html'})
|
94
|
83
|
|
95
|
|
- # first page ignores requested number of results
|
96
|
84
|
if len(results) >= number_of_results:
|
97
|
85
|
break
|
98
|
86
|
|