Handle error when parsing image result URL
parent
b21b4f4f57
commit
d894bd347d
|
@ -313,12 +313,19 @@ class Filter:
|
||||||
urls = item.find('a')['href'].split('&imgrefurl=')
|
urls = item.find('a')['href'].split('&imgrefurl=')
|
||||||
|
|
||||||
img_url = urlparse.unquote(urls[0].replace('/imgres?imgurl=', ''))
|
img_url = urlparse.unquote(urls[0].replace('/imgres?imgurl=', ''))
|
||||||
webpage = urlparse.unquote(urls[1].split('&')[0])
|
|
||||||
|
try:
|
||||||
|
# Try to strip out only the necessary part of the web page link
|
||||||
|
web_page = urlparse.unquote(urls[1].split('&')[0])
|
||||||
|
except IndexError:
|
||||||
|
web_page = urlparse.unquote(urls[1])
|
||||||
|
|
||||||
img_tbn = urlparse.unquote(item.find('a').find('img')['src'])
|
img_tbn = urlparse.unquote(item.find('a').find('img')['src'])
|
||||||
|
|
||||||
results.append({
|
results.append({
|
||||||
'domain': urlparse.urlparse(webpage).netloc,
|
'domain': urlparse.urlparse(web_page).netloc,
|
||||||
'img_url': img_url,
|
'img_url': img_url,
|
||||||
'webpage': webpage,
|
'web_page': web_page,
|
||||||
'img_tbn': img_tbn
|
'img_tbn': img_tbn
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
@ -64,7 +64,7 @@
|
||||||
<table class="TxbwNb">
|
<table class="TxbwNb">
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<a href="{{ results[(i*4)+j].webpage }}">
|
<a href="{{ results[(i*4)+j].web_page }}">
|
||||||
<div class="RAyV4b">
|
<div class="RAyV4b">
|
||||||
<img alt="" class="t0fcAb" src="{{ results[(i*4)+j].img_tbn }}"/>
|
<img alt="" class="t0fcAb" src="{{ results[(i*4)+j].img_tbn }}"/>
|
||||||
</div>
|
</div>
|
||||||
|
@ -73,7 +73,7 @@
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<a href="{{ results[(i*4)+j].webpage }}">
|
<a href="{{ results[(i*4)+j].web_page }}">
|
||||||
<div class="Tor4Ec">
|
<div class="Tor4Ec">
|
||||||
<span class="qXLe6d x3G5ab">
|
<span class="qXLe6d x3G5ab">
|
||||||
<span class="fYyStc">
|
<span class="fYyStc">
|
||||||
|
|
Loading…
Reference in New Issue