Quick improvement to ad removal
parent
cb18bc6ccc
commit
71ba00785f
|
@@ -111,10 +111,9 @@ class Filter:
|
|||
return
|
||||
result_divs = main_divs.find_all('div', recursive=False)
|
||||
|
||||
# Only ads/sponsored content use classes in the list of result divs
|
||||
ad_divs = [ad_div for ad_div in result_divs if 'class' in ad_div.attrs]
|
||||
for div in ad_divs:
|
||||
div.decompose()
|
||||
for div in [_ for _ in result_divs]:
|
||||
has_ad = len([_ for _ in div.find_all('span', recursive=True) if 'ad' == _.text.lower()])
|
||||
_ = div.decompose() if has_ad else None
|
||||
|
||||
def update_image_paths(self, soup):
|
||||
for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]:
|
||||
|
|
Loading…
Reference in New Issue