diff --git a/app/filter.py b/app/filter.py
index cd07318..92e5a9d 100644
--- a/app/filter.py
+++ b/app/filter.py
@@ -119,6 +119,7 @@ class Filter:
             page_url='',
             query='',
             mobile=False) -> None:
+        self.soup = None
         self.config = config
         self.mobile = mobile
         self.user_key = user_key
@@ -149,46 +150,50 @@ class Filter:
         return Fernet(self.user_key).encrypt(path.encode()).decode()
 
     def clean(self, soup) -> BeautifulSoup:
-        self.main_divs = soup.find('div', {'id': 'main'})
+        self.soup = soup
+        self.main_divs = self.soup.find('div', {'id': 'main'})
         self.remove_ads()
         self.remove_block_titles()
         self.remove_block_url()
         self.collapse_sections()
-        self.update_css(soup)
-        self.update_styling(soup)
-        self.remove_block_tabs(soup)
+        self.update_css()
+        self.update_styling()
+        self.remove_block_tabs()
 
-        for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]:
+        for img in [_ for _ in self.soup.find_all('img') if 'src' in _.attrs]:
             self.update_element_src(img, 'image/png')
 
-        for audio in [_ for _ in soup.find_all('audio') if 'src' in _.attrs]:
+        for audio in [_ for _ in self.soup.find_all('audio') if 'src' in _.attrs]:
             self.update_element_src(audio, 'audio/mpeg')
 
-        for link in soup.find_all('a', href=True):
+        for link in self.soup.find_all('a', href=True):
             self.update_link(link)
 
-        input_form = soup.find('form')
+        if self.config.alts:
+            self.site_alt_swap()
+
+        input_form = self.soup.find('form')
         if input_form is not None:
             input_form['method'] = 'GET' if self.config.get_only else 'POST'
             # Use a relative URI for submissions
             input_form['action'] = 'search'
 
         # Ensure no extra scripts passed through
-        for script in soup('script'):
+        for script in self.soup('script'):
             script.decompose()
 
         # Update default footer and header
-        footer = soup.find('footer')
+        footer = self.soup.find('footer')
         if footer:
             # Remove divs that have multiple links beyond just page navigation
             [_.decompose() for _ in footer.find_all('div', recursive=False)
              if len(_.find_all('a', href=True)) > 3]
 
-        header = soup.find('header')
+        header = self.soup.find('header')
         if header:
             header.decompose()
 
-        self.remove_site_blocks(soup)
-        return soup
+        self.remove_site_blocks(self.soup)
+        return self.soup
 
     def remove_site_blocks(self, soup) -> None:
         if not self.config.block or not soup.body:
@@ -233,7 +238,7 @@ class Filter:
                           if block_url.search(_.attrs['href']) is not None]
             _ = div.decompose() if len(block_divs) else None
 
-    def remove_block_tabs(self, soup) -> None:
+    def remove_block_tabs(self) -> None:
         if self.main_divs:
             for div in self.main_divs.find_all(
                 'div',
@@ -242,7 +247,7 @@ class Filter:
                 _ = div.decompose()
         else:
             # when in images tab
-            for div in soup.find_all(
+            for div in self.soup.find_all(
                 'div',
                 attrs={'class': f'{GClasses.images_tbm_tab}'}
             ):
@@ -369,7 +374,7 @@ class Filter:
             ) + '&type=' + urlparse.quote(mime)
         )
 
-    def update_css(self, soup) -> None:
+    def update_css(self) -> None:
         """Updates URLs used in inline styles to be proxied by Whoogle
         using the /element endpoint.
 
@@ -378,7 +383,7 @@ class Filter:
             """
         # Filter all
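
The refactor above threads the parsed page through the instance instead of through arguments: `clean()` assigns `self.soup = soup` once, helpers such as `update_css()`, `update_styling()`, and `remove_block_tabs()` drop their `soup` parameter and read the shared attribute, and the new `config.alts` branch calls `site_alt_swap()` against that same soup. Below is a minimal, self-contained sketch of the pattern using BeautifulSoup only; the `PageCleaner` class and its helper names are hypothetical stand-ins for illustration, not Whoogle code.

```python
from bs4 import BeautifulSoup


class PageCleaner:
    """Toy illustration of the 'store the soup on the instance' pattern."""

    def __init__(self) -> None:
        # Mirrors the diff's `self.soup = None`: the attribute exists even
        # before clean() runs, so helpers fail predictably if called early.
        self.soup = None

    def clean(self, soup: BeautifulSoup) -> BeautifulSoup:
        self.soup = soup          # assigned once, shared by every helper
        self.remove_scripts()
        self.remove_header()
        return self.soup

    def remove_scripts(self) -> None:
        # No `soup` argument needed any more; read the instance copy.
        for script in self.soup('script'):
            script.decompose()

    def remove_header(self) -> None:
        header = self.soup.find('header')
        if header:
            header.decompose()


html = "<header>nav</header><script>x()</script><div id='main'>kept</div>"
print(PageCleaner().clean(BeautifulSoup(html, 'html.parser')))
# -> <div id="main">kept</div>
```

Keeping the `self.soup = None` initializer in `__init__` (as the first hunk does) also gives readers a single place to see that the attribute exists before `clean()` populates it.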