Improve static typing throughout repo
Eventually this should be part of a separate mypy CI build, but right now it's just a general guideline. Future commits and PRs should be validated for static typing wherever possible. For reference, the testing commands used for this commit were:

mypy --ignore-missing-imports --pretty --disallow-untyped-calls app/
mypy --ignore-missing-imports --pretty --disallow-untyped-calls test/main
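As a rough illustration of what the --disallow-untyped-calls flag enforces (a hypothetical snippet, not code from this commit; the function names and bodies are made up), mypy refuses to let an annotated function call an unannotated one, which is why even minimal annotations such as -> None and -> str are added throughout this change:

def helper(value, flag=False):
    # Untyped: no parameter or return annotations, so mypy treats it as unchecked
    return str(value)


def caller(text: str) -> None:
    # Annotated caller; with --disallow-untyped-calls mypy reports:
    #   error: Call to untyped function "helper" in typed context
    helper(text, flag=True)


def helper_typed(value: str, flag: bool = False) -> str:
    # Annotating the callee (even just the return type) satisfies the flag,
    # so calling helper_typed() from caller() would pass the check.
    return str(value)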
parent 892b646a4e
commit 8ad8e66d37
@@ -1,6 +1,7 @@
 from app.request import VALID_PARAMS
 from app.utils.results import *
-from bs4.element import ResultSet
+from bs4 import BeautifulSoup
+from bs4.element import ResultSet, Tag
 from cryptography.fernet import Fernet
 import re
 import urllib.parse as urlparse
@@ -8,7 +9,7 @@ from urllib.parse import parse_qs


 class Filter:
-    def __init__(self, user_keys: dict, mobile=False, config=None):
+    def __init__(self, user_keys: dict, mobile=False, config=None) -> None:
         if config is None:
             config = {}

@@ -29,7 +30,7 @@ class Filter:
     def elements(self):
         return self._elements

-    def reskin(self, page):
+    def reskin(self, page: str) -> str:
         # Aesthetic only re-skinning
         if self.dark:
             page = page.replace(
@@ -39,22 +40,22 @@ class Filter:

         return page

-    def encrypt_path(self, msg, is_element=False):
+    def encrypt_path(self, path, is_element=False) -> str:
         # Encrypts path to avoid plaintext results in logs
         if is_element:
             # Element paths are encrypted separately from text, to allow key
             # regeneration once all items have been served to the user
             enc_path = Fernet(
                 self.user_keys['element_key']
-            ).encrypt(msg.encode()).decode()
+            ).encrypt(path.encode()).decode()
             self._elements += 1
             return enc_path

         return Fernet(
             self.user_keys['text_key']
-        ).encrypt(msg.encode()).decode()
+        ).encrypt(path.encode()).decode()

-    def clean(self, soup):
+    def clean(self, soup) -> BeautifulSoup:
         self.main_divs = soup.find('div', {'id': 'main'})
         self.remove_ads()
         self.fix_question_section()
@@ -90,7 +91,12 @@ class Filter:

         return soup

-    def remove_ads(self):
+    def remove_ads(self) -> None:
+        """Removes ads found in the list of search result divs
+
+        Returns:
+            None (The soup object is modified directly)
+        """
         if not self.main_divs:
             return

@@ -99,7 +105,16 @@ class Filter:
                       if has_ad_content(_.text)]
             _ = div.decompose() if len(div_ads) else None

-    def fix_question_section(self):
+    def fix_question_section(self) -> None:
+        """Collapses the "People Also Asked" section into a "details" element
+
+        These sections are typically the only sections in the results page that
+        are structured as <div><h2>Title</h2><div>...</div></div>, so they are
+        extracted by checking all result divs for h2 children.
+
+        Returns:
+            None (The soup object is modified directly)
+        """
         if not self.main_divs:
             return

@@ -126,7 +141,14 @@ class Filter:
         for question in questions:
             question['style'] = 'padding: 10px; font-style: italic;'

-    def update_element_src(self, element, mime):
+    def update_element_src(self, element: Tag, mime: str) -> None:
+        """Encrypts the original src of an element and rewrites the element src
+        to use the "/element?src=" pass-through.
+
+        Returns:
+            None (The soup element is modified directly)
+
+        """
         src = element['src']

         if src.startswith('//'):
@@ -145,7 +167,8 @@ class Filter:
             src,
             is_element=True) + '&type=' + urlparse.quote(mime)

-    def update_styling(self, soup):
+    def update_styling(self, soup) -> None:
+        """"""
         # Remove unnecessary button(s)
         for button in soup.find_all('button'):
             button.decompose()
@@ -168,7 +191,17 @@ class Filter:
         except AttributeError:
             pass

-    def update_link(self, link):
+    def update_link(self, link: Tag) -> None:
+        """Update internal link paths with encrypted path, otherwise remove
+        unnecessary redirects and/or marketing params from the url
+
+        Args:
+            link: A bs4 Tag element to inspect and update
+
+        Returns:
+            None (the tag is updated directly)
+
+        """
         # Replace href with only the intended destination (no "utm" type tags)
         href = link['href'].replace('https://www.google.com', '')
         if 'advanced_search' in href or 'tbm=shop' in href:
@@ -29,10 +29,10 @@ class TorError(Exception):
            altogether).
     """

-    def __init__(self, message, disable=False):
+    def __init__(self, message, disable=False) -> None:
         self.message = message
         self.disable = disable
-        super().__init__(self.message)
+        super().__init__(message)


 def send_tor_signal(signal: Signal) -> bool:
@@ -64,7 +64,7 @@ def gen_query(query, args, config, near_city=None) -> str:

     # Use :past(hour/day/week/month/year) if available
     # example search "new restaurants :past month"
-    sub_lang = ''
+    lang = ''
     if ':past' in query and 'tbs' not in args:
         time_range = str.strip(query.split(':past', 1)[-1])
         param_dict['tbs'] = '&tbs=' + ('qdr:' + str.lower(time_range[0]))
@@ -79,9 +79,10 @@ def gen_query(query, args, config, near_city=None) -> str:
         # Example:
         # &tbs=qdr:h,lr:lang_1pl
         # -- the lr param needs to be extracted and remove the leading '1'
-        sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]
-        sub_lang = sub_lang[0][sub_lang[0].find('lr:') +
-                               3:len(sub_lang[0])] if len(sub_lang) > 0 else ''
+        result_params = [_ for _ in result_tbs.split(',') if 'lr:' in _]
+        if len(result_params) > 0:
+            result_param = result_params[0]
+            lang = result_param[result_param.find('lr:') + 3:len(result_param)]

     # Ensure search query is parsable
     query = urlparse.quote(query)
@@ -103,8 +104,8 @@ def gen_query(query, args, config, near_city=None) -> str:
     if 'source' in args:
         param_dict['source'] = '&source=' + args.get('source')
         param_dict['lr'] = ('&lr=' + ''.join(
-            [_ for _ in sub_lang if not _.isdigit()]
-        )) if sub_lang else ''
+            [_ for _ in lang if not _.isdigit()]
+        )) if lang else ''
     else:
         param_dict['lr'] = (
             '&lr=' + config.lang_search
@@ -150,12 +151,12 @@ class Request:
         # Set up proxy, if previously configured
         if os.environ.get('WHOOGLE_PROXY_LOC'):
             auth_str = ''
-            if os.environ.get('WHOOGLE_PROXY_USER'):
-                auth_str = os.environ.get('WHOOGLE_PROXY_USER') + \
-                           ':' + os.environ.get('WHOOGLE_PROXY_PASS')
+            if os.environ.get('WHOOGLE_PROXY_USER', ''):
+                auth_str = os.environ.get('WHOOGLE_PROXY_USER', '') + \
+                           ':' + os.environ.get('WHOOGLE_PROXY_PASS', '')
             self.proxies = {
-                'http': os.environ.get('WHOOGLE_PROXY_TYPE') + '://' +
-                        auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC'),
+                'http': os.environ.get('WHOOGLE_PROXY_TYPE', '') + '://' +
+                        auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC', ''),
             }
             self.proxies['https'] = self.proxies['http'].replace('http',
                                                                  'https')
@@ -347,7 +347,7 @@ def window():
     return render_template('display.html', response=results)


-def run_app():
+def run_app() -> None:
     parser = argparse.ArgumentParser(
         description='Whoogle Search console runner')
     parser.add_argument(
@@ -57,6 +57,7 @@ def get_first_link(soup: BeautifulSoup) -> str:
         # Return the first search result URL
         if 'url?q=' in a['href']:
             return filter_link_args(a['href'])
+    return ''


 def get_site_alt(link: str) -> str:
@@ -24,15 +24,24 @@ def needs_https(url: str) -> bool:
         bool: True/False representing the need to upgrade

     """
-    https_only = os.getenv('HTTPS_ONLY', False)
+    https_only = bool(os.getenv('HTTPS_ONLY', 0))
     is_heroku = url.endswith('.herokuapp.com')
     is_http = url.startswith('http://')

     return (is_heroku and is_http) or (https_only and is_http)


-def has_captcha(site_contents: str) -> bool:
-    return CAPTCHA in site_contents
+def has_captcha(results: str) -> bool:
+    """Checks to see if the search results are blocked by a captcha
+
+    Args:
+        results: The search page html as a string
+
+    Returns:
+        bool: True/False indicating if a captcha element was found
+
+    """
+    return CAPTCHA in results


 class Search:
@@ -118,23 +127,23 @@ class Search:
         """
         mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent

-        content_filter = Filter(
-            self.session['fernet_keys'],
-            mobile=mobile,
-            config=self.config)
-        full_query = gen_query(
-            self.query,
-            self.request_params,
-            self.config,
-            content_filter.near)
+        content_filter = Filter(self.session['fernet_keys'],
+                                mobile=mobile,
+                                config=self.config)
+        full_query = gen_query(self.query,
+                               self.request_params,
+                               self.config,
+                               content_filter.near)
         get_body = g.user_request.send(query=full_query)

         # Produce cleanable html soup from response
         html_soup = bsoup(content_filter.reskin(get_body.text), 'html.parser')
-        html_soup.insert(
-            0,
-            bsoup(TOR_BANNER, 'html.parser')
-            if g.user_request.tor_valid else bsoup('', 'html.parser'))
+
+        # Indicate whether or not a Tor connection is active
+        tor_banner = bsoup('', 'html.parser')
+        if g.user_request.tor_valid:
+            tor_banner = bsoup(TOR_BANNER, 'html.parser')
+        html_soup.insert(0, tor_banner)

         if self.feeling_lucky:
             return get_first_link(html_soup), 0