PEP-8: Fix formatting issues, add CI workflow (#161)

Enforces PEP 8 formatting for all Python code.

Adds a GitHub Actions workflow that checks PEP 8 formatting using pycodestyle.
main
Ben Busby 2020-12-17 16:06:47 -05:00 committed by GitHub
parent b55aad3fdf
commit 375f4ee9fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 329 additions and 157 deletions

22
.github/workflows/pep8.yml vendored Normal file
View File

@ -0,0 +1,22 @@
# Lint workflow: runs pycodestyle over the application and test sources.
name: pep8
# Triggered on every push.
on:
  push
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pycodestyle
      # --show-source prints the offending line; --show-pep8 prints the
      # relevant PEP 8 text for each reported error.
      - name: Run pycodestyle
        run: |
          pycodestyle --show-source --show-pep8 app/*
          pycodestyle --show-source --show-pep8 test/*

View File

@ -6,20 +6,35 @@ from flask_session import Session
import os import os
from stem import Signal from stem import Signal
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static') app = Flask(__name__, static_folder=os.path.dirname(
os.path.abspath(__file__)) + '/static')
app.user_elements = {} app.user_elements = {}
app.default_key_set = generate_user_keys() app.default_key_set = generate_user_keys()
app.no_cookie_ips = [] app.no_cookie_ips = []
app.config['SECRET_KEY'] = os.urandom(32) app.config['SECRET_KEY'] = os.urandom(32)
app.config['SESSION_TYPE'] = 'filesystem' app.config['SESSION_TYPE'] = 'filesystem'
app.config['VERSION_NUMBER'] = '0.2.1' app.config['VERSION_NUMBER'] = '0.2.1'
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) app.config['APP_ROOT'] = os.getenv(
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) 'APP_ROOT',
app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'config')) os.path.dirname(os.path.abspath(__file__)))
app.config['DEFAULT_CONFIG'] = os.path.join(app.config['CONFIG_PATH'], 'config.json') app.config['STATIC_FOLDER'] = os.getenv(
app.config['SESSION_FILE_DIR'] = os.path.join(app.config['CONFIG_PATH'], 'session') 'STATIC_FOLDER',
app.config['BANG_PATH'] = os.getenv('CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'bangs')) os.path.join(app.config['APP_ROOT'], 'static'))
app.config['BANG_FILE'] = os.path.join(app.config['BANG_PATH'], 'bangs.json') app.config['CONFIG_PATH'] = os.getenv(
'CONFIG_VOLUME',
os.path.join(app.config['STATIC_FOLDER'], 'config'))
app.config['DEFAULT_CONFIG'] = os.path.join(
app.config['CONFIG_PATH'],
'config.json')
app.config['SESSION_FILE_DIR'] = os.path.join(
app.config['CONFIG_PATH'],
'session')
app.config['BANG_PATH'] = os.getenv(
'CONFIG_VOLUME',
os.path.join(app.config['STATIC_FOLDER'], 'bangs'))
app.config['BANG_FILE'] = os.path.join(
app.config['BANG_PATH'],
'bangs.json')
if not os.path.exists(app.config['CONFIG_PATH']): if not os.path.exists(app.config['CONFIG_PATH']):
os.makedirs(app.config['CONFIG_PATH']) os.makedirs(app.config['CONFIG_PATH'])
@ -38,4 +53,4 @@ Session(app)
# Attempt to acquire tor identity, to determine if Tor config is available # Attempt to acquire tor identity, to determine if Tor config is available
send_tor_signal(Signal.HEARTBEAT) send_tor_signal(Signal.HEARTBEAT)
from app import routes from app import routes # noqa

View File

@ -32,20 +32,27 @@ class Filter:
def reskin(self, page): def reskin(self, page):
# Aesthetic only re-skinning # Aesthetic only re-skinning
if self.dark: if self.dark:
page = page.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea') page = page.replace(
'fff', '000').replace(
'202124', 'ddd').replace(
'1967D2', '3b85ea')
return page return page
def encrypt_path(self, msg, is_element=False): def encrypt_path(self, msg, is_element=False):
# Encrypts path to avoid plaintext results in logs # Encrypts path to avoid plaintext results in logs
if is_element: if is_element:
# Element paths are tracked differently in order for the element key to be regenerated # Element paths are encrypted separately from text, to allow key
# once all elements have been loaded # regeneration once all items have been served to the user
enc_path = Fernet(self.user_keys['element_key']).encrypt(msg.encode()).decode() enc_path = Fernet(
self.user_keys['element_key']
).encrypt(msg.encode()).decode()
self._elements += 1 self._elements += 1
return enc_path return enc_path
return Fernet(self.user_keys['text_key']).encrypt(msg.encode()).decode() return Fernet(
self.user_keys['text_key']
).encrypt(msg.encode()).decode()
def clean(self, soup): def clean(self, soup):
self.main_divs = soup.find('div', {'id': 'main'}) self.main_divs = soup.find('div', {'id': 'main'})
@ -74,8 +81,8 @@ class Filter:
footer = soup.find('footer') footer = soup.find('footer')
if footer: if footer:
# Remove divs that have multiple links beyond just page navigation # Remove divs that have multiple links beyond just page navigation
[_.decompose() for _ in footer.find_all('div', recursive=False) [_.decompose() for _ in footer.find_all('div', recursive=False)
if len(_.find_all('a', href=True)) > 3] if len(_.find_all('a', href=True)) > 3]
header = soup.find('header') header = soup.find('header')
if header: if header:
@ -88,8 +95,9 @@ class Filter:
return return
for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]: for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]:
has_ad = len([_ for _ in div.find_all('span', recursive=True) if has_ad_content(_.text)]) div_ads = [_ for _ in div.find_all('span', recursive=True)
_ = div.decompose() if has_ad else None if has_ad_content(_.text)]
_ = div.decompose() if len(div_ads) else None
def fix_question_section(self): def fix_question_section(self):
if not self.main_divs: if not self.main_divs:
@ -97,14 +105,14 @@ class Filter:
question_divs = [_ for _ in self.main_divs.find_all( question_divs = [_ for _ in self.main_divs.find_all(
'div', recursive=False 'div', recursive=False
) if len(_.find_all('h2')) > 0] ) if len(_.find_all('h2')) > 0]
if len(question_divs) == 0: if len(question_divs) == 0:
return return
# Wrap section in details element to allow collapse/expand # Wrap section in details element to allow collapse/expand
details = BeautifulSoup(features='lxml').new_tag('details') details = BeautifulSoup('html.parser').new_tag('details')
summary = BeautifulSoup(features='lxml').new_tag('summary') summary = BeautifulSoup('html.parser').new_tag('summary')
summary.string = question_divs[0].find('h2').text summary.string = question_divs[0].find('h2').text
question_divs[0].find('h2').decompose() question_divs[0].find('h2').decompose()
details.append(summary) details.append(summary)
@ -113,7 +121,7 @@ class Filter:
for question_div in question_divs: for question_div in question_divs:
questions = [_ for _ in question_div.find_all( questions = [_ for _ in question_div.find_all(
'div', recursive=True 'div', recursive=True
) if _.text.endswith('?')] ) if _.text.endswith('?')]
for question in questions: for question in questions:
question['style'] = 'padding: 10px; font-style: italic;' question['style'] = 'padding: 10px; font-style: italic;'
@ -131,11 +139,15 @@ class Filter:
element['src'] = BLANK_B64 element['src'] = BLANK_B64
return return
element['src'] = 'element?url=' + self.encrypt_path(element_src, is_element=True) + \ element['src'] = 'element?url=' + self.encrypt_path(
'&type=' + urlparse.quote(mime) element_src,
# TODO: Non-mobile image results link to website instead of image is_element=True) + '&type=' + urlparse.quote(mime)
# FIXME: Non-mobile image results link to website instead of image
# if not self.mobile: # if not self.mobile:
# img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser')) # img.append(
# BeautifulSoup(FULL_RES_IMG.format(element_src),
# 'html.parser'))
def update_styling(self, soup): def update_styling(self, soup):
# Remove unnecessary button(s) # Remove unnecessary button(s)
@ -149,8 +161,9 @@ class Filter:
# Update logo # Update logo
logo = soup.find('a', {'class': 'l'}) logo = soup.find('a', {'class': 'l'})
if logo and self.mobile: if logo and self.mobile:
logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; ' \ logo['style'] = ('display:flex; justify-content:center; '
'font-size:18px; ' 'align-items:center; color:#685e79; '
'font-size:18px; ')
# Fix search bar length on mobile # Fix search bar length on mobile
try: try:
@ -163,7 +176,7 @@ class Filter:
# Replace href with only the intended destination (no "utm" type tags) # Replace href with only the intended destination (no "utm" type tags)
href = link['href'].replace('https://www.google.com', '') href = link['href'].replace('https://www.google.com', '')
if 'advanced_search' in href or 'tbm=shop' in href: if 'advanced_search' in href or 'tbm=shop' in href:
# TODO: The "Shopping" tab requires further filtering (see #136) # FIXME: The "Shopping" tab requires further filtering (see #136)
# Temporarily removing all links to that tab for now. # Temporarily removing all links to that tab for now.
link.decompose() link.decompose()
return return
@ -171,20 +184,26 @@ class Filter:
link['target'] = '_blank' link['target'] = '_blank'
result_link = urlparse.urlparse(href) result_link = urlparse.urlparse(href)
query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' query_link = parse_qs(
result_link.query
)['q'][0] if '?q=' in href else ''
if query_link.startswith('/'): if query_link.startswith('/'):
# Internal google links (i.e. mail, maps, etc) should still be forwarded to Google # Internal google links (i.e. mail, maps, etc) should still
# be forwarded to Google
link['href'] = 'https://google.com' + query_link link['href'] = 'https://google.com' + query_link
elif '/search?q=' in href: elif '/search?q=' in href:
# "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes # "li:1" implies the query should be interpreted verbatim,
# which is accomplished by wrapping the query in double quotes
if 'li:1' in href: if 'li:1' in href:
query_link = '"' + query_link + '"' query_link = '"' + query_link + '"'
new_search = 'search?q=' + self.encrypt_path(query_link) new_search = 'search?q=' + self.encrypt_path(query_link)
query_params = parse_qs(urlparse.urlparse(href).query) query_params = parse_qs(urlparse.urlparse(href).query)
for param in VALID_PARAMS: for param in VALID_PARAMS:
param_val = query_params[param][0] if param in query_params else '' if param not in query_params:
continue
param_val = query_params[param][0]
new_search += '&' + param + '=' + param_val new_search += '&' + param + '=' + param_val
link['href'] = new_search link['href'] = new_search
elif 'url?q=' in href: elif 'url?q=' in href:
@ -199,9 +218,11 @@ class Filter:
# Replace link location if "alts" config is enabled # Replace link location if "alts" config is enabled
if self.alt_redirect: if self.alt_redirect:
# Search and replace all link descriptions with alternative location # Search and replace all link descriptions
# with alternative location
link['href'] = get_site_alt(link['href']) link['href'] = get_site_alt(link['href'])
link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys()))) link_desc = link.find_all(
text=re.compile('|'.join(SITE_ALTS.keys())))
if len(link_desc) == 0: if len(link_desc) == 0:
return return

View File

@ -128,7 +128,7 @@ class Config:
{'name': 'Fiji', 'value': 'countryFJ'}, {'name': 'Fiji', 'value': 'countryFJ'},
{'name': 'Finland', 'value': 'countryFI'}, {'name': 'Finland', 'value': 'countryFI'},
{'name': 'France', 'value': 'countryFR'}, {'name': 'France', 'value': 'countryFR'},
{'name': 'France\, Metropolitan', 'value': 'countryFX'}, {'name': r'France\, Metropolitan', 'value': 'countryFX'},
{'name': 'French Guiana', 'value': 'countryGF'}, {'name': 'French Guiana', 'value': 'countryGF'},
{'name': 'French Polynesia', 'value': 'countryPF'}, {'name': 'French Polynesia', 'value': 'countryPF'},
{'name': 'French Southern Territories', 'value': 'countryTF'}, {'name': 'French Southern Territories', 'value': 'countryTF'},
@ -167,7 +167,8 @@ class Config:
{'name': 'Kazakhstan', 'value': 'countryKZ'}, {'name': 'Kazakhstan', 'value': 'countryKZ'},
{'name': 'Kenya', 'value': 'countryKE'}, {'name': 'Kenya', 'value': 'countryKE'},
{'name': 'Kiribati', 'value': 'countryKI'}, {'name': 'Kiribati', 'value': 'countryKI'},
{'name': 'Korea, Democratic People\'s Republic of', 'value': 'countryKP'}, {'name': 'Korea, Democratic People\'s Republic of',
'value': 'countryKP'},
{'name': 'Korea, Republic of', 'value': 'countryKR'}, {'name': 'Korea, Republic of', 'value': 'countryKR'},
{'name': 'Kuwait', 'value': 'countryKW'}, {'name': 'Kuwait', 'value': 'countryKW'},
{'name': 'Kyrgyzstan', 'value': 'countryKG'}, {'name': 'Kyrgyzstan', 'value': 'countryKG'},
@ -181,7 +182,8 @@ class Config:
{'name': 'Lithuania', 'value': 'countryLT'}, {'name': 'Lithuania', 'value': 'countryLT'},
{'name': 'Luxembourg', 'value': 'countryLU'}, {'name': 'Luxembourg', 'value': 'countryLU'},
{'name': 'Macao', 'value': 'countryMO'}, {'name': 'Macao', 'value': 'countryMO'},
{'name': 'Macedonia, the Former Yugosalv Republic of', 'value': 'countryMK'}, {'name': 'Macedonia, the Former Yugosalv Republic of',
'value': 'countryMK'},
{'name': 'Madagascar', 'value': 'countryMG'}, {'name': 'Madagascar', 'value': 'countryMG'},
{'name': 'Malawi', 'value': 'countryMW'}, {'name': 'Malawi', 'value': 'countryMW'},
{'name': 'Malaysia', 'value': 'countryMY'}, {'name': 'Malaysia', 'value': 'countryMY'},
@ -253,7 +255,8 @@ class Config:
{'name': 'Solomon Islands', 'value': 'countrySB'}, {'name': 'Solomon Islands', 'value': 'countrySB'},
{'name': 'Somalia', 'value': 'countrySO'}, {'name': 'Somalia', 'value': 'countrySO'},
{'name': 'South Africa', 'value': 'countryZA'}, {'name': 'South Africa', 'value': 'countryZA'},
{'name': 'South Georgia and the South Sandwich Islands', 'value': 'countryGS'}, {'name': 'South Georgia and the South Sandwich Islands',
'value': 'countryGS'},
{'name': 'Spain', 'value': 'countryES'}, {'name': 'Spain', 'value': 'countryES'},
{'name': 'Sri Lanka', 'value': 'countryLK'}, {'name': 'Sri Lanka', 'value': 'countryLK'},
{'name': 'Sudan', 'value': 'countrySD'}, {'name': 'Sudan', 'value': 'countrySD'},
@ -310,6 +313,12 @@ class Config:
self.alts = False self.alts = False
self.new_tab = False self.new_tab = False
self.get_only = False self.get_only = False
self.safe_keys = [
'lang_search',
'lang_interface',
'ctry',
'dark'
]
for key, value in kwargs.items(): for key, value in kwargs.items():
setattr(self, key, value) setattr(self, key, value)
@ -338,12 +347,7 @@ class Config:
array array
""" """
return key in [ return key in self.safe_keys
'lang_search',
'lang_interface',
'ctry',
'dark'
]
def from_params(self, params) -> 'Config': def from_params(self, params) -> 'Config':
"""Modify user config with search parameters. This is primarily """Modify user config with search parameters. This is primarily

View File

@ -8,9 +8,9 @@ import os
from stem import Signal, SocketError from stem import Signal, SocketError
from stem.control import Controller from stem.control import Controller
# Core Google search URLs
SEARCH_URL = 'https://www.google.com/search?gbv=1&q=' SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
AUTOCOMPLETE_URL = 'https://suggestqueries.google.com/complete/search?client=toolbar&' AUTOCOMPLETE_URL = ('https://suggestqueries.google.com/'
'complete/search?client=toolbar&')
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0' MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
@ -72,11 +72,16 @@ def gen_query(query, args, config, near_city=None) -> str:
result_tbs = args.get('tbs') result_tbs = args.get('tbs')
param_dict['tbs'] = '&tbs=' + result_tbs param_dict['tbs'] = '&tbs=' + result_tbs
# Occasionally the 'tbs' param provided by google also contains a field for 'lr', but formatted # Occasionally the 'tbs' param provided by google also contains a
# strangely. This is a (admittedly not very elegant) solution for this. # field for 'lr', but formatted strangely. This is a rough solution
# Ex/ &tbs=qdr:h,lr:lang_1pl --> the lr param needs to be extracted and have the "1" digit removed in this case # for this.
#
# Example:
# &tbs=qdr:h,lr:lang_1pl
# -- the lr param needs to be extracted and remove the leading '1'
sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _] sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]
sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:len(sub_lang[0])] if len(sub_lang) > 0 else '' sub_lang = sub_lang[0][sub_lang[0].find('lr:') +
3:len(sub_lang[0])] if len(sub_lang) > 0 else ''
# Ensure search query is parsable # Ensure search query is parsable
query = urlparse.quote(query) query = urlparse.quote(query)
@ -93,20 +98,26 @@ def gen_query(query, args, config, near_city=None) -> str:
if near_city: if near_city:
param_dict['near'] = '&near=' + urlparse.quote(near_city) param_dict['near'] = '&near=' + urlparse.quote(near_city)
# Set language for results (lr) if source isn't set, otherwise use the result # Set language for results (lr) if source isn't set, otherwise use the
# language param provided by google (but with the strange digit(s) removed) # result language param provided in the results
if 'source' in args: if 'source' in args:
param_dict['source'] = '&source=' + args.get('source') param_dict['source'] = '&source=' + args.get('source')
param_dict['lr'] = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else '' param_dict['lr'] = ('&lr=' + ''.join(
[_ for _ in sub_lang if not _.isdigit()]
)) if sub_lang else ''
else: else:
param_dict['lr'] = ('&lr=' + config.lang_search) if config.lang_search else '' param_dict['lr'] = (
'&lr=' + config.lang_search
) if config.lang_search else ''
# Set autocorrected search ignore # 'nfpr' defines the exclusion of results from an auto-corrected query
if 'nfpr' in args: if 'nfpr' in args:
param_dict['nfpr'] = '&nfpr=' + args.get('nfpr') param_dict['nfpr'] = '&nfpr=' + args.get('nfpr')
param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else '' param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
param_dict['hl'] = ('&hl=' + config.lang_interface.replace('lang_', '')) if config.lang_interface else '' param_dict['hl'] = (
'&hl=' + config.lang_interface.replace('lang_', '')
) if config.lang_interface else ''
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off') param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
for val in param_dict.values(): for val in param_dict.values():
@ -126,6 +137,7 @@ class Request:
root_path -- the root path of the whoogle instance root_path -- the root path of the whoogle instance
config -- the user's current whoogle configuration config -- the user's current whoogle configuration
""" """
def __init__(self, normal_ua, root_path, config: Config): def __init__(self, normal_ua, root_path, config: Config):
# Send heartbeat to Tor, used in determining if the user can or cannot # Send heartbeat to Tor, used in determining if the user can or cannot
# enable Tor for future requests # enable Tor for future requests
@ -143,9 +155,10 @@ class Request:
':' + os.environ.get('WHOOGLE_PROXY_PASS') ':' + os.environ.get('WHOOGLE_PROXY_PASS')
self.proxies = { self.proxies = {
'http': os.environ.get('WHOOGLE_PROXY_TYPE') + '://' + 'http': os.environ.get('WHOOGLE_PROXY_TYPE') + '://' +
auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC'), auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC'),
} }
self.proxies['https'] = self.proxies['http'].replace('http', 'https') self.proxies['https'] = self.proxies['http'].replace('http',
'https')
else: else:
self.proxies = { self.proxies = {
'http': 'socks5://127.0.0.1:9050', 'http': 'socks5://127.0.0.1:9050',
@ -169,7 +182,8 @@ class Request:
""" """
ac_query = dict(hl=self.language, q=query) ac_query = dict(hl=self.language, q=query)
response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text response = self.send(base_url=AUTOCOMPLETE_URL,
query=urlparse.urlencode(ac_query)).text
if response: if response:
dom = etree.fromstring(response) dom = etree.fromstring(response)
@ -178,14 +192,14 @@ class Request:
return [] return []
def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response: def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response:
"""Sends an outbound request to a URL. Optionally sends the request using Tor, if """Sends an outbound request to a URL. Optionally sends the request
enabled by the user. using Tor, if enabled by the user.
Args: Args:
base_url: The URL to use in the request base_url: The URL to use in the request
query: The optional query string for the request query: The optional query string for the request
attempt: The number of attempts made for the request (used for cycling attempt: The number of attempts made for the request
through Tor identities, if enabled) (used for cycling through Tor identities, if enabled)
Returns: Returns:
Response: The Response object returned by the requests call Response: The Response object returned by the requests call
@ -195,21 +209,30 @@ class Request:
'User-Agent': self.modified_user_agent 'User-Agent': self.modified_user_agent
} }
# Validate Tor connection and request new identity if the last one failed # Validate Tor conn and request new identity if the last one failed
if self.tor and not send_tor_signal(Signal.NEWNYM if attempt > 0 else Signal.HEARTBEAT): if self.tor and not send_tor_signal(
raise TorError("Tor was previously enabled, but the connection has been dropped. Please check your " + Signal.NEWNYM if attempt > 0 else Signal.HEARTBEAT):
"Tor configuration and try again.", disable=True) raise TorError(
"Tor was previously enabled, but the connection has been "
"dropped. Please check your Tor configuration and try again.",
disable=True)
# Make sure that the tor connection is valid, if enabled # Make sure that the tor connection is valid, if enabled
if self.tor: if self.tor:
tor_check = requests.get('https://check.torproject.org/', proxies=self.proxies, headers=headers) tor_check = requests.get('https://check.torproject.org/',
proxies=self.proxies, headers=headers)
self.tor_valid = 'Congratulations' in tor_check.text self.tor_valid = 'Congratulations' in tor_check.text
if not self.tor_valid: if not self.tor_valid:
raise TorError("Tor connection succeeded, but the connection could not be validated by torproject.org", raise TorError(
disable=True) "Tor connection succeeded, but the connection could not "
"be validated by torproject.org",
disable=True)
response = requests.get(base_url + query, proxies=self.proxies, headers=headers) response = requests.get(
base_url + query,
proxies=self.proxies,
headers=headers)
# Retry query with new identity if using Tor (max 10 attempts) # Retry query with new identity if using Tor (max 10 attempts)
if 'form id="captcha-form"' in response.text and self.tor: if 'form id="captcha-form"' in response.text and self.tor:

View File

@ -9,7 +9,8 @@ import uuid
from functools import wraps from functools import wraps
import waitress import waitress
from flask import jsonify, make_response, request, redirect, render_template, send_file, session, url_for from flask import jsonify, make_response, request, redirect, render_template, \
send_file, session, url_for
from requests import exceptions from requests import exceptions
from app import app from app import app
@ -30,23 +31,30 @@ def auth_required(f):
# Skip if username/password not set # Skip if username/password not set
whoogle_user = os.getenv('WHOOGLE_USER', '') whoogle_user = os.getenv('WHOOGLE_USER', '')
whoogle_pass = os.getenv('WHOOGLE_PASS', '') whoogle_pass = os.getenv('WHOOGLE_PASS', '')
if (not whoogle_user or not whoogle_pass) or \ if (not whoogle_user or not whoogle_pass) or (
(auth and whoogle_user == auth.username and whoogle_pass == auth.password): auth
and whoogle_user == auth.username
and whoogle_pass == auth.password):
return f(*args, **kwargs) return f(*args, **kwargs)
else: else:
return make_response('Not logged in', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'}) return make_response('Not logged in', 401, {
'WWW-Authenticate': 'Basic realm="Login Required"'})
return decorated return decorated
@app.before_request @app.before_request
def before_request_func(): def before_request_func():
g.request_params = request.args if request.method == 'GET' else request.form g.request_params = (
request.args if request.method == 'GET' else request.form
)
g.cookies_disabled = False g.cookies_disabled = False
# Generate session values for user if unavailable # Generate session values for user if unavailable
if not valid_user_session(session): if not valid_user_session(session):
session['config'] = json.load(open(app.config['DEFAULT_CONFIG'])) \ session['config'] = json.load(open(app.config['DEFAULT_CONFIG'])) \
if os.path.exists(app.config['DEFAULT_CONFIG']) else {'url': request.url_root} if os.path.exists(app.config['DEFAULT_CONFIG']) else {
'url': request.url_root}
session['uuid'] = str(uuid.uuid4()) session['uuid'] = str(uuid.uuid4())
session['fernet_keys'] = generate_user_keys(True) session['fernet_keys'] = generate_user_keys(True)
@ -63,12 +71,16 @@ def before_request_func():
is_http = request.url.startswith('http://') is_http = request.url.startswith('http://')
if (is_heroku and is_http) or (https_only and is_http): if (is_heroku and is_http) or (https_only and is_http):
return redirect(request.url.replace('http://', 'https://', 1), code=308) return redirect(
request.url.replace('http://', 'https://', 1),
code=308)
g.user_config = Config(**session['config']) g.user_config = Config(**session['config'])
if not g.user_config.url: if not g.user_config.url:
g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root g.user_config.url = request.url_root.replace(
'http://',
'https://') if https_only else request.url_root
g.user_request = Request( g.user_request = Request(
request.headers.get('User-Agent'), request.headers.get('User-Agent'),
@ -82,13 +94,17 @@ def before_request_func():
def after_request_func(response): def after_request_func(response):
if app.user_elements[session['uuid']] <= 0 and '/element' in request.url: if app.user_elements[session['uuid']] <= 0 and '/element' in request.url:
# Regenerate element key if all elements have been served to user # Regenerate element key if all elements have been served to user
session['fernet_keys']['element_key'] = '' if not g.cookies_disabled else app.default_key_set['element_key'] session['fernet_keys'][
'element_key'] = '' if not g.cookies_disabled else \
app.default_key_set['element_key']
app.user_elements[session['uuid']] = 0 app.user_elements[session['uuid']] = 0
# Check if address consistently has cookies blocked, in which case start removing session # Check if address consistently has cookies blocked,
# files after creation. # in which case start removing session files after creation.
# Note: This is primarily done to prevent overpopulation of session directories, since browsers that #
# block cookies will still trigger Flask's session creation routine with every request. # Note: This is primarily done to prevent overpopulation of session
# directories, since browsers that block cookies will still trigger
# Flask's session creation routine with every request.
if g.cookies_disabled and request.remote_addr not in app.no_cookie_ips: if g.cookies_disabled and request.remote_addr not in app.no_cookie_ips:
app.no_cookie_ips.append(request.remote_addr) app.no_cookie_ips.append(request.remote_addr)
elif g.cookies_disabled and request.remote_addr in app.no_cookie_ips: elif g.cookies_disabled and request.remote_addr in app.no_cookie_ips:
@ -101,6 +117,7 @@ def after_request_func(response):
@app.errorhandler(404) @app.errorhandler(404)
def unknown_page(e): def unknown_page(e):
app.logger.warn(e)
return redirect(g.app_location) return redirect(g.app_location)
@ -109,7 +126,8 @@ def unknown_page(e):
def index(): def index():
# Reset keys # Reset keys
session['fernet_keys'] = generate_user_keys(g.cookies_disabled) session['fernet_keys'] = generate_user_keys(g.cookies_disabled)
error_message = session['error_message'] if 'error_message' in session else '' error_message = session[
'error_message'] if 'error_message' in session else ''
session['error_message'] = '' session['error_message'] = ''
return render_template('index.html', return render_template('index.html',
@ -128,7 +146,8 @@ def opensearch():
if opensearch_url.endswith('/'): if opensearch_url.endswith('/'):
opensearch_url = opensearch_url[:-1] opensearch_url = opensearch_url[:-1]
get_only = g.user_config.get_only or 'Chrome' in request.headers.get('User-Agent') get_only = g.user_config.get_only or 'Chrome' in request.headers.get(
'User-Agent')
return render_template( return render_template(
'opensearch.xml', 'opensearch.xml',
@ -147,16 +166,23 @@ def autocomplete():
# Search bangs if the query begins with "!", but not "! " (feeling lucky) # Search bangs if the query begins with "!", but not "! " (feeling lucky)
if q.startswith('!') and len(q) > 1 and not q.startswith('! '): if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if _.startswith(q)]]) return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if
_.startswith(q)]])
if not q and not request.data: if not q and not request.data:
return jsonify({'?': []}) return jsonify({'?': []})
elif request.data: elif request.data:
q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', '')) q = urlparse.unquote_plus(
request.data.decode('utf-8').replace('q=', ''))
# Return a list of suggestions for the query # Return a list of suggestions for the query
# Note: If Tor is enabled, this returns nothing, as the request is almost always rejected #
return jsonify([q, g.user_request.autocomplete(q) if not g.user_config.tor else []]) # Note: If Tor is enabled, this returns nothing, as the request is
# almost always rejected
return jsonify([
q,
g.user_request.autocomplete(q) if not g.user_config.tor else []
])
@app.route('/search', methods=['GET', 'POST']) @app.route('/search', methods=['GET', 'POST'])
@ -168,7 +194,8 @@ def search():
# Update user config if specified in search args # Update user config if specified in search args
g.user_config = g.user_config.from_params(g.request_params) g.user_config = g.user_config.from_params(g.request_params)
search_util = RoutingUtils(request, g.user_config, session, cookies_disabled=g.cookies_disabled) search_util = RoutingUtils(request, g.user_config, session,
cookies_disabled=g.cookies_disabled)
query = search_util.new_search_query() query = search_util.new_search_query()
resolved_bangs = search_util.bang_operator(bang_json) resolved_bangs = search_util.bang_operator(bang_json)
@ -183,14 +210,17 @@ def search():
try: try:
response, elements = search_util.generate_response() response, elements = search_util.generate_response()
except TorError as e: except TorError as e:
session['error_message'] = e.message + ("\\n\\nTor config is now disabled!" if e.disable else "") session['error_message'] = e.message + (
session['config']['tor'] = False if e.disable else session['config']['tor'] "\\n\\nTor config is now disabled!" if e.disable else "")
session['config']['tor'] = False if e.disable else session['config'][
'tor']
return redirect(url_for('.index')) return redirect(url_for('.index'))
if search_util.feeling_lucky or elements < 0: if search_util.feeling_lucky or elements < 0:
return redirect(response, code=303) return redirect(response, code=303)
# Keep count of external elements to fetch before element key can be regenerated # Keep count of external elements to fetch before
# the element key can be regenerated
app.user_elements[session['uuid']] = elements app.user_elements[session['uuid']] = elements
return render_template( return render_template(
@ -200,12 +230,13 @@ def search():
dark_mode=g.user_config.dark, dark_mode=g.user_config.dark,
response=response, response=response,
version_number=app.config['VERSION_NUMBER'], version_number=app.config['VERSION_NUMBER'],
search_header=render_template( search_header=(render_template(
'header.html', 'header.html',
dark_mode=g.user_config.dark, dark_mode=g.user_config.dark,
query=urlparse.unquote(query), query=urlparse.unquote(query),
search_type=search_util.search_type, search_type=search_util.search_type,
mobile=g.user_request.mobile) if 'isch' not in search_util.search_type else '') mobile=g.user_request.mobile)
if 'isch' not in search_util.search_type else ''))
@app.route('/config', methods=['GET', 'POST', 'PUT']) @app.route('/config', methods=['GET', 'POST', 'PUT'])
@ -215,8 +246,12 @@ def config():
return json.dumps(g.user_config.__dict__) return json.dumps(g.user_config.__dict__)
elif request.method == 'PUT': elif request.method == 'PUT':
if 'name' in request.args: if 'name' in request.args:
config_pkl = os.path.join(app.config['CONFIG_PATH'], request.args.get('name')) config_pkl = os.path.join(
session['config'] = pickle.load(open(config_pkl, 'rb')) if os.path.exists(config_pkl) else session['config'] app.config['CONFIG_PATH'],
request.args.get('name'))
session['config'] = (pickle.load(open(config_pkl, 'rb'))
if os.path.exists(config_pkl)
else session['config'])
return json.dumps(session['config']) return json.dumps(session['config'])
else: else:
return json.dumps({}) return json.dumps({})
@ -227,11 +262,16 @@ def config():
# Save config by name to allow a user to easily load later # Save config by name to allow a user to easily load later
if 'name' in request.args: if 'name' in request.args:
pickle.dump(config_data, open(os.path.join(app.config['CONFIG_PATH'], request.args.get('name')), 'wb')) pickle.dump(
config_data,
open(os.path.join(
app.config['CONFIG_PATH'],
request.args.get('name')), 'wb'))
# Overwrite default config if user has cookies disabled # Overwrite default config if user has cookies disabled
if g.cookies_disabled: if g.cookies_disabled:
open(app.config['DEFAULT_CONFIG'], 'w').write(json.dumps(config_data, indent=4)) open(app.config['DEFAULT_CONFIG'], 'w').write(
json.dumps(config_data, indent=4))
session['config'] = config_data session['config'] = config_data
return redirect(config_data['url']) return redirect(config_data['url'])
@ -274,7 +314,8 @@ def element():
except exceptions.RequestException: except exceptions.RequestException:
pass pass
empty_gif = base64.b64decode('R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==') empty_gif = base64.b64decode(
'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
return send_file(io.BytesIO(empty_gif), mimetype='image/gif') return send_file(io.BytesIO(empty_gif), mimetype='image/gif')
@ -282,38 +323,62 @@ def element():
@auth_required
def window():
    """Render a remote page inside the viewer template with scripts removed.

    Reads the target address from the ``location`` query argument, fetches
    it through ``g.user_request``, rewrites root-relative ``src``/``href``
    attributes so they point at the remote host, strips every ``<script>``
    element and renders the result with ``display.html``.
    """
    # Local import keeps this block independent of the module's import list
    from urllib.parse import urlsplit

    location = request.args.get('location')
    get_body = g.user_request.send(base_url=location).text

    # Root-relative links ("/path") have to resolve against the remote
    # origin (scheme + host), not against this server. Fall back to the raw
    # location when it carries no network location (e.g. no scheme given).
    parts = urlsplit(location)
    if parts.netloc:
        host_url = parts.scheme + '://' + parts.netloc
    else:
        host_url = location.rstrip('/')

    # The previous rewrite appended a closing quote after the location,
    # which produced attributes such as src="https://host"img.png" and
    # broke every rewritten link. Re-insert the leading slash instead.
    for attr in ('src', 'href'):
        get_body = get_body.replace(
            attr + '="/',
            attr + '="' + host_url + '/')

    results = bsoup(get_body, 'html.parser')

    # Drop all scripts so the proxied page cannot execute code in the viewer
    for script in results('script'):
        script.decompose()

    return render_template('display.html', response=results)
def run_app(): def run_app():
parser = argparse.ArgumentParser(description='Whoogle Search console runner') parser = argparse.ArgumentParser(
parser.add_argument('--port', default=5000, metavar='<port number>', description='Whoogle Search console runner')
help='Specifies a port to run on (default 5000)') parser.add_argument(
parser.add_argument('--host', default='127.0.0.1', metavar='<ip address>', '--port',
help='Specifies the host address to use (default 127.0.0.1)') default=5000,
parser.add_argument('--debug', default=False, action='store_true', metavar='<port number>',
help='Activates debug mode for the server (default False)') help='Specifies a port to run on (default 5000)')
parser.add_argument('--https-only', default=False, action='store_true', parser.add_argument(
help='Enforces HTTPS redirects for all requests') '--host',
parser.add_argument('--userpass', default='', metavar='<username:password>', default='127.0.0.1',
help='Sets a username/password basic auth combo (default None)') metavar='<ip address>',
parser.add_argument('--proxyauth', default='', metavar='<username:password>', help='Specifies the host address to use (default 127.0.0.1)')
help='Sets a username/password for a HTTP/SOCKS proxy (default None)') parser.add_argument(
parser.add_argument('--proxytype', default='', metavar='<socks4|socks5|http>', '--debug',
help='Sets a proxy type for all connections (default None)') default=False,
parser.add_argument('--proxyloc', default='', metavar='<location:port>', action='store_true',
help='Sets a proxy location for all connections (default None)') help='Activates debug mode for the server (default False)')
parser.add_argument(
'--https-only',
default=False,
action='store_true',
help='Enforces HTTPS redirects for all requests')
parser.add_argument(
'--userpass',
default='',
metavar='<username:password>',
help='Sets a username/password basic auth combo (default None)')
parser.add_argument(
'--proxyauth',
default='',
metavar='<username:password>',
help='Sets a username/password for a HTTP/SOCKS proxy (default None)')
parser.add_argument(
'--proxytype',
default='',
metavar='<socks4|socks5|http>',
help='Sets a proxy type for all connections (default None)')
parser.add_argument(
'--proxyloc',
default='',
metavar='<location:port>',
help='Sets a proxy location for all connections (default None)')
args = parser.parse_args() args = parser.parse_args()
if args.userpass: if args.userpass:

View File

@ -7,14 +7,16 @@ SKIP_ARGS = ['ref_src', 'utm']
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>' FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
LOGO_URL = GOOG_IMG + '_desk' LOGO_URL = GOOG_IMG + '_desk'
BLANK_B64 = ''' BLANK_B64 = ('data:image/png;base64,'
 'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw'
''' 'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC')
# Ad keywords
BLACKLIST = [ BLACKLIST = [
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고', 'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', 'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan', '広告', 'Augl.',
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio' 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', 'آگهی',
'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio'
] ]
SITE_ALTS = { SITE_ALTS = {
@ -25,7 +27,8 @@ SITE_ALTS = {
def has_ad_content(element: str) -> bool:
    """Tell whether ``element`` is the text of an advertisement label.

    Args:
        element: Text content of a result element.

    Returns:
        True when the text equals one of the ``BLACKLIST`` ad keywords
        (compared case-insensitively) or contains the ad info marker.
    """
    # NOTE(review): the marker literal was empty in the text this edit was
    # made from, and `'' in element` is True for every string, which would
    # flag all content as an ad. It is restored here as U+24D8 (circled
    # latin small letter i) -- confirm against the upstream source.
    ad_marker = '\u24d8'

    ad_keywords = {value.upper() for value in BLACKLIST}
    return element.upper() in ad_keywords or ad_marker in element
def get_first_link(soup): def get_first_link(soup):

View File

@ -1,25 +1,26 @@
from app.filter import Filter, get_first_link from app.filter import Filter, get_first_link
from app.utils.session_utils import generate_user_keys from app.utils.session_utils import generate_user_keys
from app.request import gen_query from app.request import gen_query
from bs4 import BeautifulSoup from bs4 import BeautifulSoup as bsoup
from cryptography.fernet import Fernet, InvalidToken from cryptography.fernet import Fernet, InvalidToken
from flask import g from flask import g
from typing import Any, Tuple from typing import Any, Tuple
TOR_BANNER = '<hr><h1 style="text-align: center">You are using Tor</h1><hr>' TOR_BANNER = '<hr><h1 style="text-align: center">You are using Tor</h1><hr>'
class RoutingUtils: class RoutingUtils:
def __init__(self, request, config, session, cookies_disabled=False): def __init__(self, request, config, session, cookies_disabled=False):
self.request_params = request.args if request.method == 'GET' else request.form method = request.method
self.request_params = request.args if method == 'GET' else request.form
self.user_agent = request.headers.get('User-Agent') self.user_agent = request.headers.get('User-Agent')
self.feeling_lucky = False self.feeling_lucky = False
self.config = config self.config = config
self.session = session self.session = session
self.query = '' self.query = ''
self.cookies_disabled = cookies_disabled self.cookies_disabled = cookies_disabled
self.search_type = self.request_params.get('tbm') if 'tbm' in self.request_params else '' self.search_type = self.request_params.get(
'tbm') if 'tbm' in self.request_params else ''
def __getitem__(self, name): def __getitem__(self, name):
return getattr(self, name) return getattr(self, name)
@ -45,7 +46,9 @@ class RoutingUtils:
else: else:
# Attempt to decrypt if this is an internal link # Attempt to decrypt if this is an internal link
try: try:
q = Fernet(self.session['fernet_keys']['text_key']).decrypt(q.encode()).decode() q = Fernet(
self.session['fernet_keys']['text_key']
).decrypt(q.encode()).decode()
except InvalidToken: except InvalidToken:
pass pass
@ -53,29 +56,40 @@ class RoutingUtils:
self.session['fernet_keys']['text_key'] = generate_user_keys( self.session['fernet_keys']['text_key'] = generate_user_keys(
cookies_disabled=self.cookies_disabled)['text_key'] cookies_disabled=self.cookies_disabled)['text_key']
# Format depending on whether or not the query is a "feeling lucky" query # Strip leading '! ' for "feeling lucky" queries
self.feeling_lucky = q.startswith('! ') self.feeling_lucky = q.startswith('! ')
self.query = q[2:] if self.feeling_lucky else q self.query = q[2:] if self.feeling_lucky else q
return self.query return self.query
def bang_operator(self, bangs_dict: dict) -> str:
    """Resolve a leading bang operator in the query to a redirect URL.

    Args:
        bangs_dict: Mapping of bang operator to an entry whose ``'url'``
            value is a format string with one ``{}`` placeholder.

    Returns:
        The entry's URL formatted with the rest of the query when the
        first space-separated token of ``self.query`` is a known bang,
        otherwise an empty string.
    """
    # Only the first token can be a bang. Splitting once keeps any later
    # occurrence of the same text in the search terms intact; the previous
    # str.replace() call stripped it from everywhere in the query.
    operator, _, remainder = self.query.partition(' ')

    bang = bangs_dict.get(operator)
    if bang is None:
        return ''

    return bang['url'].format(remainder.strip())
def generate_response(self) -> Tuple[Any, int]: def generate_response(self) -> Tuple[Any, int]:
mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent
content_filter = Filter(self.session['fernet_keys'], mobile=mobile, config=self.config) content_filter = Filter(
full_query = gen_query(self.query, self.request_params, self.config, content_filter.near) self.session['fernet_keys'],
mobile=mobile,
config=self.config)
full_query = gen_query(
self.query,
self.request_params,
self.config,
content_filter.near)
get_body = g.user_request.send(query=full_query) get_body = g.user_request.send(query=full_query)
# Produce cleanable html soup from response # Produce cleanable html soup from response
html_soup = BeautifulSoup(content_filter.reskin(get_body.text), 'html.parser') html_soup = bsoup(content_filter.reskin(get_body.text), 'html.parser')
html_soup.insert(0, BeautifulSoup( html_soup.insert(
TOR_BANNER, 0,
features='lxml') if g.user_request.tor_valid else BeautifulSoup("", features="lxml")) bsoup(TOR_BANNER, 'html.parser')
if g.user_request.tor_valid else bsoup('', 'html.parser'))
if self.feeling_lucky: if self.feeling_lucky:
return get_first_link(html_soup), 1 return get_first_link(html_soup), 1
@ -83,11 +97,13 @@ class RoutingUtils:
formatted_results = content_filter.clean(html_soup) formatted_results = content_filter.clean(html_soup)
# Append user config to all search links, if available # Append user config to all search links, if available
param_str = ''.join('&{}={}'.format(k, v) param_str = ''.join('&{}={}'.format(k, v)
for k, v in self.request_params.to_dict(flat=True).items() for k, v in
if self.config.is_safe_key(k)) self.request_params.to_dict(flat=True).items()
if self.config.is_safe_key(k))
for link in formatted_results.find_all('a', href=True): for link in formatted_results.find_all('a', href=True):
if 'search?' not in link['href'] or link['href'].index('search?') > 1: if 'search?' not in link['href'] or link['href'].index(
'search?') > 1:
continue continue
link['href'] += param_str link['href'] += param_str

View File

@ -18,6 +18,7 @@ more-itertools==8.3.0
packaging==20.4 packaging==20.4
pluggy==0.13.1 pluggy==0.13.1
py==1.8.1 py==1.8.1
pycodestyle==2.6.0
pycparser==2.19 pycparser==2.19
pyOpenSSL==19.1.0 pyOpenSSL==19.1.0
pyparsing==2.4.7 pyparsing==2.4.7

View File

@ -3,13 +3,12 @@ from app.filter import Filter
from app.utils.session_utils import generate_user_keys from app.utils.session_utils import generate_user_keys
from datetime import datetime from datetime import datetime
from dateutil.parser import * from dateutil.parser import *
import json
import os
def get_search_results(data): def get_search_results(data):
secret_key = generate_user_keys() secret_key = generate_user_keys()
soup = Filter(user_keys=secret_key).clean(BeautifulSoup(data, 'html.parser')) soup = Filter(user_keys=secret_key).clean(
BeautifulSoup(data, 'html.parser'))
main_divs = soup.find('div', {'id': 'main'}) main_divs = soup.find('div', {'id': 'main'})
assert len(main_divs) > 1 assert len(main_divs) > 1
@ -17,7 +16,9 @@ def get_search_results(data):
result_divs = [] result_divs = []
for div in main_divs: for div in main_divs:
# Result divs should only have 1 inner div # Result divs should only have 1 inner div
if len(list(div.children)) != 1 or not div.findChild() or 'div' not in div.findChild().name: if (len(list(div.children)) != 1
or not div.findChild()
or 'div' not in div.findChild().name):
continue continue
result_divs.append(div) result_divs.append(div)
@ -78,6 +79,7 @@ def test_recent_results(client):
try: try:
date = parse(date_span) date = parse(date_span)
assert (current_date - date).days <= (num_days + 5) # Date can have a little bit of wiggle room # Date can have a little bit of wiggle room
assert (current_date - date).days <= (num_days + 5)
except ParserError: except ParserError:
pass pass