PEP-8: Fix formatting issues, add CI workflow (#161)
Enforces PEP-8 formatting for all Python code. Adds a GitHub Actions build for checking PEP-8 formatting using pycodestyle.
parent b55aad3fdf
commit 375f4ee9fd

@@ -0,0 +1,22 @@
+name: pep8
+
+on:
+  push
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pycodestyle
+    - name: Run pycodestyle
+      run: |
+        pycodestyle --show-source --show-pep8 app/*
+        pycodestyle --show-source --show-pep8 test/*
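
For reference, the check the workflow performs can also be reproduced through pycodestyle's Python API. This is a sketch, not part of the commit; it assumes pycodestyle is installed and reuses the app/ and test/ paths from the CI step:

    import pycodestyle

    # Lint everything under app/ and test/, mirroring the two CI commands
    style = pycodestyle.StyleGuide(show_source=True)
    report = style.check_files(['app', 'test'])
    print('PEP-8 violations found:', report.total_errors)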

@@ -6,20 +6,35 @@ from flask_session import Session
 import os
 from stem import Signal

-app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
+app = Flask(__name__, static_folder=os.path.dirname(
+    os.path.abspath(__file__)) + '/static')
 app.user_elements = {}
 app.default_key_set = generate_user_keys()
 app.no_cookie_ips = []
 app.config['SECRET_KEY'] = os.urandom(32)
 app.config['SESSION_TYPE'] = 'filesystem'
 app.config['VERSION_NUMBER'] = '0.2.1'
-app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
-app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
-app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'config'))
-app.config['DEFAULT_CONFIG'] = os.path.join(app.config['CONFIG_PATH'], 'config.json')
-app.config['SESSION_FILE_DIR'] = os.path.join(app.config['CONFIG_PATH'], 'session')
-app.config['BANG_PATH'] = os.getenv('CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'bangs'))
-app.config['BANG_FILE'] = os.path.join(app.config['BANG_PATH'], 'bangs.json')
+app.config['APP_ROOT'] = os.getenv(
+    'APP_ROOT',
+    os.path.dirname(os.path.abspath(__file__)))
+app.config['STATIC_FOLDER'] = os.getenv(
+    'STATIC_FOLDER',
+    os.path.join(app.config['APP_ROOT'], 'static'))
+app.config['CONFIG_PATH'] = os.getenv(
+    'CONFIG_VOLUME',
+    os.path.join(app.config['STATIC_FOLDER'], 'config'))
+app.config['DEFAULT_CONFIG'] = os.path.join(
+    app.config['CONFIG_PATH'],
+    'config.json')
+app.config['SESSION_FILE_DIR'] = os.path.join(
+    app.config['CONFIG_PATH'],
+    'session')
+app.config['BANG_PATH'] = os.getenv(
+    'CONFIG_VOLUME',
+    os.path.join(app.config['STATIC_FOLDER'], 'bangs'))
+app.config['BANG_FILE'] = os.path.join(
+    app.config['BANG_PATH'],
+    'bangs.json')

 if not os.path.exists(app.config['CONFIG_PATH']):
     os.makedirs(app.config['CONFIG_PATH'])

@@ -38,4 +53,4 @@ Session(app)
 # Attempt to acquire tor identity, to determine if Tor config is available
 send_tor_signal(Signal.HEARTBEAT)

-from app import routes
+from app import routes  # noqa

@@ -32,20 +32,27 @@ class Filter:
     def reskin(self, page):
         # Aesthetic only re-skinning
         if self.dark:
-            page = page.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea')
+            page = page.replace(
+                'fff', '000').replace(
+                '202124', 'ddd').replace(
+                '1967D2', '3b85ea')

         return page

     def encrypt_path(self, msg, is_element=False):
         # Encrypts path to avoid plaintext results in logs
         if is_element:
-            # Element paths are tracked differently in order for the element key to be regenerated
-            # once all elements have been loaded
-            enc_path = Fernet(self.user_keys['element_key']).encrypt(msg.encode()).decode()
+            # Element paths are encrypted separately from text, to allow key
+            # regeneration once all items have been served to the user
+            enc_path = Fernet(
+                self.user_keys['element_key']
+            ).encrypt(msg.encode()).decode()
             self._elements += 1
             return enc_path

-        return Fernet(self.user_keys['text_key']).encrypt(msg.encode()).decode()
+        return Fernet(
+            self.user_keys['text_key']
+        ).encrypt(msg.encode()).decode()

     def clean(self, soup):
         self.main_divs = soup.find('div', {'id': 'main'})
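
The wrapped Fernet calls above are a pure reflow; behavior is unchanged. As a minimal sketch of the round trip the filter relies on (the key and path are illustrative):

    from cryptography.fernet import Fernet

    key = Fernet.generate_key()  # stands in for the per-session user key
    token = Fernet(key).encrypt(b'/search?q=test').decode()  # URL-safe text
    assert Fernet(key).decrypt(token.encode()) == b'/search?q=test'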

@@ -74,8 +81,8 @@ class Filter:
         footer = soup.find('footer')
         if footer:
             # Remove divs that have multiple links beyond just page navigation
             [_.decompose() for _ in footer.find_all('div', recursive=False)
              if len(_.find_all('a', href=True)) > 3]

         header = soup.find('header')
         if header:

@@ -88,8 +95,9 @@ class Filter:
             return

         for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]:
-            has_ad = len([_ for _ in div.find_all('span', recursive=True) if has_ad_content(_.text)])
-            _ = div.decompose() if has_ad else None
+            div_ads = [_ for _ in div.find_all('span', recursive=True)
+                       if has_ad_content(_.text)]
+            _ = div.decompose() if len(div_ads) else None

     def fix_question_section(self):
         if not self.main_divs:

@@ -97,14 +105,14 @@ class Filter:

         question_divs = [_ for _ in self.main_divs.find_all(
             'div', recursive=False
         ) if len(_.find_all('h2')) > 0]

         if len(question_divs) == 0:
             return

         # Wrap section in details element to allow collapse/expand
-        details = BeautifulSoup(features='lxml').new_tag('details')
-        summary = BeautifulSoup(features='lxml').new_tag('summary')
+        details = BeautifulSoup('html.parser').new_tag('details')
+        summary = BeautifulSoup('html.parser').new_tag('summary')
         summary.string = question_divs[0].find('h2').text
         question_divs[0].find('h2').decompose()
         details.append(summary)

@@ -113,7 +121,7 @@ class Filter:
         for question_div in question_divs:
             questions = [_ for _ in question_div.find_all(
                 'div', recursive=True
             ) if _.text.endswith('?')]

             for question in questions:
                 question['style'] = 'padding: 10px; font-style: italic;'

@@ -131,11 +139,15 @@ class Filter:
             element['src'] = BLANK_B64
             return

-        element['src'] = 'element?url=' + self.encrypt_path(element_src, is_element=True) + \
-            '&type=' + urlparse.quote(mime)
-        # TODO: Non-mobile image results link to website instead of image
+        element['src'] = 'element?url=' + self.encrypt_path(
+            element_src,
+            is_element=True) + '&type=' + urlparse.quote(mime)
+
+        # FIXME: Non-mobile image results link to website instead of image
         # if not self.mobile:
-        # img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser'))
+        # img.append(
+        #     BeautifulSoup(FULL_RES_IMG.format(element_src),
+        #                   'html.parser'))

     def update_styling(self, soup):
         # Remove unnecessary button(s)

@@ -149,8 +161,9 @@ class Filter:
         # Update logo
         logo = soup.find('a', {'class': 'l'})
         if logo and self.mobile:
-            logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; ' \
-                'font-size:18px; '
+            logo['style'] = ('display:flex; justify-content:center; '
+                             'align-items:center; color:#685e79; '
+                             'font-size:18px; ')

         # Fix search bar length on mobile
         try:

@@ -163,7 +176,7 @@ class Filter:
         # Replace href with only the intended destination (no "utm" type tags)
         href = link['href'].replace('https://www.google.com', '')
         if 'advanced_search' in href or 'tbm=shop' in href:
-            # TODO: The "Shopping" tab requires further filtering (see #136)
+            # FIXME: The "Shopping" tab requires further filtering (see #136)
             # Temporarily removing all links to that tab for now.
             link.decompose()
             return

@@ -171,20 +184,26 @@ class Filter:
             link['target'] = '_blank'

         result_link = urlparse.urlparse(href)
-        query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
+        query_link = parse_qs(
+            result_link.query
+        )['q'][0] if '?q=' in href else ''

         if query_link.startswith('/'):
-            # Internal google links (i.e. mail, maps, etc) should still be forwarded to Google
+            # Internal google links (i.e. mail, maps, etc) should still
+            # be forwarded to Google
             link['href'] = 'https://google.com' + query_link
         elif '/search?q=' in href:
-            # "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes
+            # "li:1" implies the query should be interpreted verbatim,
+            # which is accomplished by wrapping the query in double quotes
             if 'li:1' in href:
                 query_link = '"' + query_link + '"'
             new_search = 'search?q=' + self.encrypt_path(query_link)

             query_params = parse_qs(urlparse.urlparse(href).query)
             for param in VALID_PARAMS:
-                param_val = query_params[param][0] if param in query_params else ''
+                if param not in query_params:
+                    continue
+                param_val = query_params[param][0]
                 new_search += '&' + param + '=' + param_val
             link['href'] = new_search
         elif 'url?q=' in href:
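
The reflowed lookup keeps the same parse_qs semantics: parse_qs maps each key to a list of values, hence the ['q'][0] indexing. A quick illustration with a made-up URL:

    from urllib.parse import parse_qs, urlparse

    params = parse_qs(urlparse('/search?q=test&hl=en').query)
    assert params == {'q': ['test'], 'hl': ['en']}  # one list per key
    assert params['q'][0] == 'test'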

@@ -199,9 +218,11 @@ class Filter:

         # Replace link location if "alts" config is enabled
         if self.alt_redirect:
-            # Search and replace all link descriptions with alternative location
+            # Search and replace all link descriptions
+            # with alternative location
             link['href'] = get_site_alt(link['href'])
-            link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys())))
+            link_desc = link.find_all(
+                text=re.compile('|'.join(SITE_ALTS.keys())))
             if len(link_desc) == 0:
                 return

@@ -128,7 +128,7 @@ class Config:
         {'name': 'Fiji', 'value': 'countryFJ'},
         {'name': 'Finland', 'value': 'countryFI'},
         {'name': 'France', 'value': 'countryFR'},
-        {'name': 'France\, Metropolitan', 'value': 'countryFX'},
+        {'name': r'France\, Metropolitan', 'value': 'countryFX'},
         {'name': 'French Guiana', 'value': 'countryGF'},
         {'name': 'French Polynesia', 'value': 'countryPF'},
         {'name': 'French Southern Territories', 'value': 'countryTF'},

@@ -167,7 +167,8 @@ class Config:
         {'name': 'Kazakhstan', 'value': 'countryKZ'},
         {'name': 'Kenya', 'value': 'countryKE'},
         {'name': 'Kiribati', 'value': 'countryKI'},
-        {'name': 'Korea, Democratic People\'s Republic of', 'value': 'countryKP'},
+        {'name': 'Korea, Democratic People\'s Republic of',
+         'value': 'countryKP'},
         {'name': 'Korea, Republic of', 'value': 'countryKR'},
         {'name': 'Kuwait', 'value': 'countryKW'},
         {'name': 'Kyrgyzstan', 'value': 'countryKG'},

@@ -181,7 +182,8 @@ class Config:
         {'name': 'Lithuania', 'value': 'countryLT'},
         {'name': 'Luxembourg', 'value': 'countryLU'},
         {'name': 'Macao', 'value': 'countryMO'},
-        {'name': 'Macedonia, the Former Yugosalv Republic of', 'value': 'countryMK'},
+        {'name': 'Macedonia, the Former Yugosalv Republic of',
+         'value': 'countryMK'},
         {'name': 'Madagascar', 'value': 'countryMG'},
         {'name': 'Malawi', 'value': 'countryMW'},
         {'name': 'Malaysia', 'value': 'countryMY'},

@@ -253,7 +255,8 @@ class Config:
         {'name': 'Solomon Islands', 'value': 'countrySB'},
         {'name': 'Somalia', 'value': 'countrySO'},
         {'name': 'South Africa', 'value': 'countryZA'},
-        {'name': 'South Georgia and the South Sandwich Islands', 'value': 'countryGS'},
+        {'name': 'South Georgia and the South Sandwich Islands',
+         'value': 'countryGS'},
         {'name': 'Spain', 'value': 'countryES'},
         {'name': 'Sri Lanka', 'value': 'countryLK'},
         {'name': 'Sudan', 'value': 'countrySD'},

@@ -310,6 +313,12 @@ class Config:
         self.alts = False
         self.new_tab = False
         self.get_only = False
+        self.safe_keys = [
+            'lang_search',
+            'lang_interface',
+            'ctry',
+            'dark'
+        ]

         for key, value in kwargs.items():
             setattr(self, key, value)

@@ -338,12 +347,7 @@ class Config:
             array
         """

-        return key in [
-            'lang_search',
-            'lang_interface',
-            'ctry',
-            'dark'
-        ]
+        return key in self.safe_keys

     def from_params(self, params) -> 'Config':
         """Modify user config with search parameters. This is primarily
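
Moving the whitelist into self.safe_keys lets is_safe_key collapse to a one-liner. In outline (a standalone sketch, not the full Config class):

    class Config:
        def __init__(self):
            self.safe_keys = ['lang_search', 'lang_interface', 'ctry', 'dark']

        def is_safe_key(self, key) -> bool:
            # True only for params that are safe to forward on search links
            return key in self.safe_keys

    assert Config().is_safe_key('dark')
    assert not Config().is_safe_key('tor')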

@@ -8,9 +8,9 @@ import os
 from stem import Signal, SocketError
 from stem.control import Controller

-# Core Google search URLs
 SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
-AUTOCOMPLETE_URL = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
+AUTOCOMPLETE_URL = ('https://suggestqueries.google.com/'
+                    'complete/search?client=toolbar&')

 MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
 DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
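
The parenthesized AUTOCOMPLETE_URL relies on Python's implicit concatenation of adjacent string literals, the usual way to keep a long URL under the 79-character limit without backslash continuations. For example (the URL here is illustrative):

    URL = ('https://example.com/'  # adjacent literals are joined
           'path?param=value')     # at compile time
    assert URL == 'https://example.com/path?param=value'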

@@ -72,11 +72,16 @@ def gen_query(query, args, config, near_city=None) -> str:
         result_tbs = args.get('tbs')
         param_dict['tbs'] = '&tbs=' + result_tbs

-        # Occasionally the 'tbs' param provided by google also contains a field for 'lr', but formatted
-        # strangely. This is a (admittedly not very elegant) solution for this.
-        # Ex/ &tbs=qdr:h,lr:lang_1pl --> the lr param needs to be extracted and have the "1" digit removed in this case
+        # Occasionally the 'tbs' param provided by google also contains a
+        # field for 'lr', but formatted strangely. This is a rough solution
+        # for this.
+        #
+        # Example:
+        # &tbs=qdr:h,lr:lang_1pl
+        # -- the lr param needs to be extracted and remove the leading '1'
         sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]
-        sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:len(sub_lang[0])] if len(sub_lang) > 0 else ''
+        sub_lang = sub_lang[0][sub_lang[0].find('lr:') +
+                               3:len(sub_lang[0])] if len(sub_lang) > 0 else ''

     # Ensure search query is parsable
     query = urlparse.quote(query)
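
The slice rewrite above only re-wraps the line; the extraction logic is unchanged. Its behavior, using the tbs value from the comment:

    result_tbs = 'qdr:h,lr:lang_1pl'
    sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]
    sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:] if sub_lang else ''
    assert sub_lang == 'lang_1pl'
    # The digits are stripped later when building the 'lr' param
    assert ''.join(_ for _ in sub_lang if not _.isdigit()) == 'lang_pl'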

@@ -93,20 +98,26 @@ def gen_query(query, args, config, near_city=None) -> str:
     if near_city:
         param_dict['near'] = '&near=' + urlparse.quote(near_city)

-    # Set language for results (lr) if source isn't set, otherwise use the result
-    # language param provided by google (but with the strange digit(s) removed)
+    # Set language for results (lr) if source isn't set, otherwise use the
+    # result language param provided in the results
     if 'source' in args:
         param_dict['source'] = '&source=' + args.get('source')
-        param_dict['lr'] = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else ''
+        param_dict['lr'] = ('&lr=' + ''.join(
+            [_ for _ in sub_lang if not _.isdigit()]
+        )) if sub_lang else ''
     else:
-        param_dict['lr'] = ('&lr=' + config.lang_search) if config.lang_search else ''
+        param_dict['lr'] = (
+            '&lr=' + config.lang_search
+        ) if config.lang_search else ''

-    # Set autocorrected search ignore
+    # 'nfpr' defines the exclusion of results from an auto-corrected query
     if 'nfpr' in args:
         param_dict['nfpr'] = '&nfpr=' + args.get('nfpr')

     param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
-    param_dict['hl'] = ('&hl=' + config.lang_interface.replace('lang_', '')) if config.lang_interface else ''
+    param_dict['hl'] = (
+        '&hl=' + config.lang_interface.replace('lang_', '')
+    ) if config.lang_interface else ''
     param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')

     for val in param_dict.values():

@@ -126,6 +137,7 @@ class Request:
     root_path -- the root path of the whoogle instance
     config -- the user's current whoogle configuration
     """
+
     def __init__(self, normal_ua, root_path, config: Config):
         # Send heartbeat to Tor, used in determining if the user can or cannot
         # enable Tor for future requests

@@ -143,9 +155,10 @@ class Request:
                 ':' + os.environ.get('WHOOGLE_PROXY_PASS')
             self.proxies = {
                 'http': os.environ.get('WHOOGLE_PROXY_TYPE') + '://' +
                 auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC'),
             }
-            self.proxies['https'] = self.proxies['http'].replace('http', 'https')
+            self.proxies['https'] = self.proxies['http'].replace('http',
+                                                                 'https')
         else:
             self.proxies = {
                 'http': 'socks5://127.0.0.1:9050',

@@ -169,7 +182,8 @@ class Request:

         """
         ac_query = dict(hl=self.language, q=query)
-        response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text
+        response = self.send(base_url=AUTOCOMPLETE_URL,
+                             query=urlparse.urlencode(ac_query)).text

         if response:
             dom = etree.fromstring(response)

@@ -178,14 +192,14 @@ class Request:
             return []

     def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response:
-        """Sends an outbound request to a URL. Optionally sends the request using Tor, if
-        enabled by the user.
+        """Sends an outbound request to a URL. Optionally sends the request
+        using Tor, if enabled by the user.

         Args:
             base_url: The URL to use in the request
             query: The optional query string for the request
-            attempt: The number of attempts made for the request (used for cycling
-                through Tor identities, if enabled)
+            attempt: The number of attempts made for the request
+                (used for cycling through Tor identities, if enabled)

         Returns:
             Response: The Response object returned by the requests call

@@ -195,21 +209,30 @@ class Request:
             'User-Agent': self.modified_user_agent
         }

-        # Validate Tor connection and request new identity if the last one failed
-        if self.tor and not send_tor_signal(Signal.NEWNYM if attempt > 0 else Signal.HEARTBEAT):
-            raise TorError("Tor was previously enabled, but the connection has been dropped. Please check your " +
-                           "Tor configuration and try again.", disable=True)
+        # Validate Tor conn and request new identity if the last one failed
+        if self.tor and not send_tor_signal(
+                Signal.NEWNYM if attempt > 0 else Signal.HEARTBEAT):
+            raise TorError(
+                "Tor was previously enabled, but the connection has been "
+                "dropped. Please check your Tor configuration and try again.",
+                disable=True)

         # Make sure that the tor connection is valid, if enabled
         if self.tor:
-            tor_check = requests.get('https://check.torproject.org/', proxies=self.proxies, headers=headers)
+            tor_check = requests.get('https://check.torproject.org/',
+                                     proxies=self.proxies, headers=headers)
             self.tor_valid = 'Congratulations' in tor_check.text

             if not self.tor_valid:
-                raise TorError("Tor connection succeeded, but the connection could not be validated by torproject.org",
-                               disable=True)
+                raise TorError(
+                    "Tor connection succeeded, but the connection could not "
+                    "be validated by torproject.org",
+                    disable=True)

-        response = requests.get(base_url + query, proxies=self.proxies, headers=headers)
+        response = requests.get(
+            base_url + query,
+            proxies=self.proxies,
+            headers=headers)

         # Retry query with new identity if using Tor (max 10 attempts)
         if 'form id="captcha-form"' in response.text and self.tor:
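
The Tor checks above lean on the proxies mapping built in __init__; in outline (a sketch using the same local SOCKS port the file already defaults to, and requiring the requests[socks] extra):

    import requests

    proxies = {'http': 'socks5://127.0.0.1:9050',
               'https': 'socks5://127.0.0.1:9050'}
    # torproject.org reports "Congratulations" when traffic exits via Tor
    response = requests.get('https://check.torproject.org/', proxies=proxies)
    print('Congratulations' in response.text)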

app/routes.py

@@ -9,7 +9,8 @@ import uuid
 from functools import wraps

 import waitress
-from flask import jsonify, make_response, request, redirect, render_template, send_file, session, url_for
+from flask import jsonify, make_response, request, redirect, render_template, \
+    send_file, session, url_for
 from requests import exceptions

 from app import app

@@ -30,23 +31,30 @@ def auth_required(f):
         # Skip if username/password not set
         whoogle_user = os.getenv('WHOOGLE_USER', '')
         whoogle_pass = os.getenv('WHOOGLE_PASS', '')
-        if (not whoogle_user or not whoogle_pass) or \
-                (auth and whoogle_user == auth.username and whoogle_pass == auth.password):
+        if (not whoogle_user or not whoogle_pass) or (
+                auth
+                and whoogle_user == auth.username
+                and whoogle_pass == auth.password):
             return f(*args, **kwargs)
         else:
-            return make_response('Not logged in', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'})
+            return make_response('Not logged in', 401, {
+                'WWW-Authenticate': 'Basic realm="Login Required"'})

     return decorated


 @app.before_request
 def before_request_func():
-    g.request_params = request.args if request.method == 'GET' else request.form
+    g.request_params = (
+        request.args if request.method == 'GET' else request.form
+    )
     g.cookies_disabled = False

     # Generate session values for user if unavailable
     if not valid_user_session(session):
         session['config'] = json.load(open(app.config['DEFAULT_CONFIG'])) \
-            if os.path.exists(app.config['DEFAULT_CONFIG']) else {'url': request.url_root}
+            if os.path.exists(app.config['DEFAULT_CONFIG']) else {
+                'url': request.url_root}
         session['uuid'] = str(uuid.uuid4())
         session['fernet_keys'] = generate_user_keys(True)

@@ -63,12 +71,16 @@ def before_request_func():
     is_http = request.url.startswith('http://')

     if (is_heroku and is_http) or (https_only and is_http):
-        return redirect(request.url.replace('http://', 'https://', 1), code=308)
+        return redirect(
+            request.url.replace('http://', 'https://', 1),
+            code=308)

     g.user_config = Config(**session['config'])

     if not g.user_config.url:
-        g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root
+        g.user_config.url = request.url_root.replace(
+            'http://',
+            'https://') if https_only else request.url_root

     g.user_request = Request(
         request.headers.get('User-Agent'),

@@ -82,13 +94,17 @@ def before_request_func():
 def after_request_func(response):
     if app.user_elements[session['uuid']] <= 0 and '/element' in request.url:
         # Regenerate element key if all elements have been served to user
-        session['fernet_keys']['element_key'] = '' if not g.cookies_disabled else app.default_key_set['element_key']
+        session['fernet_keys'][
+            'element_key'] = '' if not g.cookies_disabled else \
+            app.default_key_set['element_key']
         app.user_elements[session['uuid']] = 0

-    # Check if address consistently has cookies blocked, in which case start removing session
-    # files after creation.
-    # Note: This is primarily done to prevent overpopulation of session directories, since browsers that
-    # block cookies will still trigger Flask's session creation routine with every request.
+    # Check if address consistently has cookies blocked,
+    # in which case start removing session files after creation.
+    #
+    # Note: This is primarily done to prevent overpopulation of session
+    # directories, since browsers that block cookies will still trigger
+    # Flask's session creation routine with every request.
     if g.cookies_disabled and request.remote_addr not in app.no_cookie_ips:
         app.no_cookie_ips.append(request.remote_addr)
     elif g.cookies_disabled and request.remote_addr in app.no_cookie_ips:

@@ -101,6 +117,7 @@ def after_request_func(response):


 @app.errorhandler(404)
 def unknown_page(e):
+    app.logger.warn(e)
     return redirect(g.app_location)


@@ -109,7 +126,8 @@ def unknown_page(e):
 def index():
     # Reset keys
     session['fernet_keys'] = generate_user_keys(g.cookies_disabled)
-    error_message = session['error_message'] if 'error_message' in session else ''
+    error_message = session[
+        'error_message'] if 'error_message' in session else ''
     session['error_message'] = ''

     return render_template('index.html',

@@ -128,7 +146,8 @@ def opensearch():
     if opensearch_url.endswith('/'):
         opensearch_url = opensearch_url[:-1]

-    get_only = g.user_config.get_only or 'Chrome' in request.headers.get('User-Agent')
+    get_only = g.user_config.get_only or 'Chrome' in request.headers.get(
+        'User-Agent')

     return render_template(
         'opensearch.xml',

@@ -147,16 +166,23 @@ def autocomplete():

     # Search bangs if the query begins with "!", but not "! " (feeling lucky)
     if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
-        return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if _.startswith(q)]])
+        return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if
+                            _.startswith(q)]])

     if not q and not request.data:
         return jsonify({'?': []})
     elif request.data:
-        q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', ''))
+        q = urlparse.unquote_plus(
+            request.data.decode('utf-8').replace('q=', ''))

     # Return a list of suggestions for the query
-    # Note: If Tor is enabled, this returns nothing, as the request is almost always rejected
-    return jsonify([q, g.user_request.autocomplete(q) if not g.user_config.tor else []])
+    #
+    # Note: If Tor is enabled, this returns nothing, as the request is
+    # almost always rejected
+    return jsonify([
+        q,
+        g.user_request.autocomplete(q) if not g.user_config.tor else []
+    ])


 @app.route('/search', methods=['GET', 'POST'])

@@ -168,7 +194,8 @@ def search():
     # Update user config if specified in search args
     g.user_config = g.user_config.from_params(g.request_params)

-    search_util = RoutingUtils(request, g.user_config, session, cookies_disabled=g.cookies_disabled)
+    search_util = RoutingUtils(request, g.user_config, session,
+                               cookies_disabled=g.cookies_disabled)
     query = search_util.new_search_query()

     resolved_bangs = search_util.bang_operator(bang_json)

@@ -183,14 +210,17 @@ def search():
     try:
         response, elements = search_util.generate_response()
     except TorError as e:
-        session['error_message'] = e.message + ("\\n\\nTor config is now disabled!" if e.disable else "")
-        session['config']['tor'] = False if e.disable else session['config']['tor']
+        session['error_message'] = e.message + (
+            "\\n\\nTor config is now disabled!" if e.disable else "")
+        session['config']['tor'] = False if e.disable else session['config'][
+            'tor']
         return redirect(url_for('.index'))

     if search_util.feeling_lucky or elements < 0:
         return redirect(response, code=303)

-    # Keep count of external elements to fetch before element key can be regenerated
+    # Keep count of external elements to fetch before
+    # the element key can be regenerated
     app.user_elements[session['uuid']] = elements

     return render_template(

@@ -200,12 +230,13 @@ def search():
         dark_mode=g.user_config.dark,
         response=response,
         version_number=app.config['VERSION_NUMBER'],
-        search_header=render_template(
+        search_header=(render_template(
             'header.html',
             dark_mode=g.user_config.dark,
             query=urlparse.unquote(query),
             search_type=search_util.search_type,
-            mobile=g.user_request.mobile) if 'isch' not in search_util.search_type else '')
+            mobile=g.user_request.mobile)
+            if 'isch' not in search_util.search_type else ''))


 @app.route('/config', methods=['GET', 'POST', 'PUT'])

@@ -215,8 +246,12 @@ def config():
         return json.dumps(g.user_config.__dict__)
     elif request.method == 'PUT':
         if 'name' in request.args:
-            config_pkl = os.path.join(app.config['CONFIG_PATH'], request.args.get('name'))
-            session['config'] = pickle.load(open(config_pkl, 'rb')) if os.path.exists(config_pkl) else session['config']
+            config_pkl = os.path.join(
+                app.config['CONFIG_PATH'],
+                request.args.get('name'))
+            session['config'] = (pickle.load(open(config_pkl, 'rb'))
+                                 if os.path.exists(config_pkl)
+                                 else session['config'])
             return json.dumps(session['config'])
         else:
             return json.dumps({})

@@ -227,11 +262,16 @@ def config():

         # Save config by name to allow a user to easily load later
         if 'name' in request.args:
-            pickle.dump(config_data, open(os.path.join(app.config['CONFIG_PATH'], request.args.get('name')), 'wb'))
+            pickle.dump(
+                config_data,
+                open(os.path.join(
+                    app.config['CONFIG_PATH'],
+                    request.args.get('name')), 'wb'))

         # Overwrite default config if user has cookies disabled
         if g.cookies_disabled:
-            open(app.config['DEFAULT_CONFIG'], 'w').write(json.dumps(config_data, indent=4))
+            open(app.config['DEFAULT_CONFIG'], 'w').write(
+                json.dumps(config_data, indent=4))

         session['config'] = config_data
         return redirect(config_data['url'])

@@ -274,7 +314,8 @@ def element():
     except exceptions.RequestException:
         pass

-    empty_gif = base64.b64decode('R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
+    empty_gif = base64.b64decode(
+        'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
     return send_file(io.BytesIO(empty_gif), mimetype='image/gif')


@@ -282,38 +323,62 @@ def element():
 @auth_required
 def window():
     get_body = g.user_request.send(base_url=request.args.get('location')).text
-    get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
-    get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"')
+    get_body = get_body.replace('src="/',
+                                'src="' + request.args.get('location') + '"')
+    get_body = get_body.replace('href="/',
+                                'href="' + request.args.get('location') + '"')

-    results = BeautifulSoup(get_body, 'html.parser')
+    results = bsoup(get_body, 'html.parser')

-    try:
-        for script in results('script'):
-            script.decompose()
-    except Exception:
-        pass
+    for script in results('script'):
+        script.decompose()

     return render_template('display.html', response=results)


 def run_app():
-    parser = argparse.ArgumentParser(description='Whoogle Search console runner')
-    parser.add_argument('--port', default=5000, metavar='<port number>',
-                        help='Specifies a port to run on (default 5000)')
-    parser.add_argument('--host', default='127.0.0.1', metavar='<ip address>',
-                        help='Specifies the host address to use (default 127.0.0.1)')
-    parser.add_argument('--debug', default=False, action='store_true',
-                        help='Activates debug mode for the server (default False)')
-    parser.add_argument('--https-only', default=False, action='store_true',
-                        help='Enforces HTTPS redirects for all requests')
-    parser.add_argument('--userpass', default='', metavar='<username:password>',
-                        help='Sets a username/password basic auth combo (default None)')
-    parser.add_argument('--proxyauth', default='', metavar='<username:password>',
-                        help='Sets a username/password for a HTTP/SOCKS proxy (default None)')
-    parser.add_argument('--proxytype', default='', metavar='<socks4|socks5|http>',
-                        help='Sets a proxy type for all connections (default None)')
-    parser.add_argument('--proxyloc', default='', metavar='<location:port>',
-                        help='Sets a proxy location for all connections (default None)')
+    parser = argparse.ArgumentParser(
+        description='Whoogle Search console runner')
+    parser.add_argument(
+        '--port',
+        default=5000,
+        metavar='<port number>',
+        help='Specifies a port to run on (default 5000)')
+    parser.add_argument(
+        '--host',
+        default='127.0.0.1',
+        metavar='<ip address>',
+        help='Specifies the host address to use (default 127.0.0.1)')
+    parser.add_argument(
+        '--debug',
+        default=False,
+        action='store_true',
+        help='Activates debug mode for the server (default False)')
+    parser.add_argument(
+        '--https-only',
+        default=False,
+        action='store_true',
+        help='Enforces HTTPS redirects for all requests')
+    parser.add_argument(
+        '--userpass',
+        default='',
+        metavar='<username:password>',
+        help='Sets a username/password basic auth combo (default None)')
+    parser.add_argument(
+        '--proxyauth',
+        default='',
+        metavar='<username:password>',
+        help='Sets a username/password for a HTTP/SOCKS proxy (default None)')
+    parser.add_argument(
+        '--proxytype',
+        default='',
+        metavar='<socks4|socks5|http>',
+        help='Sets a proxy type for all connections (default None)')
+    parser.add_argument(
+        '--proxyloc',
+        default='',
+        metavar='<location:port>',
+        help='Sets a proxy location for all connections (default None)')
     args = parser.parse_args()

     if args.userpass:

@@ -7,14 +7,16 @@ SKIP_ARGS = ['ref_src', 'utm']
 FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
 GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
 LOGO_URL = GOOG_IMG + '_desk'
-BLANK_B64 = '''
-data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC
-'''
+BLANK_B64 = ('data:image/png;base64,'
+             'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw'
+             'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC')

+# Ad keywords
 BLACKLIST = [
-    'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
-    'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
-    'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio'
+    'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
+    'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan', '広告', 'Augl.',
+    'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', 'آگهی',
+    'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio'
 ]

 SITE_ALTS = {
|
||||||
|
|
||||||
|
|
||||||
def has_ad_content(element: str):
|
def has_ad_content(element: str):
|
||||||
return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element
|
return element.upper() in (value.upper() for value in BLACKLIST) \
|
||||||
|
or 'ⓘ' in element
|
||||||
|
|
||||||
|
|
||||||
def get_first_link(soup):
|
def get_first_link(soup):
|
||||||
|
|
|
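
has_ad_content is unchanged apart from the line break: a case-insensitive membership test against the keyword list, plus the 'ⓘ' marker used on sponsored results. For instance (BLACKLIST abbreviated for illustration):

    BLACKLIST = ['ad', 'anuncio', 'annuncio']

    def has_ad_content(element: str):
        return element.upper() in (value.upper() for value in BLACKLIST) \
            or 'ⓘ' in element

    assert has_ad_content('Ad')
    assert has_ad_content('results ⓘ')
    assert not has_ad_content('organic result')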

@@ -1,25 +1,26 @@
 from app.filter import Filter, get_first_link
 from app.utils.session_utils import generate_user_keys
 from app.request import gen_query
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup as bsoup
 from cryptography.fernet import Fernet, InvalidToken
 from flask import g
 from typing import Any, Tuple


 TOR_BANNER = '<hr><h1 style="text-align: center">You are using Tor</h1><hr>'


 class RoutingUtils:
     def __init__(self, request, config, session, cookies_disabled=False):
-        self.request_params = request.args if request.method == 'GET' else request.form
+        method = request.method
+        self.request_params = request.args if method == 'GET' else request.form
         self.user_agent = request.headers.get('User-Agent')
         self.feeling_lucky = False
         self.config = config
         self.session = session
         self.query = ''
         self.cookies_disabled = cookies_disabled
-        self.search_type = self.request_params.get('tbm') if 'tbm' in self.request_params else ''
+        self.search_type = self.request_params.get(
+            'tbm') if 'tbm' in self.request_params else ''

     def __getitem__(self, name):
         return getattr(self, name)

@@ -45,7 +46,9 @@ class RoutingUtils:
         else:
             # Attempt to decrypt if this is an internal link
             try:
-                q = Fernet(self.session['fernet_keys']['text_key']).decrypt(q.encode()).decode()
+                q = Fernet(
+                    self.session['fernet_keys']['text_key']
+                ).decrypt(q.encode()).decode()
             except InvalidToken:
                 pass

@@ -53,29 +56,40 @@ class RoutingUtils:
         self.session['fernet_keys']['text_key'] = generate_user_keys(
             cookies_disabled=self.cookies_disabled)['text_key']

-        # Format depending on whether or not the query is a "feeling lucky" query
+        # Strip leading '! ' for "feeling lucky" queries
         self.feeling_lucky = q.startswith('! ')
         self.query = q[2:] if self.feeling_lucky else q
         return self.query

     def bang_operator(self, bangs_dict: dict) -> str:
         for operator in bangs_dict.keys():
-            if self.query.split(' ')[0] == operator:
-                return bangs_dict[operator]['url'].format(self.query.replace(operator, '').strip())
+            if self.query.split(' ')[0] != operator:
+                continue
+
+            return bangs_dict[operator]['url'].format(
+                self.query.replace(operator, '').strip())
         return ''

     def generate_response(self) -> Tuple[Any, int]:
         mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent

-        content_filter = Filter(self.session['fernet_keys'], mobile=mobile, config=self.config)
-        full_query = gen_query(self.query, self.request_params, self.config, content_filter.near)
+        content_filter = Filter(
+            self.session['fernet_keys'],
+            mobile=mobile,
+            config=self.config)
+        full_query = gen_query(
+            self.query,
+            self.request_params,
+            self.config,
+            content_filter.near)
         get_body = g.user_request.send(query=full_query)

         # Produce cleanable html soup from response
-        html_soup = BeautifulSoup(content_filter.reskin(get_body.text), 'html.parser')
-        html_soup.insert(0, BeautifulSoup(
-            TOR_BANNER,
-            features='lxml') if g.user_request.tor_valid else BeautifulSoup("", features="lxml"))
+        html_soup = bsoup(content_filter.reskin(get_body.text), 'html.parser')
+        html_soup.insert(
+            0,
+            bsoup(TOR_BANNER, 'html.parser')
+            if g.user_request.tor_valid else bsoup('', 'html.parser'))

         if self.feeling_lucky:
             return get_first_link(html_soup), 1
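
The early-continue rewrite of bang_operator preserves the original mapping logic. Roughly (the bangs dictionary shown is a stand-in for the real bangs.json):

    bangs_dict = {
        '!w': {'url': 'https://en.wikipedia.org/w/index.php?search={}'}
    }

    def bang_operator(query, bangs_dict):
        for operator in bangs_dict.keys():
            if query.split(' ')[0] != operator:
                continue
            # Drop the operator and substitute the rest of the query
            return bangs_dict[operator]['url'].format(
                query.replace(operator, '').strip())
        return ''

    assert bang_operator('!w whoogle', bangs_dict).endswith('search=whoogle')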

@@ -83,11 +97,13 @@ class RoutingUtils:
         formatted_results = content_filter.clean(html_soup)

         # Append user config to all search links, if available
         param_str = ''.join('&{}={}'.format(k, v)
-                            for k, v in self.request_params.to_dict(flat=True).items()
-                            if self.config.is_safe_key(k))
+                            for k, v in
+                            self.request_params.to_dict(flat=True).items()
+                            if self.config.is_safe_key(k))
         for link in formatted_results.find_all('a', href=True):
-            if 'search?' not in link['href'] or link['href'].index('search?') > 1:
+            if 'search?' not in link['href'] or link['href'].index(
+                    'search?') > 1:
                 continue
             link['href'] += param_str

@@ -18,6 +18,7 @@ more-itertools==8.3.0
 packaging==20.4
 pluggy==0.13.1
 py==1.8.1
+pycodestyle==2.6.0
 pycparser==2.19
 pyOpenSSL==19.1.0
 pyparsing==2.4.7

@@ -3,13 +3,12 @@ from app.filter import Filter
 from app.utils.session_utils import generate_user_keys
 from datetime import datetime
 from dateutil.parser import *
-import json
-import os


 def get_search_results(data):
     secret_key = generate_user_keys()
-    soup = Filter(user_keys=secret_key).clean(BeautifulSoup(data, 'html.parser'))
+    soup = Filter(user_keys=secret_key).clean(
+        BeautifulSoup(data, 'html.parser'))

     main_divs = soup.find('div', {'id': 'main'})
     assert len(main_divs) > 1

@@ -17,7 +16,9 @@ def get_search_results(data):
     result_divs = []
     for div in main_divs:
         # Result divs should only have 1 inner div
-        if len(list(div.children)) != 1 or not div.findChild() or 'div' not in div.findChild().name:
+        if (len(list(div.children)) != 1
+                or not div.findChild()
+                or 'div' not in div.findChild().name):
             continue

         result_divs.append(div)

@@ -78,6 +79,7 @@ def test_recent_results(client):

     try:
         date = parse(date_span)
-        assert (current_date - date).days <= (num_days + 5)  # Date can have a little bit of wiggle room
+        # Date can have a little bit of wiggle room
+        assert (current_date - date).days <= (num_days + 5)
     except ParserError:
         pass