Added better multilingual support, updated filter

Results page now includes method for switching to "All Languages" from
whichever language is specified as the primary in the config (see ).

Also removes the non-Whoogle links from the page footer, leaving only
the page navigation controls

Added support for the date range filter on the results page, though I'd
still recommend using the ":past <unit>" query instead.
main
Ben Busby 2020-06-07 14:06:49 -06:00
parent 6ec65f8754
commit 4324fcd8f8
3 changed files with 33 additions and 17 deletions

View File

@ -116,14 +116,11 @@ class Filter:
for script in soup('script'):
script.decompose()
# Remove google's language/time config
st_card = soup.find('div', id='st-card')
if st_card:
st_card.decompose()
footer = soup.find('div', id='sfooter')
# Update default footer and header
footer = soup.find('footer')
if footer:
footer.decompose()
# Remove divs that have multiple links beyond just page navigation
[_.decompose() for _ in footer.find_all('div', recursive=False) if len(_.find_all('a', href=True)) > 2]
header = soup.find('header')
if header:
@ -144,12 +141,12 @@ class Filter:
return
question_divs = [_ for _ in self.main_divs.find_all('div', recursive=False) if len(_.find_all('h2')) > 0]
for x in question_divs:
questions = [_ for _ in x.find_all('div', recursive=True) if _.text.endswith('?')]
for question_div in question_divs:
questions = [_ for _ in question_div.find_all('div', recursive=True) if _.text.endswith('?')]
for question in questions:
question['style'] = 'padding: 10px; font-style: italic;'
def update_element_src(self, element, mimetype):
def update_element_src(self, element, mime):
element_src = element['src']
if element_src.startswith('//'):
element_src = 'https:' + element_src
@ -163,7 +160,7 @@ class Filter:
return
element['src'] = '/element?url=' + self.encrypt_path(element_src, is_element=True) + \
'&type=' + urlparse.quote(mimetype)
'&type=' + urlparse.quote(mime)
# TODO: Non-mobile image results link to website instead of image
# if not self.mobile:
# img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser'))

View File

@ -12,7 +12,7 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
# Valid query params
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source']
def gen_user_agent(is_mobile):
@ -28,11 +28,22 @@ def gen_user_agent(is_mobile):
def gen_query(query, args, config, near_city=None):
param_dict = {key: '' for key in VALID_PARAMS}
# Use :past(hour/day/week/month/year) if available
# example search "new restaurants :past month"
if ':past' in query:
sub_lang = ''
if ':past' in query and 'tbs' not in args:
time_range = str.strip(query.split(':past', 1)[-1])
param_dict['tbs'] = '&tbs=qdr:' + str.lower(time_range[0])
param_dict['tbs'] = '&tbs=' + ('qdr:' + str.lower(time_range[0]))
elif 'tbs' in args:
result_tbs = args.get('tbs')
param_dict['tbs'] = '&tbs=' + result_tbs
# Occasionally the 'tbs' param provided by google also contains a field for 'lr', but formatted
# strangely. This is a (admittedly not very elegant) solution for this.
# Ex/ &tbs=qdr:h,lr:lang_1pl --> the lr param needs to be extracted and have the "1" digit removed in this case
sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]
sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:len(sub_lang[0])] if len(sub_lang) > 0 else ''
# Ensure search query is parsable
query = urlparse.quote(query)
@ -49,13 +60,20 @@ def gen_query(query, args, config, near_city=None):
if near_city:
param_dict['near'] = '&near=' + urlparse.quote(near_city)
# Set language for results (lr) and interface (hl)
param_dict['lr'] = '&lr=' + config.lang + '&hl=' + config.lang.replace('lang_', '')
# Set language for results (lr) if source isn't set, otherwise use the result
# language param provided by google (but with the strange digit(s) removed)
if 'source' in args:
param_dict['source'] = '&source=' + args.get('source')
param_dict['lr'] = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else ''
else:
param_dict['lr'] = '&lr=' + config.lang
param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
param_dict['hl'] = '&hl=' + config.lang.replace('lang_', '')
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
for val in param_dict.values():
if not val or val is None:
if not val:
continue
query += val

View File

@ -87,6 +87,7 @@ def after_request_func(response):
for key in session_list:
session.pop(key)
response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
return response