Added better multilingual support, updated filter
The results page now includes a way to switch to "All Languages" from whichever language is set as the primary in the config (see #74). Also removes the non-Whoogle links from the page footer, leaving only the page navigation controls. Added support for the date range filter on the results page, though I'd still recommend using the ":past <unit>" query instead.
parent 6ec65f8754
commit 4324fcd8f8
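For the ":past <unit>" shortcut mentioned above, a minimal sketch of the unit-to-param mapping used by the gen_query changes below (hour/day/week/month/year each reduce to their first letter); variable names here are illustrative only:

# Sketch: how a ":past <unit>" query maps to Google's date-range (tbs=qdr) param
query = 'new restaurants :past month'

if ':past' in query:
    time_range = query.split(':past', 1)[-1].strip()   # 'month'
    tbs = '&tbs=' + ('qdr:' + time_range[0].lower())    # '&tbs=qdr:m'
    print(tbs)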
@@ -116,14 +116,11 @@ class Filter:
         for script in soup('script'):
             script.decompose()

-        # Remove google's language/time config
-        st_card = soup.find('div', id='st-card')
-        if st_card:
-            st_card.decompose()
-
-        footer = soup.find('div', id='sfooter')
+        # Update default footer and header
+        footer = soup.find('footer')
         if footer:
-            footer.decompose()
+            # Remove divs that have multiple links beyond just page navigation
+            [_.decompose() for _ in footer.find_all('div', recursive=False) if len(_.find_all('a', href=True)) > 2]

         header = soup.find('header')
         if header:
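A self-contained sketch of the new footer handling, assuming BeautifulSoup with html.parser and made-up markup; only direct-child divs with more than two links are dropped, which preserves the page navigation controls:

from bs4 import BeautifulSoup

# Hypothetical markup: one nav div (2 links) and one div of extra links (3 links)
html = '''
<footer>
  <div><a href="/p1">Prev</a><a href="/p2">Next</a></div>
  <div><a href="/a">Help</a><a href="/b">Privacy</a><a href="/c">Terms</a></div>
</footer>
'''
soup = BeautifulSoup(html, 'html.parser')

footer = soup.find('footer')
if footer:
    # Same rule as the diff above: drop direct-child divs with more than two links
    [_.decompose() for _ in footer.find_all('div', recursive=False)
     if len(_.find_all('a', href=True)) > 2]

print(soup.footer)  # only the two-link navigation div remains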
@@ -144,12 +141,12 @@ class Filter:
             return

         question_divs = [_ for _ in self.main_divs.find_all('div', recursive=False) if len(_.find_all('h2')) > 0]
-        for x in question_divs:
-            questions = [_ for _ in x.find_all('div', recursive=True) if _.text.endswith('?')]
+        for question_div in question_divs:
+            questions = [_ for _ in question_div.find_all('div', recursive=True) if _.text.endswith('?')]
             for question in questions:
                 question['style'] = 'padding: 10px; font-style: italic;'

-    def update_element_src(self, element, mimetype):
+    def update_element_src(self, element, mime):
         element_src = element['src']
         if element_src.startswith('//'):
             element_src = 'https:' + element_src
@@ -163,7 +160,7 @@ class Filter:
             return

         element['src'] = '/element?url=' + self.encrypt_path(element_src, is_element=True) + \
-            '&type=' + urlparse.quote(mimetype)
+            '&type=' + urlparse.quote(mime)
         # TODO: Non-mobile image results link to website instead of image
         # if not self.mobile:
             # img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser'))

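For context on update_element_src above, a rough standalone sketch of the rewrite it performs; encrypt_path is stubbed with plain URL-quoting here purely to show the resulting URL shape (the real method is part of Filter):

from urllib.parse import quote

def rewrite_src(element_src, mime, encrypt_path=lambda url, is_element=False: quote(url, safe='')):
    # Give protocol-relative sources ('//example.com/img.png') an explicit scheme
    if element_src.startswith('//'):
        element_src = 'https:' + element_src
    # Route the element through the local /element proxy, tagging its mime type
    return '/element?url=' + encrypt_path(element_src, is_element=True) + '&type=' + quote(mime)

print(rewrite_src('//example.com/logo.png', 'image/png'))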
@@ -12,7 +12,7 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
 DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'

 # Valid query params
-VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
+VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source']


 def gen_user_agent(is_mobile):
@@ -28,11 +28,22 @@ def gen_user_agent(is_mobile):

 def gen_query(query, args, config, near_city=None):
     param_dict = {key: '' for key in VALID_PARAMS}

     # Use :past(hour/day/week/month/year) if available
     # example search "new restaurants :past month"
-    if ':past' in query:
+    sub_lang = ''
+    if ':past' in query and 'tbs' not in args:
         time_range = str.strip(query.split(':past', 1)[-1])
-        param_dict['tbs'] = '&tbs=qdr:' + str.lower(time_range[0])
+        param_dict['tbs'] = '&tbs=' + ('qdr:' + str.lower(time_range[0]))
+    elif 'tbs' in args:
+        result_tbs = args.get('tbs')
+        param_dict['tbs'] = '&tbs=' + result_tbs
+
+        # Occasionally the 'tbs' param provided by google also contains a field for 'lr', but formatted
+        # strangely. This is a (admittedly not very elegant) solution for this.
+        # Ex/ &tbs=qdr:h,lr:lang_1pl --> the lr param needs to be extracted and have the "1" digit removed in this case
+        sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]
+        sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:len(sub_lang[0])] if len(sub_lang) > 0 else ''

     # Ensure search query is parsable
     query = urlparse.quote(query)
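The comment block above describes pulling the stray 'lr' field out of a combined tbs value; as a worked example using the comment's own sample, the same split-and-slice yields:

result_tbs = 'qdr:h,lr:lang_1pl'   # sample value from the comment above

sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]          # ['lr:lang_1pl']
sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:] if len(sub_lang) > 0 else ''

print(sub_lang)  # 'lang_1pl' -- the digit is stripped later, when 'lr' is set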
@@ -49,13 +60,20 @@ def gen_query(query, args, config, near_city=None):
     if near_city:
         param_dict['near'] = '&near=' + urlparse.quote(near_city)

-    # Set language for results (lr) and interface (hl)
-    param_dict['lr'] = '&lr=' + config.lang + '&hl=' + config.lang.replace('lang_', '')
+    # Set language for results (lr) if source isn't set, otherwise use the result
+    # language param provided by google (but with the strange digit(s) removed)
+    if 'source' in args:
+        param_dict['source'] = '&source=' + args.get('source')
+        param_dict['lr'] = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else ''
+    else:
+        param_dict['lr'] = '&lr=' + config.lang
+
     param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
+    param_dict['hl'] = '&hl=' + config.lang.replace('lang_', '')
     param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')

     for val in param_dict.values():
-        if not val or val is None:
+        if not val:
             continue
         query += val

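And the digit stripping that follows when 'source' is present, shown as a short worked example of the comprehension used in the new 'lr' assignment:

sub_lang = 'lang_1pl'   # value carried over from the tbs parsing above

# Drop the stray digit(s) Google injects: 'lang_1pl' -> 'lang_pl'
lr = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else ''

print(lr)  # '&lr=lang_pl'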
@@ -87,6 +87,7 @@ def after_request_func(response):
     for key in session_list:
         session.pop(key)

+    response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
     return response


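The last hunk sets a Cache-Control header in the after-request hook; a minimal sketch of that pattern, assuming a Flask app (which the session/response usage suggests):

from flask import Flask

app = Flask(__name__)

@app.after_request
def after_request_func(response):
    # Ask browsers and intermediaries not to cache result pages
    response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
    return response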