diff --git a/app/__init__.py b/app/__init__.py index 22e436d..f21d4b4 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,4 +1,4 @@ -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys from flask import Flask from flask_session import Session import os diff --git a/app/filter.py b/app/filter.py index 1cc9f87..41a5cef 100644 --- a/app/filter.py +++ b/app/filter.py @@ -1,56 +1,11 @@ from app.request import VALID_PARAMS -from app.utils.misc import BLACKLIST -from bs4 import BeautifulSoup +from app.utils.filter_utils import * from bs4.element import ResultSet from cryptography.fernet import Fernet import re import urllib.parse as urlparse from urllib.parse import parse_qs -SKIP_ARGS = ['ref_src', 'utm'] -FULL_RES_IMG = '
Full Image' -GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' -LOGO_URL = GOOG_IMG + '_desk' -BLANK_B64 = ''' -data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC -''' - - -def get_first_link(soup): - # Replace hrefs with only the intended destination (no "utm" type tags) - for a in soup.find_all('a', href=True): - # Return the first search result URL - if 'url?q=' in a['href']: - return filter_link_args(a['href']) - - -def filter_link_args(query_link): - parsed_link = urlparse.urlparse(query_link) - link_args = parse_qs(parsed_link.query) - safe_args = {} - - if len(link_args) == 0 and len(parsed_link) > 0: - return query_link - - for arg in link_args.keys(): - if arg in SKIP_ARGS: - continue - - safe_args[arg] = link_args[arg] - - # Remove original link query and replace with filtered args - query_link = query_link.replace(parsed_link.query, '') - if len(safe_args) > 0: - query_link = query_link + urlparse.urlencode(safe_args, doseq=True) - else: - query_link = query_link.replace('?', '') - - return query_link - - -def has_ad_content(element: str): - return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element - class Filter: def __init__(self, user_keys: dict, mobile=False, config=None): @@ -61,6 +16,7 @@ class Filter: self.dark = config['dark'] if 'dark' in config else False self.nojs = config['nojs'] if 'nojs' in config else False self.new_tab = config['new_tab'] if 'new_tab' in config else False + self.alt_redirect = config['alts'] if 'alts' in config else False self.mobile = mobile self.user_keys = user_keys self.main_divs = ResultSet('') @@ -213,8 +169,12 @@ class Filter: query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' if query_link.startswith('/'): + # Internal google links (i.e. mail, maps, etc) should still be forwarded to Google link['href'] = 'https://google.com' + query_link elif '/search?q=' in href: + # "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes + if 'li:1' in href: + query_link = '"' + query_link + '"' new_search = '/search?q=' + self.encrypt_path(query_link) query_params = parse_qs(urlparse.urlparse(href).query) @@ -232,11 +192,13 @@ class Filter: else: link['href'] = href + # Replace link location if "alts" config is enabled + if self.alt_redirect: + # Search and replace all link descriptions with alternative location + link['href'] = get_site_alt(link['href']) + link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys()))) + if len(link_desc) == 0: + return -def gen_nojs(sibling): - nojs_link = BeautifulSoup().new_tag('a') - nojs_link['href'] = '/window?location=' + sibling['href'] - nojs_link['style'] = 'display:block;width:100%;' - nojs_link.string = 'NoJS Link: ' + nojs_link['href'] - sibling.append(BeautifulSoup('


', 'html.parser')) - sibling.append(nojs_link) + # Replace link destination + link_desc[0].replace_with(get_site_alt(link_desc[0])) diff --git a/app/models/config.py b/app/models/config.py index 45b1b65..d261cd3 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -306,6 +306,7 @@ class Config: self.dark = False self.nojs = False self.near = '' + self.alts = False self.new_tab = False self.get_only = False diff --git a/app/request.py b/app/request.py index 192eedc..4abb9b3 100644 --- a/app/request.py +++ b/app/request.py @@ -12,7 +12,7 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0' DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' # Valid query params -VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source'] +VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr'] def gen_user_agent(is_mobile): @@ -68,6 +68,10 @@ def gen_query(query, args, config, near_city=None): else: param_dict['lr'] = ('&lr=' + config.lang_search) if config.lang_search else '' + # Set autocorrected search ignore + if 'nfpr' in args: + param_dict['nfpr'] = '&nfpr=' + args.get('nfpr') + param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else '' param_dict['hl'] = ('&hl=' + config.lang_interface.replace('lang_', '')) if config.lang_interface else '' param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off') diff --git a/app/routes.py b/app/routes.py index 7f1869c..fd6278d 100644 --- a/app/routes.py +++ b/app/routes.py @@ -15,7 +15,7 @@ from requests import exceptions from app import app from app.models.config import Config from app.request import Request -from app.utils.misc import valid_user_session +from app.utils.session_utils import valid_user_session from app.utils.routing_utils import * diff --git a/app/static/css/main.css b/app/static/css/main.css index ef4b557..34458f6 100644 --- a/app/static/css/main.css +++ b/app/static/css/main.css @@ -34,10 +34,10 @@ body { color: #685e79; border-radius: 10px 10px 0 0; max-width: 600px; - background: rgba(0,0,0,0); + background: rgba(0, 0, 0, 0); } -#search-bar:focus{ +#search-bar:focus { color: #685e79; } @@ -68,7 +68,7 @@ button::-moz-focus-inner { .collapsible { outline: 0; - background-color: rgba(0,0,0,0); + background-color: rgba(0, 0, 0, 0); color: #685e79; cursor: pointer; padding: 18px; @@ -129,3 +129,8 @@ footer { width: 100%; z-index: -1; } + +.info-text { + font-style: italic; + font-size: 12px; +} \ No newline at end of file diff --git a/app/static/js/controller.js b/app/static/js/controller.js index 95d917b..1035ff9 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -1,3 +1,13 @@ +// Whoogle configurations that use boolean values and checkboxes +CONFIG_BOOLS = [ + "nojs", "dark", "safe", "alts", "new_tab", "get_only" +]; + +// Whoogle configurations that use string values and input fields +CONFIG_STRS = [ + "near", "url" +]; + const setupSearchLayout = () => { // Setup search field const searchBar = document.getElementById("search-bar"); @@ -18,15 +28,6 @@ const setupSearchLayout = () => { }; const fillConfigValues = () => { - // Establish all config value elements - const near = document.getElementById("config-near"); - const noJS = document.getElementById("config-nojs"); - const dark = document.getElementById("config-dark"); - const safe = document.getElementById("config-safe"); - const url = document.getElementById("config-url"); - const newTab = document.getElementById("config-new-tab"); - const getOnly = document.getElementById("config-get-only"); - // Request existing config info let xhrGET = new XMLHttpRequest(); xhrGET.open("GET", "/config"); @@ -39,15 +40,15 @@ const fillConfigValues = () => { // Allow for updating/saving config values let configSettings = JSON.parse(xhrGET.responseText); - near.value = configSettings["near"] ? configSettings["near"] : ""; - noJS.checked = !!configSettings["nojs"]; - dark.checked = !!configSettings["dark"]; - safe.checked = !!configSettings["safe"]; - getOnly.checked = !!configSettings["get_only"]; - newTab.checked = !!configSettings["new_tab"]; + CONFIG_STRS.forEach(function(item) { + let configElement = document.getElementById("config-" + item.replace("_", "-")); + configElement.value = configSettings[item] ? configSettings[item] : ""; + }); - // Addresses the issue of incorrect URL being used behind reverse proxy - url.value = configSettings["url"] ? configSettings["url"] : ""; + CONFIG_BOOLS.forEach(function(item) { + let configElement = document.getElementById("config-" + item.replace("_", "-")); + configElement.checked = !!configSettings[item]; + }); }; xhrGET.send(); diff --git a/app/templates/index.html b/app/templates/index.html index a541413..dd89e32 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -96,6 +96,12 @@ +
+ + +
— Replaces Twitter/YouTube/Instagram links + with Nitter/Invidious/Bibliogram links.
+
diff --git a/app/utils/filter_utils.py b/app/utils/filter_utils.py new file mode 100644 index 0000000..ed05d76 --- /dev/null +++ b/app/utils/filter_utils.py @@ -0,0 +1,79 @@ +from bs4 import BeautifulSoup +import urllib.parse as urlparse +from urllib.parse import parse_qs + +SKIP_ARGS = ['ref_src', 'utm'] +FULL_RES_IMG = '
Full Image' +GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' +LOGO_URL = GOOG_IMG + '_desk' +BLANK_B64 = ''' +data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC +''' + +BLACKLIST = [ + 'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고', + 'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', + 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés' +] + +SITE_ALTS = { + 'twitter.com': 'nitter.net', + 'youtube.com': 'invidio.us', + 'instagram.com': 'bibliogram.art/u' +} + + +def has_ad_content(element: str): + return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element + + +def get_first_link(soup): + # Replace hrefs with only the intended destination (no "utm" type tags) + for a in soup.find_all('a', href=True): + # Return the first search result URL + if 'url?q=' in a['href']: + return filter_link_args(a['href']) + + +def get_site_alt(link: str): + for site_key in SITE_ALTS.keys(): + if site_key not in link: + continue + + link = link.replace(site_key, SITE_ALTS[site_key]) + break + + return link + + +def filter_link_args(query_link): + parsed_link = urlparse.urlparse(query_link) + link_args = parse_qs(parsed_link.query) + safe_args = {} + + if len(link_args) == 0 and len(parsed_link) > 0: + return query_link + + for arg in link_args.keys(): + if arg in SKIP_ARGS: + continue + + safe_args[arg] = link_args[arg] + + # Remove original link query and replace with filtered args + query_link = query_link.replace(parsed_link.query, '') + if len(safe_args) > 0: + query_link = query_link + urlparse.urlencode(safe_args, doseq=True) + else: + query_link = query_link.replace('?', '') + + return query_link + + +def gen_nojs(sibling): + nojs_link = BeautifulSoup().new_tag('a') + nojs_link['href'] = '/window?location=' + sibling['href'] + nojs_link['style'] = 'display:block;width:100%;' + nojs_link.string = 'NoJS Link: ' + nojs_link['href'] + sibling.append(BeautifulSoup('


', 'html.parser')) + sibling.append(nojs_link) \ No newline at end of file diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py index 40f8a90..2a649b4 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/routing_utils.py @@ -1,5 +1,5 @@ from app.filter import Filter, get_first_link -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys from app.request import gen_query from bs4 import BeautifulSoup from cryptography.fernet import Fernet, InvalidToken diff --git a/app/utils/misc.py b/app/utils/session_utils.py similarity index 62% rename from app/utils/misc.py rename to app/utils/session_utils.py index b87941d..f959abe 100644 --- a/app/utils/misc.py +++ b/app/utils/session_utils.py @@ -2,11 +2,6 @@ from cryptography.fernet import Fernet from flask import current_app as app REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys'] -BLACKLIST = [ - 'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고', - 'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', - 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés' -] def generate_user_keys(cookies_disabled=False) -> dict: diff --git a/test/conftest.py b/test/conftest.py index 63aec3e..7a15f00 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,5 +1,5 @@ from app import app -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys import pytest diff --git a/test/test_misc.py b/test/test_misc.py index 8eb1d78..92fcadb 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -1,4 +1,4 @@ -from app.utils.misc import generate_user_keys, valid_user_session +from app.utils.session_utils import generate_user_keys, valid_user_session def test_generate_user_keys(): diff --git a/test/test_results.py b/test/test_results.py index 463a355..a7aa771 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -1,6 +1,6 @@ from bs4 import BeautifulSoup from app.filter import Filter -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys from datetime import datetime from dateutil.parser import *