Privacy respecting alternatives in results view (#106)
Full implementation of social media alt redirects (twitter/youtube/instagram -> nitter/invidious/bibliogram) depending on configuration. Verbatim search and option to ignore search autocorrect are now supported as well. Also cleaned up the javascript side of whoogle config so that it now uses arrays of available fields for parsing config values instead of manually assigning each one to a variable. This doesn't include support for Google Maps -> Open Street Maps, that seems a bit more involved than the social media redirects were, so it should likely be a separate effort.main
parent
3d7456f37b
commit
975ece8cd0
|
@ -1,4 +1,4 @@
|
|||
from app.utils.misc import generate_user_keys
|
||||
from app.utils.session_utils import generate_user_keys
|
||||
from flask import Flask
|
||||
from flask_session import Session
|
||||
import os
|
||||
|
|
|
@ -1,56 +1,11 @@
|
|||
from app.request import VALID_PARAMS
|
||||
from app.utils.misc import BLACKLIST
|
||||
from bs4 import BeautifulSoup
|
||||
from app.utils.filter_utils import *
|
||||
from bs4.element import ResultSet
|
||||
from cryptography.fernet import Fernet
|
||||
import re
|
||||
import urllib.parse as urlparse
|
||||
from urllib.parse import parse_qs
|
||||
|
||||
SKIP_ARGS = ['ref_src', 'utm']
|
||||
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
|
||||
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
|
||||
LOGO_URL = GOOG_IMG + '_desk'
|
||||
BLANK_B64 = '''
|
||||

|
||||
'''
|
||||
|
||||
|
||||
def get_first_link(soup):
|
||||
# Replace hrefs with only the intended destination (no "utm" type tags)
|
||||
for a in soup.find_all('a', href=True):
|
||||
# Return the first search result URL
|
||||
if 'url?q=' in a['href']:
|
||||
return filter_link_args(a['href'])
|
||||
|
||||
|
||||
def filter_link_args(query_link):
|
||||
parsed_link = urlparse.urlparse(query_link)
|
||||
link_args = parse_qs(parsed_link.query)
|
||||
safe_args = {}
|
||||
|
||||
if len(link_args) == 0 and len(parsed_link) > 0:
|
||||
return query_link
|
||||
|
||||
for arg in link_args.keys():
|
||||
if arg in SKIP_ARGS:
|
||||
continue
|
||||
|
||||
safe_args[arg] = link_args[arg]
|
||||
|
||||
# Remove original link query and replace with filtered args
|
||||
query_link = query_link.replace(parsed_link.query, '')
|
||||
if len(safe_args) > 0:
|
||||
query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
|
||||
else:
|
||||
query_link = query_link.replace('?', '')
|
||||
|
||||
return query_link
|
||||
|
||||
|
||||
def has_ad_content(element: str):
|
||||
return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element
|
||||
|
||||
|
||||
class Filter:
|
||||
def __init__(self, user_keys: dict, mobile=False, config=None):
|
||||
|
@ -61,6 +16,7 @@ class Filter:
|
|||
self.dark = config['dark'] if 'dark' in config else False
|
||||
self.nojs = config['nojs'] if 'nojs' in config else False
|
||||
self.new_tab = config['new_tab'] if 'new_tab' in config else False
|
||||
self.alt_redirect = config['alts'] if 'alts' in config else False
|
||||
self.mobile = mobile
|
||||
self.user_keys = user_keys
|
||||
self.main_divs = ResultSet('')
|
||||
|
@ -213,8 +169,12 @@ class Filter:
|
|||
query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
|
||||
|
||||
if query_link.startswith('/'):
|
||||
# Internal google links (i.e. mail, maps, etc) should still be forwarded to Google
|
||||
link['href'] = 'https://google.com' + query_link
|
||||
elif '/search?q=' in href:
|
||||
# "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes
|
||||
if 'li:1' in href:
|
||||
query_link = '"' + query_link + '"'
|
||||
new_search = '/search?q=' + self.encrypt_path(query_link)
|
||||
|
||||
query_params = parse_qs(urlparse.urlparse(href).query)
|
||||
|
@ -232,11 +192,13 @@ class Filter:
|
|||
else:
|
||||
link['href'] = href
|
||||
|
||||
# Replace link location if "alts" config is enabled
|
||||
if self.alt_redirect:
|
||||
# Search and replace all link descriptions with alternative location
|
||||
link['href'] = get_site_alt(link['href'])
|
||||
link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys())))
|
||||
if len(link_desc) == 0:
|
||||
return
|
||||
|
||||
def gen_nojs(sibling):
|
||||
nojs_link = BeautifulSoup().new_tag('a')
|
||||
nojs_link['href'] = '/window?location=' + sibling['href']
|
||||
nojs_link['style'] = 'display:block;width:100%;'
|
||||
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
|
||||
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
|
||||
sibling.append(nojs_link)
|
||||
# Replace link destination
|
||||
link_desc[0].replace_with(get_site_alt(link_desc[0]))
|
||||
|
|
|
@ -306,6 +306,7 @@ class Config:
|
|||
self.dark = False
|
||||
self.nojs = False
|
||||
self.near = ''
|
||||
self.alts = False
|
||||
self.new_tab = False
|
||||
self.get_only = False
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
|
|||
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
|
||||
|
||||
# Valid query params
|
||||
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source']
|
||||
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr']
|
||||
|
||||
|
||||
def gen_user_agent(is_mobile):
|
||||
|
@ -68,6 +68,10 @@ def gen_query(query, args, config, near_city=None):
|
|||
else:
|
||||
param_dict['lr'] = ('&lr=' + config.lang_search) if config.lang_search else ''
|
||||
|
||||
# Set autocorrected search ignore
|
||||
if 'nfpr' in args:
|
||||
param_dict['nfpr'] = '&nfpr=' + args.get('nfpr')
|
||||
|
||||
param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
|
||||
param_dict['hl'] = ('&hl=' + config.lang_interface.replace('lang_', '')) if config.lang_interface else ''
|
||||
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
|
||||
|
|
|
@ -15,7 +15,7 @@ from requests import exceptions
|
|||
from app import app
|
||||
from app.models.config import Config
|
||||
from app.request import Request
|
||||
from app.utils.misc import valid_user_session
|
||||
from app.utils.session_utils import valid_user_session
|
||||
from app.utils.routing_utils import *
|
||||
|
||||
|
||||
|
|
|
@ -129,3 +129,8 @@ footer {
|
|||
width: 100%;
|
||||
z-index: -1;
|
||||
}
|
||||
|
||||
.info-text {
|
||||
font-style: italic;
|
||||
font-size: 12px;
|
||||
}
|
|
@ -1,3 +1,13 @@
|
|||
// Whoogle configurations that use boolean values and checkboxes
|
||||
CONFIG_BOOLS = [
|
||||
"nojs", "dark", "safe", "alts", "new_tab", "get_only"
|
||||
];
|
||||
|
||||
// Whoogle configurations that use string values and input fields
|
||||
CONFIG_STRS = [
|
||||
"near", "url"
|
||||
];
|
||||
|
||||
const setupSearchLayout = () => {
|
||||
// Setup search field
|
||||
const searchBar = document.getElementById("search-bar");
|
||||
|
@ -18,15 +28,6 @@ const setupSearchLayout = () => {
|
|||
};
|
||||
|
||||
const fillConfigValues = () => {
|
||||
// Establish all config value elements
|
||||
const near = document.getElementById("config-near");
|
||||
const noJS = document.getElementById("config-nojs");
|
||||
const dark = document.getElementById("config-dark");
|
||||
const safe = document.getElementById("config-safe");
|
||||
const url = document.getElementById("config-url");
|
||||
const newTab = document.getElementById("config-new-tab");
|
||||
const getOnly = document.getElementById("config-get-only");
|
||||
|
||||
// Request existing config info
|
||||
let xhrGET = new XMLHttpRequest();
|
||||
xhrGET.open("GET", "/config");
|
||||
|
@ -39,15 +40,15 @@ const fillConfigValues = () => {
|
|||
// Allow for updating/saving config values
|
||||
let configSettings = JSON.parse(xhrGET.responseText);
|
||||
|
||||
near.value = configSettings["near"] ? configSettings["near"] : "";
|
||||
noJS.checked = !!configSettings["nojs"];
|
||||
dark.checked = !!configSettings["dark"];
|
||||
safe.checked = !!configSettings["safe"];
|
||||
getOnly.checked = !!configSettings["get_only"];
|
||||
newTab.checked = !!configSettings["new_tab"];
|
||||
CONFIG_STRS.forEach(function(item) {
|
||||
let configElement = document.getElementById("config-" + item.replace("_", "-"));
|
||||
configElement.value = configSettings[item] ? configSettings[item] : "";
|
||||
});
|
||||
|
||||
// Addresses the issue of incorrect URL being used behind reverse proxy
|
||||
url.value = configSettings["url"] ? configSettings["url"] : "";
|
||||
CONFIG_BOOLS.forEach(function(item) {
|
||||
let configElement = document.getElementById("config-" + item.replace("_", "-"));
|
||||
configElement.checked = !!configSettings[item];
|
||||
});
|
||||
};
|
||||
|
||||
xhrGET.send();
|
||||
|
|
|
@ -96,6 +96,12 @@
|
|||
<label for="config-safe">Safe Search: </label>
|
||||
<input type="checkbox" name="safe" id="config-safe">
|
||||
</div>
|
||||
<div class="config-div">
|
||||
<label class="tooltip" for="config-alts">Replace Social Media Links: </label>
|
||||
<input type="checkbox" name="alts" id="config-alts">
|
||||
<div><span class="info-text"> — Replaces Twitter/YouTube/Instagram links
|
||||
with Nitter/Invidious/Bibliogram links.</span></div>
|
||||
</div>
|
||||
<div class="config-div">
|
||||
<label for="config-new-tab">Open Links in New Tab: </label>
|
||||
<input type="checkbox" name="new_tab" id="config-new-tab">
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
from bs4 import BeautifulSoup
|
||||
import urllib.parse as urlparse
|
||||
from urllib.parse import parse_qs
|
||||
|
||||
SKIP_ARGS = ['ref_src', 'utm']
|
||||
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
|
||||
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
|
||||
LOGO_URL = GOOG_IMG + '_desk'
|
||||
BLANK_B64 = '''
|
||||

|
||||
'''
|
||||
|
||||
BLACKLIST = [
|
||||
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
|
||||
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
|
||||
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
|
||||
]
|
||||
|
||||
SITE_ALTS = {
|
||||
'twitter.com': 'nitter.net',
|
||||
'youtube.com': 'invidio.us',
|
||||
'instagram.com': 'bibliogram.art/u'
|
||||
}
|
||||
|
||||
|
||||
def has_ad_content(element: str):
|
||||
return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element
|
||||
|
||||
|
||||
def get_first_link(soup):
|
||||
# Replace hrefs with only the intended destination (no "utm" type tags)
|
||||
for a in soup.find_all('a', href=True):
|
||||
# Return the first search result URL
|
||||
if 'url?q=' in a['href']:
|
||||
return filter_link_args(a['href'])
|
||||
|
||||
|
||||
def get_site_alt(link: str):
|
||||
for site_key in SITE_ALTS.keys():
|
||||
if site_key not in link:
|
||||
continue
|
||||
|
||||
link = link.replace(site_key, SITE_ALTS[site_key])
|
||||
break
|
||||
|
||||
return link
|
||||
|
||||
|
||||
def filter_link_args(query_link):
|
||||
parsed_link = urlparse.urlparse(query_link)
|
||||
link_args = parse_qs(parsed_link.query)
|
||||
safe_args = {}
|
||||
|
||||
if len(link_args) == 0 and len(parsed_link) > 0:
|
||||
return query_link
|
||||
|
||||
for arg in link_args.keys():
|
||||
if arg in SKIP_ARGS:
|
||||
continue
|
||||
|
||||
safe_args[arg] = link_args[arg]
|
||||
|
||||
# Remove original link query and replace with filtered args
|
||||
query_link = query_link.replace(parsed_link.query, '')
|
||||
if len(safe_args) > 0:
|
||||
query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
|
||||
else:
|
||||
query_link = query_link.replace('?', '')
|
||||
|
||||
return query_link
|
||||
|
||||
|
||||
def gen_nojs(sibling):
|
||||
nojs_link = BeautifulSoup().new_tag('a')
|
||||
nojs_link['href'] = '/window?location=' + sibling['href']
|
||||
nojs_link['style'] = 'display:block;width:100%;'
|
||||
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
|
||||
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
|
||||
sibling.append(nojs_link)
|
|
@ -1,5 +1,5 @@
|
|||
from app.filter import Filter, get_first_link
|
||||
from app.utils.misc import generate_user_keys
|
||||
from app.utils.session_utils import generate_user_keys
|
||||
from app.request import gen_query
|
||||
from bs4 import BeautifulSoup
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
|
|
|
@ -2,11 +2,6 @@ from cryptography.fernet import Fernet
|
|||
from flask import current_app as app
|
||||
|
||||
REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys']
|
||||
BLACKLIST = [
|
||||
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
|
||||
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
|
||||
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
|
||||
]
|
||||
|
||||
|
||||
def generate_user_keys(cookies_disabled=False) -> dict:
|
|
@ -1,5 +1,5 @@
|
|||
from app import app
|
||||
from app.utils.misc import generate_user_keys
|
||||
from app.utils.session_utils import generate_user_keys
|
||||
import pytest
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from app.utils.misc import generate_user_keys, valid_user_session
|
||||
from app.utils.session_utils import generate_user_keys, valid_user_session
|
||||
|
||||
|
||||
def test_generate_user_keys():
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from bs4 import BeautifulSoup
|
||||
from app.filter import Filter
|
||||
from app.utils.misc import generate_user_keys
|
||||
from app.utils.session_utils import generate_user_keys
|
||||
from datetime import datetime
|
||||
from dateutil.parser import *
|
||||
|
||||
|
|
Loading…
Reference in New Issue