diff --git a/app/__init__.py b/app/__init__.py
index 22e436d..f21d4b4 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -1,4 +1,4 @@
-from app.utils.misc import generate_user_keys
+from app.utils.session_utils import generate_user_keys
from flask import Flask
from flask_session import Session
import os
diff --git a/app/filter.py b/app/filter.py
index 1cc9f87..41a5cef 100644
--- a/app/filter.py
+++ b/app/filter.py
@@ -1,56 +1,11 @@
from app.request import VALID_PARAMS
-from app.utils.misc import BLACKLIST
-from bs4 import BeautifulSoup
+from app.utils.filter_utils import *
from bs4.element import ResultSet
from cryptography.fernet import Fernet
import re
import urllib.parse as urlparse
from urllib.parse import parse_qs
-SKIP_ARGS = ['ref_src', 'utm']
-FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
-GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
-LOGO_URL = GOOG_IMG + '_desk'
-BLANK_B64 = '''
-data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC
-'''
-
-
-def get_first_link(soup):
-    # Replace hrefs with only the intended destination (no "utm" type tags)
-    for a in soup.find_all('a', href=True):
-        # Return the first search result URL
-        if 'url?q=' in a['href']:
-            return filter_link_args(a['href'])
-
-
-def filter_link_args(query_link):
-    parsed_link = urlparse.urlparse(query_link)
-    link_args = parse_qs(parsed_link.query)
-    safe_args = {}
-
-    if len(link_args) == 0 and len(parsed_link) > 0:
-        return query_link
-
-    for arg in link_args.keys():
-        if arg in SKIP_ARGS:
-            continue
-
-        safe_args[arg] = link_args[arg]
-
-    # Remove original link query and replace with filtered args
-    query_link = query_link.replace(parsed_link.query, '')
-    if len(safe_args) > 0:
-        query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
-    else:
-        query_link = query_link.replace('?', '')
-
-    return query_link
-
-
-def has_ad_content(element: str):
-    return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element
-
class Filter:
    def __init__(self, user_keys: dict, mobile=False, config=None):
@@ -61,6 +16,7 @@ class Filter:
        self.dark = config['dark'] if 'dark' in config else False
        self.nojs = config['nojs'] if 'nojs' in config else False
        self.new_tab = config['new_tab'] if 'new_tab' in config else False
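+        # When enabled, result links are redirected to the SITE_ALTS alternatives (see filter_utils)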
+        self.alt_redirect = config['alts'] if 'alts' in config else False
        self.mobile = mobile
        self.user_keys = user_keys
        self.main_divs = ResultSet('')
@@ -213,8 +169,12 @@ class Filter:
        query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''

        if query_link.startswith('/'):
+            # Internal google links (e.g. mail, maps, etc.) should still be forwarded to Google
            link['href'] = 'https://google.com' + query_link
        elif '/search?q=' in href:
+ # "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes
+ if 'li:1' in href:
+ query_link = '"' + query_link + '"'
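+                # e.g. a verbatim query for python is sent as "python"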
            new_search = '/search?q=' + self.encrypt_path(query_link)
            query_params = parse_qs(urlparse.urlparse(href).query)
@@ -232,11 +192,13 @@ class Filter:
        else:
            link['href'] = href

+        # Replace the link location if the "alts" config is enabled
+        if self.alt_redirect:
+            # Search and replace all link descriptions with the alternative location
+            link['href'] = get_site_alt(link['href'])
+            link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys())))
+            if len(link_desc) == 0:
+                return
-def gen_nojs(sibling):
-    nojs_link = BeautifulSoup().new_tag('a')
-    nojs_link['href'] = '/window?location=' + sibling['href']
-    nojs_link['style'] = 'display:block;width:100%;'
-    nojs_link.string = 'NoJS Link: ' + nojs_link['href']
-    sibling.append(BeautifulSoup('<br>', 'html.parser'))
-    sibling.append(nojs_link)
diff --git a/app/utils/filter_utils.py b/app/utils/filter_utils.py
new file mode 100644
index 0000000..ed05d76
--- /dev/null
+++ b/app/utils/filter_utils.py
@@ -0,0 +1,79 @@
+from bs4 import BeautifulSoup
+import urllib.parse as urlparse
+from urllib.parse import parse_qs
+
+SKIP_ARGS = ['ref_src', 'utm']
+FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
+GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
+LOGO_URL = GOOG_IMG + '_desk'
+BLANK_B64 = '''
+data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC
+'''
+
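+# Localized "Ad"/advertisement labels, used by has_ad_content to detect ad results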
+BLACKLIST = [
+    'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
+    'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
+    'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
+]
+
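+# Sites mapped to privacy-friendly alternative front ends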
+SITE_ALTS = {
+    'twitter.com': 'nitter.net',
+    'youtube.com': 'invidio.us',
+    'instagram.com': 'bibliogram.art/u'
+}
+
+
+def has_ad_content(element: str):
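+    """Check if a text element matches a localized ad label in BLACKLIST or contains the 'ⓘ' ad symbol."""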
+    return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element
+
+
+def get_first_link(soup):
+    # Replace hrefs with only the intended destination (no "utm" type tags)
+    for a in soup.find_all('a', href=True):
+        # Return the first search result URL
+        if 'url?q=' in a['href']:
+            return filter_link_args(a['href'])
+
+
+def get_site_alt(link: str):
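+    """Replace the first SITE_ALTS site found in a link with its alternative.
+
+    e.g. 'https://twitter.com/user' -> 'https://nitter.net/user'
+    """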
+    for site_key in SITE_ALTS.keys():
+        if site_key not in link:
+            continue
+
+        link = link.replace(site_key, SITE_ALTS[site_key])
+        break
+
+    return link
+
+
+def filter_link_args(query_link):
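+    """Strip tracking arguments (SKIP_ARGS) from a result link, keeping all other query args."""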
+    parsed_link = urlparse.urlparse(query_link)
+    link_args = parse_qs(parsed_link.query)
+    safe_args = {}
+
+    if len(link_args) == 0 and len(parsed_link) > 0:
+        return query_link
+
+    for arg in link_args.keys():
+        if arg in SKIP_ARGS:
+            continue
+
+        safe_args[arg] = link_args[arg]
+
+    # Remove original link query and replace with filtered args
+    query_link = query_link.replace(parsed_link.query, '')
+    if len(safe_args) > 0:
+        query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
+    else:
+        query_link = query_link.replace('?', '')
+
+    return query_link
+
+
+def gen_nojs(sibling):
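+    """Append a 'NoJS Link' anchor to a result so it can be opened through the /window endpoint."""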
+    nojs_link = BeautifulSoup().new_tag('a')
+    nojs_link['href'] = '/window?location=' + sibling['href']
+    nojs_link['style'] = 'display:block;width:100%;'
+    nojs_link.string = 'NoJS Link: ' + nojs_link['href']
+    sibling.append(BeautifulSoup('<br>', 'html.parser'))
+    sibling.append(nojs_link)
\ No newline at end of file
diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py
index 40f8a90..2a649b4 100644
--- a/app/utils/routing_utils.py
+++ b/app/utils/routing_utils.py
@@ -1,5 +1,5 @@
from app.filter import Filter, get_first_link
-from app.utils.misc import generate_user_keys
+from app.utils.session_utils import generate_user_keys
from app.request import gen_query
from bs4 import BeautifulSoup
from cryptography.fernet import Fernet, InvalidToken
diff --git a/app/utils/misc.py b/app/utils/session_utils.py
similarity index 62%
rename from app/utils/misc.py
rename to app/utils/session_utils.py
index b87941d..f959abe 100644
--- a/app/utils/misc.py
+++ b/app/utils/session_utils.py
@@ -2,11 +2,6 @@ from cryptography.fernet import Fernet
from flask import current_app as app
REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys']
-BLACKLIST = [
-    'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
-    'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
-    'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
-]
def generate_user_keys(cookies_disabled=False) -> dict:
diff --git a/test/conftest.py b/test/conftest.py
index 63aec3e..7a15f00 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -1,5 +1,5 @@
from app import app
-from app.utils.misc import generate_user_keys
+from app.utils.session_utils import generate_user_keys
import pytest
diff --git a/test/test_misc.py b/test/test_misc.py
index 8eb1d78..92fcadb 100644
--- a/test/test_misc.py
+++ b/test/test_misc.py
@@ -1,4 +1,4 @@
-from app.utils.misc import generate_user_keys, valid_user_session
+from app.utils.session_utils import generate_user_keys, valid_user_session
def test_generate_user_keys():
diff --git a/test/test_results.py b/test/test_results.py
index 463a355..a7aa771 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -1,6 +1,6 @@
from bs4 import BeautifulSoup
from app.filter import Filter
-from app.utils.misc import generate_user_keys
+from app.utils.session_utils import generate_user_keys
from datetime import datetime
from dateutil.parser import *