Added POST search, encrypted query strings, refactoring

The implementation of POST search support comes with a few benefits. The
most apparent is that search queries no longer appear in web server
logs -- instead of the prior GET approach (e.g.
/search?q=my+search+query), POST requests carry the query in the
request body, so the log entry simply appears as "/search".

Since a lot of relative links are generated in the results page, I came
up with a way to generate a unique key at run time that is used to
encrypt any query strings before sending them to the user. This benefits
both regular text queries and the fetching of image links, and means
that web logs will only show an encrypted string where a link or query
string might otherwise slip through.

Unfortunately, GET search requests still need to be supported: Firefox
on iOS doesn't seem to support loading search engines from their
opensearch.xml file, and instead relies on manual entry of a search
query string. Once Firefox on iOS adds that support, I'll probably
remove GET request search support.
main
Ben Busby 2020-04-28 18:19:34 -06:00
parent 74b0d30306
commit 0c0ebb8917
9 changed files with 156 additions and 121 deletions

View File

@ -102,10 +102,11 @@ To filter by a range of time, append ":past <time>" to the end of your search, w
Update browser settings: Update browser settings:
- Firefox (Desktop) - Firefox (Desktop)
- Navigate to your app's url, and click the 3 dot menu in the address bar. At the bottom, there should be an option to "Add Search Engine". Once you've clicked this, open your Firefox Preferences menu, click "Search" in the left menu, and use the available dropdown to select "Shoogle" from the list. - Navigate to your app's url, and click the 3 dot menu in the address bar. At the bottom, there should be an option to "Add Search Engine". Once you've clicked this, open your Firefox Preferences menu, click "Search" in the left menu, and use the available dropdown to select "Shoogle" from the list.
- Firefox (Mobile) - Firefox (iOS)
- In the mobile app Settings page, tap "Search" within the "General" section. There should be an option titled "Add Search Engine" to select. It should prompt you to enter a title and search query url - use the following elements to fill out the form: - In the mobile app Settings page, tap "Search" within the "General" section. There should be an option titled "Add Search Engine" to select. It should prompt you to enter a title and search query url - use the following elements to fill out the form:
- Title: "Shoogle" - Title: "Shoogle"
- URL: "https://\<your shoogle url\>/search?q=%s" - URL: "https://\<your shoogle url\>/search?q=%s"
- Others (TODO)
### Customizing and Configuration ### Customizing and Configuration
Shoogle currently allows a few minor configuration settings, accessible from the home page: Shoogle currently allows a few minor configuration settings, accessible from the home page:

View File

@ -1,6 +1,8 @@
from cryptography.fernet import Fernet
from flask import Flask from flask import Flask
import os import os
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static') app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
app.secret_key = Fernet.generate_key()
from app import routes from app import routes

View File

@ -1,4 +1,6 @@
from app.request import VALID_PARAMS
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from cryptography.fernet import Fernet
import re import re
import urllib.parse as urlparse import urllib.parse as urlparse
from urllib.parse import parse_qs from urllib.parse import parse_qs
@ -7,7 +9,7 @@ SKIP_ARGS = ['ref_src', 'utm']
class Filter: class Filter:
def __init__(self, mobile=False, config=None): def __init__(self, mobile=False, config=None, secret_key=''):
if config is None: if config is None:
config = {} config = {}
@ -15,6 +17,7 @@ class Filter:
self.dark = config['dark'] if 'dark' in config else False self.dark = config['dark'] if 'dark' in config else False
self.nojs = config['nojs'] if 'nojs' in config else False self.nojs = config['nojs'] if 'nojs' in config else False
self.mobile = mobile self.mobile = mobile
self.secret_key = secret_key
def __getitem__(self, name): def __getitem__(self, name):
return getattr(self, name) return getattr(self, name)
@ -30,7 +33,17 @@ class Filter:
return page return page
def clean(self, soup): def clean(self, soup):
def remove_ads(): self.remove_ads(soup)
self.sync_images(soup)
self.update_styling(soup)
self.update_links(soup)
input_form = soup.find('form')
input_form['method'] = 'POST'
return soup
def remove_ads(self, soup):
main_divs = soup.find('div', {'id': 'main'}) main_divs = soup.find('div', {'id': 'main'})
if main_divs is None: if main_divs is None:
return return
@ -42,14 +55,16 @@ class Filter:
for div in ad_divs: for div in ad_divs:
div.decompose() div.decompose()
def sync_images(): def sync_images(self, soup):
for img in soup.find_all('img'): for img in [_ for _ in soup.find_all('img') if 'src' in _]:
if img['src'].startswith('//'): img_src = img['src']
img['src'] = 'https:' + img['src'] if img_src.startswith('//'):
img_src = 'https:' + img_src
img['src'] = '/tmp?image_url=' + img['src'] enc_src = Fernet(self.secret_key).encrypt(img_src.encode())
img['src'] = '/tmp?image_url=' + enc_src.decode()
def update_styling(): def update_styling(self, soup):
# Remove unnecessary button(s) # Remove unnecessary button(s)
for button in soup.find_all('button'): for button in soup.find_all('button'):
button.decompose() button.decompose()
@ -77,7 +92,7 @@ class Filter:
for input_element in soup.findAll('input'): for input_element in soup.findAll('input'):
input_element['style'] = 'color:#fff;' input_element['style'] = 'color:#fff;'
def update_links(): def update_links(self, soup):
# Replace hrefs with only the intended destination (no "utm" type tags) # Replace hrefs with only the intended destination (no "utm" type tags)
for a in soup.find_all('a', href=True): for a in soup.find_all('a', href=True):
href = a['href'] href = a['href']
@ -85,12 +100,25 @@ class Filter:
a.decompose() a.decompose()
continue continue
if '?q=' not in href:
continue
result_link = urlparse.urlparse(href)
query_link = parse_qs(result_link.query)['q'][0]
if '/search?q=' in href:
enc_result = Fernet(self.secret_key).encrypt(query_link.encode())
new_search = '/search?q=' + enc_result.decode()
for param in VALID_PARAMS:
if param in parse_qs(result_link.query):
new_search += '&' + param + '=' + parse_qs(result_link.query)[param][0]
a['href'] = new_search
continue
if 'url?q=' in href: if 'url?q=' in href:
# Strip unneeded arguments # Strip unneeded arguments
result_link = urlparse.urlparse(href) parsed_link = urlparse.urlparse(query_link)
result_link = parse_qs(result_link.query)['q'][0]
parsed_link = urlparse.urlparse(result_link)
link_args = parse_qs(parsed_link.query) link_args = parse_qs(parsed_link.query)
safe_args = {} safe_args = {}
@ -101,18 +129,18 @@ class Filter:
safe_args[arg] = link_args[arg] safe_args[arg] = link_args[arg]
# Remove original link query and replace with filtered args # Remove original link query and replace with filtered args
result_link = result_link.replace(parsed_link.query, '') query_link = query_link.replace(parsed_link.query, '')
if len(safe_args) > 1: if len(safe_args) > 1:
result_link = result_link + urlparse.urlencode(safe_args) query_link = query_link + urlparse.urlencode(safe_args)
else: else:
result_link = result_link.replace('?', '') query_link = query_link.replace('?', '')
a['href'] = result_link a['href'] = query_link
# Add no-js option # Add no-js option
if self.nojs: if self.nojs:
nojs_link = soup.new_tag('a') nojs_link = soup.new_tag('a')
nojs_link['href'] = '/window?location=' + result_link nojs_link['href'] = '/window?location=' + query_link
nojs_link['style'] = 'display:block;width:100%;' nojs_link['style'] = 'display:block;width:100%;'
nojs_link.string = 'NoJS Link: ' + nojs_link['href'] nojs_link.string = 'NoJS Link: ' + nojs_link['href']
a.append(BeautifulSoup('<br><hr><br>', 'html.parser')) a.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
@ -126,8 +154,3 @@ class Filter:
except Exception: except Exception:
pass pass
remove_ads()
sync_images()
update_styling()
update_links()
return soup

View File

@ -9,6 +9,9 @@ SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0' MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
# Valid query params
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
def gen_user_agent(normal_ua): def gen_user_agent(normal_ua):
is_mobile = 'Android' in normal_ua or 'iPhone' in normal_ua is_mobile = 'Android' in normal_ua or 'iPhone' in normal_ua

View File

@ -2,6 +2,7 @@ from app import app
from app.filter import Filter from app.filter import Filter
from app.request import Request, gen_query from app.request import Request, gen_query
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from cryptography.fernet import Fernet, InvalidToken
from flask import g, make_response, request, redirect, render_template, send_file from flask import g, make_response, request, redirect, render_template, send_file
import io import io
import json import json
@ -19,11 +20,6 @@ def before_request_func():
g.user_request = Request(request.headers.get('User-Agent')) g.user_request = Request(request.headers.get('User-Agent'))
# @app.after_request
# def after_request(response):
# return response
@app.route('/', methods=['GET']) @app.route('/', methods=['GET'])
def index(): def index():
bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff' bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff'
@ -42,16 +38,25 @@ def opensearch():
return response return response
@app.route('/search', methods=['GET']) @app.route('/search', methods=['GET', 'POST'])
def search(): def search():
q = None
if request.method == 'GET':
q = request.args.get('q') q = request.args.get('q')
try:
q = Fernet(app.secret_key).decrypt(q.encode()).decode()
except InvalidToken:
pass
else:
q = request.form['q']
if q is None or len(q) <= 0: if q is None or len(q) <= 0:
return render_template('error.html') return render_template('error.html')
user_agent = request.headers.get('User-Agent') user_agent = request.headers.get('User-Agent')
mobile = 'Android' in user_agent or 'iPhone' in user_agent mobile = 'Android' in user_agent or 'iPhone' in user_agent
content_filter = Filter(mobile, user_config) content_filter = Filter(mobile, user_config, secret_key=app.secret_key)
full_query = gen_query(q, request.args, content_filter.near) full_query = gen_query(q, request.args, content_filter.near)
get_body = g.user_request.send(query=full_query) get_body = g.user_request.send(query=full_query)
@ -95,7 +100,9 @@ def imgres():
@app.route('/tmp') @app.route('/tmp')
def tmp(): def tmp():
file_data = g.user_request.send(base_url=request.args.get('image_url'), return_bytes=True) cipher_suite = Fernet(app.secret_key)
img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode()
file_data = g.user_request.send(base_url=img_url, return_bytes=True)
tmp_mem = io.BytesIO() tmp_mem = io.BytesIO()
tmp_mem.write(file_data) tmp_mem.write(file_data)
tmp_mem.seek(0) tmp_mem.seek(0)

View File

@ -57,6 +57,7 @@ body {
margin: auto; margin: auto;
border-radius: 0 0 10px 10px; border-radius: 0 0 10px 10px;
max-width: 600px; max-width: 600px;
-webkit-appearance: none;
} }
.config-div { .config-div {

View File

@ -18,10 +18,6 @@ document.addEventListener("DOMContentLoaded", function() {
} }
}); });
searchBtn.onclick = function() {
window.location.href = '/search?q=' + encodeURI(searchBar.value);
};
// Setup shoogle config // Setup shoogle config
const collapsible = document.getElementById("config-collapsible"); const collapsible = document.getElementById("config-collapsible");
collapsible.addEventListener("click", function() { collapsible.addEventListener("click", function() {

View File

@ -25,10 +25,12 @@
<body id="main" style="display: none; background-color: {{ bg }}"> <body id="main" style="display: none; background-color: {{ bg }}">
<div class="search-container"> <div class="search-container">
<img class="logo" src="/static/img/logo.png"> <img class="logo" src="/static/img/logo.png">
<form action="/search" method="post">
<div class="search-fields"> <div class="search-fields">
<input type="text" id="search-bar"> <input type="text" name="q" id="search-bar">
<button type="submit" id="search-submit">Search</button> <input type="submit" id="search-submit" value="Submit">
</div> </div>
</form>
<br/> <br/>
<button id="config-collapsible" class="collapsible">Configuration</button> <button id="config-collapsible" class="collapsible">Configuration</button>
<div class="content"> <div class="content">

View File

@ -1,10 +1,10 @@
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
xmlns:moz="http://www.mozilla.org/2006/browser/search/"> xmlns:moz="http://www.mozilla.org/2006/browser/search/">
<ShortName>Shoogle</ShortName> <ShortName>ShoogleTEST</ShortName>
<Description>Shoogle: A lightweight, deployable Google search proxy for desktop/mobile that removes Javascript, AMP links, and ads</Description> <Description>Shoogle: A lightweight, deployable Google search proxy for desktop/mobile that removes Javascript, AMP links, and ads</Description>
<InputEncoding>UTF-8</InputEncoding> <InputEncoding>UTF-8</InputEncoding>
<Image width="32" height="32" type="image/x-icon">/static/img/favicon/favicon-32x32.png</Image> <Image width="32" height="32" type="image/x-icon">/static/img/favicon/favicon-32x32.png</Image>
<Url type="text/html" template="{{ shoogle_url }}/search"> <Url type="text/html" method="post" template="{{ shoogle_url }}/search">
<Param name="q" value="{searchTerms}"/> <Param name="q" value="{searchTerms}"/>
</Url> </Url>
<Url type="application/x-suggestions+json" template="{{ shoogle_url }}/search"/> <Url type="application/x-suggestions+json" template="{{ shoogle_url }}/search"/>