Added POST search, encrypted query strings, refactoring
The implementation of POST search support comes with a few benefits. The most apparent is that search queries no longer appear in web server logs: instead of the prior GET approach (i.e. /search?q=my+search+query), POST requests carry the query in the request body, so the logs simply show "/search". Since a lot of relative links are generated in the results page, I also came up with a way to generate a unique key at run time that is used to encrypt any query strings before they are sent to the user. This benefits both regular text queries and the fetching of image links, and means that web logs will only show an encrypted string wherever a link or query string might slip through. Unfortunately, GET search requests still need to be supported, because Firefox on iOS does not appear to support loading search engines from their opensearch.xml file and instead relies on manual entry of a search query URL. Once this is updated, I'll probably remove GET request search support.
main
parent
74b0d30306
commit
0c0ebb8917
|
@ -102,10 +102,11 @@ To filter by a range of time, append ":past <time>" to the end of your search, w
|
||||||
Update browser settings:
|
Update browser settings:
|
||||||
- Firefox (Desktop)
|
- Firefox (Desktop)
|
||||||
- Navigate to your app's url, and click the 3 dot menu in the address bar. At the bottom, there should be an option to "Add Search Engine". Once you've clicked this, open your Firefox Preferences menu, click "Search" in the left menu, and use the available dropdown to select "Shoogle" from the list.
|
- Navigate to your app's url, and click the 3 dot menu in the address bar. At the bottom, there should be an option to "Add Search Engine". Once you've clicked this, open your Firefox Preferences menu, click "Search" in the left menu, and use the available dropdown to select "Shoogle" from the list.
|
||||||
- Firefox (Mobile)
|
- Firefox (iOS)
|
||||||
- In the mobile app Settings page, tap "Search" within the "General" section. There should be an option titled "Add Search Engine" to select. It should prompt you to enter a title and search query url - use the following elements to fill out the form:
|
- In the mobile app Settings page, tap "Search" within the "General" section. There should be an option titled "Add Search Engine" to select. It should prompt you to enter a title and search query url - use the following elements to fill out the form:
|
||||||
- Title: "Shoogle"
|
- Title: "Shoogle"
|
||||||
- URL: "https://\<your shoogle url\>/search?q=%s"
|
- URL: "https://\<your shoogle url\>/search?q=%s"
|
||||||
|
- Others (TODO)
|
||||||
|
|
||||||
### Customizing and Configuration
|
### Customizing and Configuration
|
||||||
Shoogle currently allows a few minor configuration settings, accessible from the home page:
|
Shoogle currently allows a few minor configuration settings, accessible from the home page:
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
import os
|
import os
|
||||||
|
|
||||||
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
|
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
|
||||||
|
app.secret_key = Fernet.generate_key()
|
||||||
|
|
||||||
from app import routes
|
from app import routes
|
||||||
|
|
221
app/filter.py
221
app/filter.py
|
@ -1,4 +1,6 @@
|
||||||
|
from app.request import VALID_PARAMS
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
import re
|
import re
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
from urllib.parse import parse_qs
|
from urllib.parse import parse_qs
|
||||||
|
@ -7,7 +9,7 @@ SKIP_ARGS = ['ref_src', 'utm']
|
||||||
|
|
||||||
|
|
||||||
class Filter:
|
class Filter:
|
||||||
def __init__(self, mobile=False, config=None):
|
def __init__(self, mobile=False, config=None, secret_key=''):
|
||||||
if config is None:
|
if config is None:
|
||||||
config = {}
|
config = {}
|
||||||
|
|
||||||
|
@ -15,6 +17,7 @@ class Filter:
|
||||||
self.dark = config['dark'] if 'dark' in config else False
|
self.dark = config['dark'] if 'dark' in config else False
|
||||||
self.nojs = config['nojs'] if 'nojs' in config else False
|
self.nojs = config['nojs'] if 'nojs' in config else False
|
||||||
self.mobile = mobile
|
self.mobile = mobile
|
||||||
|
self.secret_key = secret_key
|
||||||
|
|
||||||
def __getitem__(self, name):
|
def __getitem__(self, name):
|
||||||
return getattr(self, name)
|
return getattr(self, name)
|
||||||
|
@ -30,104 +33,124 @@ class Filter:
|
||||||
return page
|
return page
|
||||||
|
|
||||||
def clean(self, soup):
    """Run every filtering pass over a parsed results page.

    The passes are applied in a fixed order: ad removal, image proxying,
    styling tweaks, then link rewriting. Afterwards the first ``<form>``
    on the page (if any) is switched to POST.

    :param soup: parsed results page; it is modified in place.
    :return: the same ``soup`` object, for convenient chaining.
    """
    self.remove_ads(soup)
    self.sync_images(soup)
    self.update_styling(soup)
    self.update_links(soup)

    # Not every page has a form; find() returns None in that case and item
    # assignment on None would raise TypeError.
    input_form = soup.find('form')
    if input_form is not None:
        input_form['method'] = 'POST'

    return soup
|
||||||
|
|
||||||
|
def remove_ads(self, soup):
    """Delete ad/sponsored blocks from the main results container.

    Looks up ``<div id="main">`` and inspects only its direct ``<div>``
    children. Does nothing when the container is absent.

    :param soup: parsed results page; it is modified in place.
    """
    container = soup.find('div', {'id': 'main'})
    if container is not None:
        for child in container.findAll('div', recursive=False):
            # Only ads/sponsored content use classes in the list of result divs
            if 'class' in child.attrs:
                child.decompose()
|
||||||
|
|
||||||
|
def sync_images(self, soup):
    """Point every image at the local ``/tmp`` endpoint with an encrypted URL.

    For each ``<img>`` that has a ``src`` attribute, the source URL is made
    absolute if it is protocol-relative (``//host/...``), encrypted with
    ``self.secret_key`` and stored back as ``/tmp?image_url=<token>``.

    :param soup: parsed results page; it is modified in place.
    """
    # Select on the attribute itself. Membership tests on a tag
    # (``'src' in tag``) look at the tag's children rather than its
    # attributes, so they never match an <img>.
    images = soup.find_all('img', src=True)
    if not images:
        return

    cipher = Fernet(self.secret_key)  # one cipher for the whole page
    for img in images:
        img_src = img['src']
        if img_src.startswith('//'):
            img_src = 'https:' + img_src

        enc_src = cipher.encrypt(img_src.encode())
        img['src'] = '/tmp?image_url=' + enc_src.decode()
|
||||||
|
|
||||||
|
def update_styling(self, soup):
    """Apply cosmetic adjustments to the results page.

    Removes ``<button>`` and ``<svg>`` elements, restyles the logo link when
    ``self.mobile`` is set, stretches the header search bar to full width,
    and applies dark-mode colours when ``self.dark`` is set. Every lookup
    tolerates the element being absent.

    :param soup: parsed results page; it is modified in place.
    """
    # Remove unnecessary button(s)
    for button in soup.find_all('button'):
        button.decompose()

    # Remove svg logos
    for svg in soup.find_all('svg'):
        svg.decompose()

    # Update logo
    logo = soup.find('a', {'class': 'l'})
    if logo and self.mobile:
        logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; ' \
                        'font-size:18px; '

    # Fix search bar length on mobile. Each step is checked explicitly: a
    # missing final <div> would otherwise surface as a TypeError on item
    # assignment rather than an AttributeError.
    header = soup.find('header')
    form = header.find('form') if header is not None else None
    search_bar = form.find('div') if form is not None else None
    if search_bar is not None:
        search_bar['style'] = 'width: 100%;'

    # Set up dark mode if active
    if self.dark:
        html = soup.find('html')
        if html is not None:
            html['style'] = 'scrollbar-color: #333 #111;'
        for input_element in soup.find_all('input'):
            input_element['style'] = 'color:#fff;'
|
||||||
|
|
||||||
|
def update_links(self, soup):
    """Rewrite result anchors and strip leftover scripts.

    For every ``<a href>``:

    * links containing ``/advanced_search`` are removed;
    * ``/search?q=...`` links get their query encrypted with
      ``self.secret_key`` and keep only the parameters listed in
      ``VALID_PARAMS``;
    * ``url?q=...`` redirect links are replaced with their destination,
      minus any query argument whose name starts with a ``SKIP_ARGS``
      entry. When ``self.nojs`` is set, a ``/window?location=...`` link
      is appended to the anchor.

    Finally all ``<script>`` tags and the ``sfooter`` div (if present) are
    removed.

    :param soup: parsed results page; it is modified in place.
    """
    # Prefix match so that e.g. "utm" also covers utm_source, utm_medium, ...
    skip_prefixes = tuple(SKIP_ARGS)

    # Replace hrefs with only the intended destination (no "utm" type tags)
    for a in soup.find_all('a', href=True):
        href = a['href']
        if '/advanced_search' in href:
            a.decompose()
            continue

        if '?q=' not in href:
            continue

        result_link = urlparse.urlparse(href)
        result_args = parse_qs(result_link.query)
        if 'q' not in result_args:
            # parse_qs drops blank values, so a bare "?q=" has no 'q' key
            continue
        query_link = result_args['q'][0]

        if '/search?q=' in href:
            enc_result = Fernet(self.secret_key).encrypt(query_link.encode())
            new_search = '/search?q=' + enc_result.decode()

            for param in VALID_PARAMS:
                if param in result_args:
                    # parse_qs returns decoded values; re-encode for the URL
                    new_search += '&' + urlparse.urlencode({param: result_args[param][0]})
            a['href'] = new_search
            continue

        if 'url?q=' in href:
            # Strip unneeded arguments
            parsed_link = urlparse.urlparse(query_link)
            safe_pairs = [
                (arg, value)
                for arg, value in urlparse.parse_qsl(parsed_link.query, keep_blank_values=True)
                if not arg.startswith(skip_prefixes)
            ]

            # Rebuild the link with only the filtered args. urlunparse omits
            # the "?" when the query is empty, so links with zero, one or
            # many remaining arguments all come out well-formed.
            query_link = urlparse.urlunparse(
                parsed_link._replace(query=urlparse.urlencode(safe_pairs)))

            a['href'] = query_link

            # Add no-js option
            if self.nojs:
                nojs_link = soup.new_tag('a')
                # Quote the destination so its own "&"/"#" characters stay
                # inside the single "location" parameter.
                nojs_link['href'] = '/window?location=' + urlparse.quote(query_link, safe='')
                nojs_link['style'] = 'display:block;width:100%;'
                nojs_link.string = 'NoJS Link: /window?location=' + query_link
                a.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
                a.append(nojs_link)

    # Ensure no extra scripts passed through
    for script in soup('script'):
        script.decompose()

    footer = soup.find('div', id='sfooter')
    if footer is not None:
        footer.decompose()
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,9 @@ SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
|
||||||
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
|
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
|
||||||
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
|
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
|
||||||
|
|
||||||
|
# Valid query params
|
||||||
|
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
|
||||||
|
|
||||||
|
|
||||||
def gen_user_agent(normal_ua):
|
def gen_user_agent(normal_ua):
|
||||||
is_mobile = 'Android' in normal_ua or 'iPhone' in normal_ua
|
is_mobile = 'Android' in normal_ua or 'iPhone' in normal_ua
|
||||||
|
|
|
@ -2,6 +2,7 @@ from app import app
|
||||||
from app.filter import Filter
|
from app.filter import Filter
|
||||||
from app.request import Request, gen_query
|
from app.request import Request, gen_query
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from cryptography.fernet import Fernet, InvalidToken
|
||||||
from flask import g, make_response, request, redirect, render_template, send_file
|
from flask import g, make_response, request, redirect, render_template, send_file
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
|
@ -19,11 +20,6 @@ def before_request_func():
|
||||||
g.user_request = Request(request.headers.get('User-Agent'))
|
g.user_request = Request(request.headers.get('User-Agent'))
|
||||||
|
|
||||||
|
|
||||||
# @app.after_request
|
|
||||||
# def after_request(response):
|
|
||||||
# return response
|
|
||||||
|
|
||||||
|
|
||||||
@app.route('/', methods=['GET'])
|
@app.route('/', methods=['GET'])
|
||||||
def index():
|
def index():
|
||||||
bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff'
|
bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff'
|
||||||
|
@ -42,16 +38,25 @@ def opensearch():
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
@app.route('/search', methods=['GET'])
|
@app.route('/search', methods=['GET', 'POST'])
|
||||||
def search():
|
def search():
|
||||||
q = request.args.get('q')
|
q = None
|
||||||
|
if request.method == 'GET':
|
||||||
|
q = request.args.get('q')
|
||||||
|
try:
|
||||||
|
q = Fernet(app.secret_key).decrypt(q.encode()).decode()
|
||||||
|
except InvalidToken:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
q = request.form['q']
|
||||||
|
|
||||||
if q is None or len(q) <= 0:
|
if q is None or len(q) <= 0:
|
||||||
return render_template('error.html')
|
return render_template('error.html')
|
||||||
|
|
||||||
user_agent = request.headers.get('User-Agent')
|
user_agent = request.headers.get('User-Agent')
|
||||||
mobile = 'Android' in user_agent or 'iPhone' in user_agent
|
mobile = 'Android' in user_agent or 'iPhone' in user_agent
|
||||||
|
|
||||||
content_filter = Filter(mobile, user_config)
|
content_filter = Filter(mobile, user_config, secret_key=app.secret_key)
|
||||||
full_query = gen_query(q, request.args, content_filter.near)
|
full_query = gen_query(q, request.args, content_filter.near)
|
||||||
get_body = g.user_request.send(query=full_query)
|
get_body = g.user_request.send(query=full_query)
|
||||||
|
|
||||||
|
@ -95,7 +100,9 @@ def imgres():
|
||||||
|
|
||||||
@app.route('/tmp')
|
@app.route('/tmp')
|
||||||
def tmp():
|
def tmp():
|
||||||
file_data = g.user_request.send(base_url=request.args.get('image_url'), return_bytes=True)
|
cipher_suite = Fernet(app.secret_key)
|
||||||
|
img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode()
|
||||||
|
file_data = g.user_request.send(base_url=img_url, return_bytes=True)
|
||||||
tmp_mem = io.BytesIO()
|
tmp_mem = io.BytesIO()
|
||||||
tmp_mem.write(file_data)
|
tmp_mem.write(file_data)
|
||||||
tmp_mem.seek(0)
|
tmp_mem.seek(0)
|
||||||
|
|
|
@ -57,6 +57,7 @@ body {
|
||||||
margin: auto;
|
margin: auto;
|
||||||
border-radius: 0 0 10px 10px;
|
border-radius: 0 0 10px 10px;
|
||||||
max-width: 600px;
|
max-width: 600px;
|
||||||
|
-webkit-appearance: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.config-div {
|
.config-div {
|
||||||
|
|
|
@ -18,10 +18,6 @@ document.addEventListener("DOMContentLoaded", function() {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
searchBtn.onclick = function() {
|
|
||||||
window.location.href = '/search?q=' + encodeURI(searchBar.value);
|
|
||||||
};
|
|
||||||
|
|
||||||
// Setup shoogle config
|
// Setup shoogle config
|
||||||
const collapsible = document.getElementById("config-collapsible");
|
const collapsible = document.getElementById("config-collapsible");
|
||||||
collapsible.addEventListener("click", function() {
|
collapsible.addEventListener("click", function() {
|
||||||
|
|
|
@ -25,10 +25,12 @@
|
||||||
<body id="main" style="display: none; background-color: {{ bg }}">
|
<body id="main" style="display: none; background-color: {{ bg }}">
|
||||||
<div class="search-container">
|
<div class="search-container">
|
||||||
<img class="logo" src="/static/img/logo.png">
|
<img class="logo" src="/static/img/logo.png">
|
||||||
<div class="search-fields">
|
<form action="/search" method="post">
|
||||||
<input type="text" id="search-bar">
|
<div class="search-fields">
|
||||||
<button type="submit" id="search-submit">Search</button>
|
<input type="text" name="q" id="search-bar">
|
||||||
</div>
|
<input type="submit" id="search-submit" value="Submit">
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
<br/>
|
<br/>
|
||||||
<button id="config-collapsible" class="collapsible">Configuration</button>
|
<button id="config-collapsible" class="collapsible">Configuration</button>
|
||||||
<div class="content">
|
<div class="content">
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
|
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
|
||||||
xmlns:moz="http://www.mozilla.org/2006/browser/search/">
|
xmlns:moz="http://www.mozilla.org/2006/browser/search/">
|
||||||
<ShortName>Shoogle</ShortName>
|
<ShortName>ShoogleTEST</ShortName>
|
||||||
<Description>Shoogle: A lightweight, deployable Google search proxy for desktop/mobile that removes Javascript, AMP links, and ads</Description>
|
<Description>Shoogle: A lightweight, deployable Google search proxy for desktop/mobile that removes Javascript, AMP links, and ads</Description>
|
||||||
<InputEncoding>UTF-8</InputEncoding>
|
<InputEncoding>UTF-8</InputEncoding>
|
||||||
<Image width="32" height="32" type="image/x-icon">/static/img/favicon/favicon-32x32.png</Image>
|
<Image width="32" height="32" type="image/x-icon">/static/img/favicon/favicon-32x32.png</Image>
|
||||||
<Url type="text/html" template="{{ shoogle_url }}/search">
|
<Url type="text/html" method="post" template="{{ shoogle_url }}/search">
|
||||||
<Param name="q" value="{searchTerms}"/>
|
<Param name="q" value="{searchTerms}"/>
|
||||||
</Url>
|
</Url>
|
||||||
<Url type="application/x-suggestions+json" template="{{ shoogle_url }}/search"/>
|
<Url type="application/x-suggestions+json" template="{{ shoogle_url }}/search"/>
|
||||||
|
|
Loading…
Reference in New Issue