Block websites from search results via user config (#304)

* Block websites in search results via user config

Adds a new config field "Block" to specify a comma-separated list of
websites to block in search results. The block list is applied to every search.

* Add test for blocking sites from search results

* Document WHOOGLE_CONFIG_BLOCK usage

* Strip '-site:' filters from query in header template

The 'behind the scenes' site filter applied for blocked sites was
appearing in the query field when navigating between search categories
(all -> images -> news, etc). This change strips the filter from the query
field in every category except "images", since the image category uses a
separate header. This should eventually be addressed once the image page can
use the standard whoogle header, but until then, the filter will still
appear for image searches.
main
Ben Busby 2021-05-07 11:45:53 -04:00 committed by GitHub
parent a7bf9728e3
commit c8da53d4b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 58 additions and 5 deletions

View File

@ -267,6 +267,7 @@ These environment variables allow setting default config values, but can be over
| WHOOGLE_CONFIG_COUNTRY | Filter results by hosting country | | WHOOGLE_CONFIG_COUNTRY | Filter results by hosting country |
| WHOOGLE_CONFIG_LANGUAGE | Set interface language | | WHOOGLE_CONFIG_LANGUAGE | Set interface language |
| WHOOGLE_CONFIG_SEARCH_LANGUAGE | Set search result language | | WHOOGLE_CONFIG_SEARCH_LANGUAGE | Set search result language |
| WHOOGLE_CONFIG_BLOCK | Block websites from search results (use comma-separated list) |
| WHOOGLE_CONFIG_DARK | Enable dark theme | | WHOOGLE_CONFIG_DARK | Enable dark theme |
| WHOOGLE_CONFIG_SAFE | Enable safe searches | | WHOOGLE_CONFIG_SAFE | Enable safe searches |
| WHOOGLE_CONFIG_ALTS | Use social media site alternatives (nitter, invidious, etc) | | WHOOGLE_CONFIG_ALTS | Use social media site alternatives (nitter, invidious, etc) |

View File

@ -85,6 +85,11 @@
"value": "", "value": "",
"required": false "required": false
}, },
"WHOOGLE_CONFIG_BLOCK": {
"description": "[CONFIG] Block websites from search results (comma-separated list)",
"value": "",
"required": false
},
"WHOOGLE_CONFIG_DARK": { "WHOOGLE_CONFIG_DARK": {
"description": "[CONFIG] Enable dark mode (set to 1 or leave blank)", "description": "[CONFIG] Enable dark mode (set to 1 or leave blank)",
"value": "", "value": "",

View File

@ -18,6 +18,7 @@ class Config:
'WHOOGLE_CONFIG_STYLE', 'WHOOGLE_CONFIG_STYLE',
open(os.path.join(app_config['STATIC_FOLDER'], open(os.path.join(app_config['STATIC_FOLDER'],
'css/variables.css')).read()) 'css/variables.css')).read())
self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '')
self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '') self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '')
self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE') self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE')
self.dark = read_config_bool('WHOOGLE_CONFIG_DARK') self.dark = read_config_bool('WHOOGLE_CONFIG_DARK')

View File

@ -120,6 +120,10 @@ def gen_query(query, args, config, near_city=None) -> str:
) if config.lang_interface else '' ) if config.lang_interface else ''
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off') param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
# Block all sites specified in the user config
for blocked in config.block.split(','):
query += (' -site:' + blocked) if blocked else ''
for val in param_dict.values(): for val in param_dict.values():
if not val: if not val:
continue continue

View File

@ -2,7 +2,6 @@ import argparse
import base64 import base64
import io import io
import json import json
import os
import pickle import pickle
import urllib.parse as urlparse import urllib.parse as urlparse
import uuid import uuid
@ -17,7 +16,7 @@ from app import app
from app.models.config import Config from app.models.config import Config
from app.request import Request, TorError from app.request import Request, TorError
from app.utils.bangs import resolve_bang from app.utils.bangs import resolve_bang
from app.utils.session import valid_user_session from app.utils.session import generate_user_key, valid_user_session
from app.utils.search import * from app.utils.search import *
# Load DDG bang json files only on init # Load DDG bang json files only on init

View File

@ -22,7 +22,7 @@
style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important; style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important;
color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}; color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }};
type="text" type="text"
value="{{ query }}"> value="{{ query[:query.find('-site:')] }}">
<input style="color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}" id="search-reset" type="reset" value="x"> <input style="color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}" id="search-reset" type="reset" value="x">
<input name="tbm" value="{{ search_type }}" style="display: none"> <input name="tbm" value="{{ search_type }}" style="display: none">
<input type="submit" style="display: none;"> <input type="submit" style="display: none;">
@ -54,7 +54,7 @@
name="q" name="q"
spellcheck="false" spellcheck="false"
type="text" type="text"
value="{{ query }}" value="{{ query[:query.find('-site:')] }}"
style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important; style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important;
color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}; color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }};
border-bottom: {{ '2px solid var(--whoogle-dark-element-bg)' if config.dark else '0px' }};"> border-bottom: {{ '2px solid var(--whoogle-dark-element-bg)' if config.dark else '0px' }};">

View File

@ -106,6 +106,10 @@
<label for="config-near">Near: </label> <label for="config-near">Near: </label>
<input type="text" name="near" id="config-near" placeholder="City Name" value="{{ config.near }}"> <input type="text" name="near" id="config-near" placeholder="City Name" value="{{ config.near }}">
</div> </div>
<div class="config-div config-div-block">
<label for="config-block">Block: </label>
<input type="text" name="block" id="config-block" placeholder="Comma-separated site list" value="{{ config.block }}">
</div>
<div class="config-div config-div-nojs"> <div class="config-div config-div-nojs">
<label for="config-nojs">Show NoJS Links: </label> <label for="config-nojs">Show NoJS Links: </label>
<input type="checkbox" name="nojs" id="config-nojs" {{ 'checked' if config.nojs else '' }}> <input type="checkbox" name="nojs" id="config-nojs" {{ 'checked' if config.nojs else '' }}>

View File

@ -1,5 +1,4 @@
from app.filter import Filter, get_first_link from app.filter import Filter, get_first_link
from app.utils.session import generate_user_key
from app.request import gen_query from app.request import gen_query
from bs4 import BeautifulSoup as bsoup from bs4 import BeautifulSoup as bsoup
from cryptography.fernet import Fernet, InvalidToken from cryptography.fernet import Fernet, InvalidToken

View File

@ -3,6 +3,9 @@ from app.filter import Filter
from app.utils.session import generate_user_key from app.utils.session import generate_user_key
from datetime import datetime from datetime import datetime
from dateutil.parser import * from dateutil.parser import *
from urllib.parse import urlparse
from test.conftest import demo_config
def get_search_results(data): def get_search_results(data):
@ -46,6 +49,29 @@ def test_post_results(client):
assert len(get_search_results(rv.data)) <= 15 assert len(get_search_results(rv.data)) <= 15
def test_block_results(client):
    """Check that a site listed in the "block" config is dropped from results.

    The test first searches without any block list and expects at least one
    link whose host contains ``pinterest.com``. It then posts a config with
    ``block`` set to ``pinterest.com``, repeats the search, and expects no
    link host to contain ``pinterest.com``.

    Args:
        client: test client used to post to ``/search`` and ``/config``.
    """
    def link_hosts(response):
        # Network location of every anchor in the response that has an href.
        soup = BeautifulSoup(response.data, 'html.parser')
        return [urlparse(link['href']).netloc
                for link in soup.find_all('a', href=True)]

    # Baseline: with no block list, pinterest links should be present.
    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv._status_code == 200
    assert any('pinterest.com' in host for host in link_hosts(rv))

    # demo_config is imported from test.conftest and is therefore one shared
    # object for the whole test run; post a copy with the override instead of
    # assigning into it, so the block list does not leak into other tests.
    blocked_config = dict(demo_config, block='pinterest.com')
    rv = client.post('/config', data=blocked_config)
    assert rv._status_code == 302

    # With the block list applied, no result link may point at pinterest.
    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv._status_code == 200
    assert all('pinterest.com' not in host for host in link_hosts(rv))
# TODO: Unit test the site alt method instead -- the results returned # TODO: Unit test the site alt method instead -- the results returned
# are too unreliable for this test in particular. # are too unreliable for this test in particular.
# def test_site_alts(client): # def test_site_alts(client):

View File

@ -18,25 +18,39 @@
# See app/static/settings/countries.json for values # See app/static/settings/countries.json for values
#WHOOGLE_CONFIG_COUNTRY=countryUK #WHOOGLE_CONFIG_COUNTRY=countryUK
# See app/static/settings/languages.json for values # See app/static/settings/languages.json for values
#WHOOGLE_CONFIG_LANGUAGE=lang_en #WHOOGLE_CONFIG_LANGUAGE=lang_en
# See app/static/settings/languages.json for values # See app/static/settings/languages.json for values
#WHOOGLE_CONFIG_SEARCH_LANGUAGE=lang_en #WHOOGLE_CONFIG_SEARCH_LANGUAGE=lang_en
# Disable changing of config from client # Disable changing of config from client
#WHOOGLE_CONFIG_DISABLE=1 #WHOOGLE_CONFIG_DISABLE=1
# Block websites from search results (comma-separated list)
#WHOOGLE_CONFIG_BLOCK=pinterest.com,whitehouse.gov
# Dark mode # Dark mode
#WHOOGLE_CONFIG_DARK=1 #WHOOGLE_CONFIG_DARK=1
# Safe search mode # Safe search mode
#WHOOGLE_CONFIG_SAFE=1 #WHOOGLE_CONFIG_SAFE=1
# Use social media site alternatives (nitter, bibliogram, etc) # Use social media site alternatives (nitter, bibliogram, etc)
#WHOOGLE_CONFIG_ALTS=1 #WHOOGLE_CONFIG_ALTS=1
# Use Tor if available # Use Tor if available
#WHOOGLE_CONFIG_TOR=1 #WHOOGLE_CONFIG_TOR=1
# Open results in new tab # Open results in new tab
#WHOOGLE_CONFIG_NEW_TAB=1 #WHOOGLE_CONFIG_NEW_TAB=1
# Search using GET requests only (exposes query in logs) # Search using GET requests only (exposes query in logs)
#WHOOGLE_CONFIG_GET_ONLY=1 #WHOOGLE_CONFIG_GET_ONLY=1
# Set instance URL # Set instance URL
#WHOOGLE_CONFIG_URL=https://<whoogle url>/ #WHOOGLE_CONFIG_URL=https://<whoogle url>/
# Set custom CSS styling/theming # Set custom CSS styling/theming
#WHOOGLE_CONFIG_STYLE=":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }" #WHOOGLE_CONFIG_STYLE=":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }"