Merge branch 'master' of github.com:benbusby/shoogle

main
Ben Busby 2020-04-24 17:25:06 -06:00
commit 525f7adf22
3 changed files with 100 additions and 75 deletions

app/filter.py

@@ -14,6 +14,9 @@ class Filter:
         self.nojs = config['nojs'] if 'nojs' in config else False
         self.mobile = mobile

+    def __getitem__(self, name):
+        return getattr(self, name)
+
     def reskin(self, page):
         # Aesthetic only re-skinning
         page = page.replace('>G<', '>Sh<')
@@ -24,34 +27,6 @@ class Filter:
         return page

-    def gen_query(self, q, args):
-        # Use :past(hour/day/week/month/year) if available
-        # example search "new restaurants :past month"
-        tbs = ''
-        if ':past' in q:
-            time_range = str.strip(q.split(':past', 1)[-1])
-            tbs = '&tbs=qdr:' + str.lower(time_range[0])
-
-        # Ensure search query is parsable
-        q = urlparse.quote(q)
-
-        # Pass along type of results (news, images, books, etc)
-        tbm = ''
-        if 'tbm' in args:
-            tbm = '&tbm=' + args.get('tbm')
-
-        # Get results page start value (10 per page, ie page 2 start val = 20)
-        start = ''
-        if 'start' in args:
-            start = '&start=' + args.get('start')
-
-        # Grab city from config, if available
-        near = ''
-        if self.near:
-            near = '&near=' + urlparse.quote(self.near)
-
-        return q + tbs + tbm + start + near
-
     def clean(self, soup):
         # Remove all ads
         main_divs = soup.find('div', {'id': 'main'})
@@ -76,6 +51,10 @@ class Filter:
         if logo and self.mobile:
             logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; font-size:18px;'

+        # Fix search bar length on mobile
+        search_bar = soup.find('header').find('form').find('div')
+        search_bar['style'] = 'width: 100%;'
+
         # Replace hrefs with only the intended destination (no "utm" type tags)
         for a in soup.find_all('a', href=True):
             href = a['href']
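Aside from the mobile layout tweaks, the notable addition to Filter in this commit is __getitem__, which lets a Filter instance be read with dict-style indexing. A hypothetical snippet (not from the repo; the constructor call and config dict below are assumptions) showing what that enables:

# Hypothetical: index a Filter like a mapping via the new __getitem__.
# Assumes Filter(mobile, config) accepts this minimal config dict.
content_filter = Filter(True, {'nojs': False, 'near': 'Denver'})
assert content_filter['mobile'] == content_filter.mobile
assert content_filter['nojs'] is False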

app/request.py Normal file

@@ -0,0 +1,76 @@
+from app import rhyme
+from app.filter import Filter
+from io import BytesIO
+import pycurl
+import urllib.parse as urlparse
+
+# Base search url
+SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
+
+MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
+DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
+
+
+def gen_user_agent(normal_ua):
+    is_mobile = 'Android' in normal_ua or 'iPhone' in normal_ua
+
+    mozilla = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla')
+    firefox = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox')
+    linux = rhyme.get_rhyme('Lin') + 'ux'
+
+    if is_mobile:
+        return MOBILE_UA.format(mozilla, firefox)
+    else:
+        return DESKTOP_UA.format(mozilla, linux, firefox)
+
+
+def gen_query(q, args, near_city=None):
+    # Use :past(hour/day/week/month/year) if available
+    # example search "new restaurants :past month"
+    tbs = ''
+    if ':past' in q:
+        time_range = str.strip(q.split(':past', 1)[-1])
+        tbs = '&tbs=qdr:' + str.lower(time_range[0])
+
+    # Ensure search query is parsable
+    q = urlparse.quote(q)
+
+    # Pass along type of results (news, images, books, etc)
+    tbm = ''
+    if 'tbm' in args:
+        tbm = '&tbm=' + args.get('tbm')
+
+    # Get results page start value (10 per page, ie page 2 start val = 20)
+    start = ''
+    if 'start' in args:
+        start = '&start=' + args.get('start')
+
+    # Search for results near a particular city, if available
+    near = ''
+    if near_city is not None:
+        near = '&near=' + urlparse.quote(near_city)
+
+    return q + tbs + tbm + start + near
+
+
+class Request:
+    def __init__(self, normal_ua):
+        self.modified_user_agent = gen_user_agent(normal_ua)
+
+    def __getitem__(self, name):
+        return getattr(self, name)
+
+    def send(self, base_url=SEARCH_URL, query=''):
+        response_header = []
+
+        b_obj = BytesIO()
+        crl = pycurl.Curl()
+        crl.setopt(crl.URL, base_url + query)
+        crl.setopt(crl.USERAGENT, self.modified_user_agent)
+        crl.setopt(crl.WRITEDATA, b_obj)
+        crl.setopt(crl.HEADERFUNCTION, response_header.append)
+        crl.setopt(pycurl.FOLLOWLOCATION, 1)
+        crl.perform()
+        crl.close()
+
+        return b_obj.getvalue().decode('utf-8', 'ignore')
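The new module keeps query construction and the outgoing pycurl call in one place. For orientation, a hypothetical driver script follows; it is not part of the commit, and the query, start value, city, and User-Agent string are invented for illustration:

# Hypothetical usage of app/request.py; every literal below is an example value.
from app.request import Request, gen_query

client_ua = 'Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0'
req = Request(client_ua)  # gen_user_agent() swaps in a rhymed User-Agent

# ':past week' becomes '&tbs=qdr:w', 'start' paginates, near_city adds '&near=...'
query = gen_query('new restaurants :past week', {'start': '10'}, near_city='Denver')
html = req.send(query=query)  # fetches SEARCH_URL + query through pycurl
print(html[:200])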

app/routes.py

@@ -1,58 +1,27 @@
-from app import app, rhyme
+from app import app
 from app.filter import Filter
+from app.request import Request, gen_query
 from bs4 import BeautifulSoup
-from flask import request, redirect, render_template
-from io import BytesIO
+from flask import g, request, redirect, render_template
 import json
 import os
-import pycurl
 import urllib.parse as urlparse

 app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
 app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))

-MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
-DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
-
-# Base search url
-SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
-
 user_config = json.load(open(app.config['STATIC_FOLDER'] + '/config.json'))

-def get_ua(user_agent):
-    is_mobile = 'Android' in user_agent or 'iPhone' in user_agent
-    mozilla = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla')
-    firefox = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox')
-    linux = rhyme.get_rhyme('Lin') + 'ux'
-    if is_mobile:
-        return MOBILE_UA.format(mozilla, firefox)
-    else:
-        return DESKTOP_UA.format(mozilla, linux, firefox)
-
-def send_request(curl_url, ua):
-    response_header = []
-    b_obj = BytesIO()
-    crl = pycurl.Curl()
-    crl.setopt(crl.URL, curl_url)
-    crl.setopt(crl.USERAGENT, ua)
-    crl.setopt(crl.WRITEDATA, b_obj)
-    crl.setopt(crl.HEADERFUNCTION, response_header.append)
-    crl.setopt(pycurl.FOLLOWLOCATION, 1)
-    crl.perform()
-    crl.close()
-    return b_obj.getvalue().decode('utf-8', 'ignore')
-
+@app.before_request
+def before_request_func():
+    g.user_request = Request(request.headers.get('User-Agent'))
+
 @app.route('/', methods=['GET'])
 def index():
     bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff'
-    return render_template('index.html', bg=bg, ua=get_ua(request.headers.get('User-Agent')))
+    return render_template('index.html', bg=bg, ua=g.user_request.modified_user_agent)

 @app.route('/search', methods=['GET'])
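The before_request hook above is the main wiring change in this file: each incoming request gets its own Request object stored on flask.g, so every handler reuses one per-request sender instead of the old module-level helpers. A minimal standalone sketch of that Flask pattern (generic, not the project's code):

# Generic illustration of storing per-request state on flask.g.
from flask import Flask, g, request

app = Flask(__name__)

@app.before_request
def attach_state():
    # Runs before every view; g is torn down when the request ends.
    g.user_agent = request.headers.get('User-Agent', 'unknown')

@app.route('/')
def index():
    return g.user_agent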
@@ -65,12 +34,13 @@ def search():
     mobile = 'Android' in user_agent or 'iPhone' in user_agent

     content_filter = Filter(mobile, user_config)
-    full_query = content_filter.gen_query(q, request.args)
-    get_body = send_request(SEARCH_URL + full_query, get_ua(user_agent))
-    get_body = content_filter.reskin(get_body)
-    soup = content_filter.clean(BeautifulSoup(get_body, 'html.parser'))
+    full_query = gen_query(q, request.args, content_filter.near)
+    get_body = g.user_request.send(query=full_query)

-    return render_template('display.html', query=urlparse.unquote(q), response=soup)
+    shoogle_results = content_filter.reskin(get_body)
+    formatted_results = content_filter.clean(BeautifulSoup(shoogle_results, 'html.parser'))
+
+    return render_template('display.html', query=urlparse.unquote(q), response=formatted_results)

 @app.route('/config', methods=['GET', 'POST'])
@@ -107,19 +77,19 @@ def imgres():
 @app.route('/window')
 def window():
-    get_body = send_request(request.args.get('location'), get_ua(request.headers.get('User-Agent')))
+    get_body = g.user_request.send(base_url=request.args.get('location'))
     get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
     get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"')

-    soup = BeautifulSoup(get_body, 'html.parser')
+    results = BeautifulSoup(get_body, 'html.parser')

     try:
-        for script in soup('script'):
+        for script in results('script'):
             script.decompose()
     except Exception:
         pass

-    return render_template('display.html', response=soup)
+    return render_template('display.html', response=results)

 if __name__ == '__main__':
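One detail worth calling out in the window() handler: the proxied page is de-scripted by decomposing every script tag before rendering. A self-contained illustration of that BeautifulSoup step (the sample HTML is made up):

# Standalone example of the script-stripping idiom used in window().
from bs4 import BeautifulSoup

html = '<html><body><p>hello</p><script>alert(1)</script></body></html>'
results = BeautifulSoup(html, 'html.parser')
for script in results('script'):   # results('script') is shorthand for find_all('script')
    script.decompose()              # removes the tag and its contents from the tree
print(results)                      # <html><body><p>hello</p></body></html>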