From 448efb8f2aa14119cf5d25b290b72ff95569a437 Mon Sep 17 00:00:00 2001 From: "Joao A. Candido Ramos" Date: Fri, 16 Apr 2021 16:16:14 +0200 Subject: [PATCH] Add "view image" functionality (#268) * add view image option * prevent whoogle links from opening in a new tab. * remove view image template on mobile requests * change loop values to be more robust to the number of images * Update app/templates/imageresults.html * fix "Basically the .cvifge class needs width: 100%; in order to expand the search input to fit the form width." * Update app/templates/imageresults.html * remove hardcoded string from template * Add view image config var to app.json * Add view image config var to whoogle.env Co-authored-by: jacr13 Co-authored-by: Ben Busby --- README.md | 17 ++- app.json | 5 + app/filter.py | 62 ++++++++ app/models/config.py | 2 + app/request.py | 12 +- app/templates/imageresults.html | 116 +++++++++++++++ app/templates/index.html | 246 ++++++++++++++++---------------- app/utils/search.py | 14 +- whoogle.env | 3 + 9 files changed, 350 insertions(+), 127 deletions(-) create mode 100644 app/templates/imageresults.html diff --git a/README.md b/README.md index a26f6c5..dcd3087 100644 --- a/README.md +++ b/README.md @@ -103,17 +103,25 @@ Sandboxed temporary instance: ```bash $ whoogle-search --help -usage: whoogle-search [-h] [--port ] [--host ] [--debug] - [--https-only] +usage: whoogle-search [-h] [--port ] [--host ] [--debug] [--https-only] [--userpass ] + [--proxyauth ] [--proxytype ] [--proxyloc ] Whoogle Search console runner optional arguments: - -h, --help show this help message and exit + -h, --help Show this help message and exit --port Specifies a port to run on (default 5000) --host Specifies the host address to use (default 127.0.0.1) --debug Activates debug mode for the server (default False) - --https-only Enforces HTTPS redirects for all requests (default False) + --https-only Enforces HTTPS redirects for all requests + --userpass + Sets a username/password basic auth combo (default None) + --proxyauth + Sets a username/password for a HTTP/SOCKS proxy (default None) + --proxytype + Sets a proxy type for all connections (default None) + --proxyloc + Sets a proxy location for all connections (default None) ``` See the [available environment variables](#environment-variables) for additional configuration. @@ -286,6 +294,7 @@ These environment variables allow setting default config values, but can be over | WHOOGLE_CONFIG_ALTS | Use social media site alternatives (nitter, invidious, etc) | | WHOOGLE_CONFIG_TOR | Use Tor routing (if available) | | WHOOGLE_CONFIG_NEW_TAB | Always open results in new tab | +| WHOOGLE_CONFIG_VIEW_IMAGE | Enable View Image option | | WHOOGLE_CONFIG_GET_ONLY | Search using GET requests only | | WHOOGLE_CONFIG_URL | The root url of the instance (`https:///`) | | WHOOGLE_CONFIG_STYLE | The custom CSS to use for styling (should be single line) | diff --git a/app.json b/app.json index 691f8fc..47f4ca0 100644 --- a/app.json +++ b/app.json @@ -115,6 +115,11 @@ "value": "", "required": false }, + "WHOOGLE_CONFIG_VIEW_IMAGE": { + "description": "[CONFIG] Enable View Image option (set to 1 or leave blank)", + "value": "", + "required": false + }, "WHOOGLE_CONFIG_GET_ONLY": { "description": "[CONFIG] Search using GET requests only (set to 1 or leave blank)", "value": "", diff --git a/app/filter.py b/app/filter.py index 7848a79..4fede8a 100644 --- a/app/filter.py +++ b/app/filter.py @@ -254,3 +254,65 @@ class Filter: # Replace link destination link_desc[0].replace_with(get_site_alt(link_desc[0])) + + def view_image(self, soup) -> BeautifulSoup: + """Replaces the soup with a new one that handles mobile results and + adds the link of the image full res to the results. + + Args: + soup: A BeautifulSoup object containing the image mobile results. + + Returns: + BeautifulSoup: The new BeautifulSoup object + """ + + # get some tags that are unchanged between mobile and pc versions + search_input = soup.find_all('td', attrs={'class': "O4cRJf"})[0] + search_options = soup.find_all('div', attrs={'class': "M7pB2"})[0] + cor_suggested = soup.find_all('table', attrs={'class': "By0U9"}) + next_pages = soup.find_all('table', attrs={'class': "uZgmoc"})[0] + information = soup.find_all('div', attrs={'class': "TuS8Ad"})[0] + + results = [] + # find results div + results_div = soup.find_all('div', attrs={'class': "nQvrDb"})[0] + # find all the results + results_all = results_div.find_all('div', attrs={'class': "lIMUZd"}) + + for item in results_all: + urls = item.find('a')['href'].split('&imgrefurl=') + + img_url = urlparse.unquote(urls[0].replace('/imgres?imgurl=', '')) + webpage = urlparse.unquote(urls[1].split('&')[0]) + img_tbn = urlparse.unquote(item.find('a').find('img')['src']) + results.append({ + 'domain': urlparse.urlparse(webpage).netloc, + 'img_url': img_url, + 'webpage': webpage, + 'img_tbn': img_tbn + }) + + soup = BeautifulSoup(render_template('imageresults.html', + length=len(results), + results=results, + view_label="View Image"), + features='html.parser') + # replace search input object + soup.find_all('td', + attrs={'class': "O4cRJf"})[0].replaceWith(search_input) + # replace search options object (All, Images, Videos, etc.) + soup.find_all('div', + attrs={'class': "M7pB2"})[0].replaceWith(search_options) + # replace correction suggested by google object if exists + if len(cor_suggested): + soup.find_all( + 'table', + attrs={'class': "By0U9"} + )[0].replaceWith(cor_suggested[0]) + # replace next page object at the bottom of the page + soup.find_all('table', + attrs={'class': "uZgmoc"})[0].replaceWith(next_pages) + # replace information about user connection at the bottom of the page + soup.find_all('div', + attrs={'class': "TuS8Ad"})[0].replaceWith(information) + return soup diff --git a/app/models/config.py b/app/models/config.py index 3898ae7..5b2f192 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -27,7 +27,9 @@ class Config: self.tor = read_config_bool('WHOOGLE_CONFIG_TOR') self.near = os.getenv('WHOOGLE_CONFIG_NEAR', '') self.new_tab = read_config_bool('WHOOGLE_CONFIG_NEW_TAB') + self.view_image = read_config_bool('WHOOGLE_CONFIG_VIEW_IMAGE') self.get_only = read_config_bool('WHOOGLE_CONFIG_GET_ONLY') + self.safe_keys = [ 'lang_search', 'lang_interface', diff --git a/app/request.py b/app/request.py index 9239b6e..1c0c2c6 100644 --- a/app/request.py +++ b/app/request.py @@ -151,6 +151,8 @@ class Request: self.language = config.lang_search self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua self.modified_user_agent = gen_user_agent(self.mobile) + if not self.mobile: + self.modified_user_agent_mobile = gen_user_agent(True) # Set up proxy, if previously configured if os.environ.get('WHOOGLE_PROXY_LOC'): @@ -197,7 +199,8 @@ class Request: return [_.attrib['data'] for _ in root.findall('.//suggestion/[@data]')] - def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response: + def send(self, base_url=SEARCH_URL, query='', attempt=0, + force_mobile=False) -> Response: """Sends an outbound request to a URL. Optionally sends the request using Tor, if enabled by the user. @@ -211,8 +214,13 @@ class Request: Response: The Response object returned by the requests call """ + if force_mobile and not self.mobile: + modified_user_agent = self.modified_user_agent_mobile + else: + modified_user_agent = self.modified_user_agent + headers = { - 'User-Agent': self.modified_user_agent + 'User-Agent': modified_user_agent } # FIXME: Should investigate this further to ensure the consent diff --git a/app/templates/imageresults.html b/app/templates/imageresults.html new file mode 100644 index 0000000..17ee98a --- /dev/null +++ b/app/templates/imageresults.html @@ -0,0 +1,116 @@ + + + + + + + + + + + +
+
+ + Google + +
+
+
+ + + + + + + + + + +
+ +
+
+
+
+ +
+
+ +
+
+
+
+ + +
+
+
+
+ + {% for i in range((length // 4) + 1) %} + + {% for j in range([length - (i*4), 4]|min) %} + + {% endfor %} + + {% endfor %} +
+ +
+
+ + +
+
+
+ +
+
+ + diff --git a/app/templates/index.html b/app/templates/index.html index 9d43988..d063ba5 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -1,153 +1,159 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - Whoogle Search - - -
-
- {{ logo|safe }} -
-
-
-
- -
- -
-
- {% if not config_disabled %} -
- -
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + Whoogle Search + + +
+
+ {{ logo|safe }} +
+
+
+
+ +
+ +
+
+ {% if not config_disabled %} +
+ +
-
+
- - {% for ctry in countries %} - + {% endfor %} - -
— Note: If enabled, a website will only appear in the results if it is *hosted* in the selected country.
+ +
— Note: If enabled, a website will only appear in the results if it is *hosted* in the selected country.
- - {% for lang in languages %} {% endfor %} - +
- - {% for lang in languages %} {% endfor %} - +
- - + + +
+
+ +
-
- - -
- - + +
- - + +
- - + +
- - -
— Replaces Twitter/YouTube/Instagram/Reddit links + + +
— Replaces Twitter/YouTube/Instagram/Reddit links with Nitter/Invidious/Bibliogram/Libreddit links.
- - + + +
+
+ + +
— (Experimental) Adds the "View Image" option on desktop to view full size images in search results. + This will cause image result thumbnails to be lower resolution.
- - + +
- - + +
- - + +
- - +
-   -   - +   +   +
- +
-
- {% endif %} -
-
+
+ {% endif %} +
+ - + + diff --git a/app/utils/search.py b/app/utils/search.py index a856bf6..bb24e4b 100644 --- a/app/utils/search.py +++ b/app/utils/search.py @@ -119,11 +119,23 @@ class Search: self.request_params, self.config, content_filter.near) - get_body = g.user_request.send(query=full_query) + + # force mobile search when view image is true and + # the request is not already made by a mobile + view_image = ('tbm=isch' in full_query + and self.config.view_image + and not g.user_request.mobile) + + get_body = g.user_request.send(query=full_query, + force_mobile=view_image) # Produce cleanable html soup from response html_soup = bsoup(content_filter.reskin(get_body.text), 'html.parser') + # Replace current soup if view_image is active + if view_image: + html_soup = content_filter.view_image(html_soup) + # Indicate whether or not a Tor connection is active tor_banner = bsoup('', 'html.parser') if g.user_request.tor_valid: diff --git a/whoogle.env b/whoogle.env index 3a0f88b..594ec67 100644 --- a/whoogle.env +++ b/whoogle.env @@ -46,6 +46,9 @@ # Open results in new tab #WHOOGLE_CONFIG_NEW_TAB=1 +# Enable View Image option +#WHOOGLE_CONFIG_VIEW_IMAGE=1 + # Search using GET requests only (exposes query in logs) #WHOOGLE_CONFIG_GET_ONLY=1