Fix handling of http (vs https) proxy creation

The requests library requires both 'http' and 'https' values in any
included proxy dict, and whoogle was previously copying the http proxy
to https for simplicity. The assumption was that if the underlying
request wasn't able to connect via https, it would default to http
(otherwise why have the requirement to specify both?)

This led to connectivity issues for users with http only proxies as of
the latest urllib and requests package versions, which are a lot more
strict with connections over https. With the latest versions, if an
https connection cannot be made, the library returns an error.

As a result, the new proxy dict must look something like this for plain
http proxies:

{'http': 'http://domain.tld:port', 'https': 'http://domain.tld:port'}

where both http and https are identical, but both are still required.
main
Ben Busby 2021-06-04 15:30:21 -04:00
parent a64a86efb6
commit e7a604d428
No known key found for this signature in database
GPG Key ID: 3B08611DF6E62ED2
3 changed files with 19 additions and 10 deletions

View File

@ -183,7 +183,7 @@ Description=Whoogle
# Proxy configuration, uncomment to enable
#Environment=WHOOGLE_PROXY_USER=<proxy username>
#Environment=WHOOGLE_PROXY_PASS=<proxy password>
#Environment=WHOOGLE_PROXY_TYPE=<proxy type (http|proxy4|proxy5)
#Environment=WHOOGLE_PROXY_TYPE=<proxy type (http|https|proxy4|proxy5)
#Environment=WHOOGLE_PROXY_LOC=<proxy host/ip>
# Site alternative configurations, uncomment to enable
# Note: If not set, the feature will still be available

View File

@ -166,17 +166,26 @@ class Request:
self.modified_user_agent_mobile = gen_user_agent(True)
# Set up proxy, if previously configured
if os.environ.get('WHOOGLE_PROXY_LOC'):
proxy_path = os.environ.get('WHOOGLE_PROXY_LOC', '')
if proxy_path:
proxy_type = os.environ.get('WHOOGLE_PROXY_TYPE', '')
proxy_user = os.environ.get('WHOOGLE_PROXY_USER', '')
proxy_pass = os.environ.get('WHOOGLE_PROXY_PASS', '')
auth_str = ''
if os.environ.get('WHOOGLE_PROXY_USER', ''):
auth_str = os.environ.get('WHOOGLE_PROXY_USER', '') + \
':' + os.environ.get('WHOOGLE_PROXY_PASS', '')
if proxy_user:
auth_str = proxy_user + ':' + proxy_pass
self.proxies = {
'http': os.environ.get('WHOOGLE_PROXY_TYPE', '') + '://' +
auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC', ''),
'https': proxy_type + '://' +
((auth_str + '@') if auth_str else '') + proxy_path,
}
self.proxies['https'] = self.proxies['http'].replace('http',
'https')
# Need to ensure both HTTP and HTTPS are in the proxy dict,
# regardless of underlying protocol
if proxy_type == 'https':
self.proxies['http'] = self.proxies['https'].replace(
'https', 'http')
else:
self.proxies['http'] = self.proxies['https']
else:
self.proxies = {
'http': 'socks5://127.0.0.1:9050',

View File

@ -28,7 +28,7 @@ services:
# Proxy configuration, uncomment to enable
#- WHOOGLE_PROXY_USER=<proxy username>
#- WHOOGLE_PROXY_PASS=<proxy password>
#- WHOOGLE_PROXY_TYPE=<proxy type (http|socks4|socks5)
#- WHOOGLE_PROXY_TYPE=<proxy type (http|https|socks4|socks5)
#- WHOOGLE_PROXY_LOC=<proxy host/ip>
# Site alternative configurations, uncomment to enable
# Note: If not set, the feature will still be available