100 |
118 |
119 |
120 | {% endif %}
121 |
122 |
123 |
124 |
138 |
139 |
140 |
141 |
142 |
--------------------------------------------------------------------------------
/app/utils/search.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | from typing import Any
4 |
5 | from app.filter import Filter
6 | from app.request import gen_query
7 | from app.utils.misc import get_proxy_host_url
8 | from app.utils.results import get_first_link
9 | from bs4 import BeautifulSoup as bsoup
10 | from cryptography.fernet import Fernet, InvalidToken
11 | from flask import g
12 |
13 | TOR_BANNER = '
You are using Tor
'
14 | CAPTCHA = 'div class="g-recaptcha"'
15 |
16 |
def needs_https(url: str) -> bool:
    """Checks if the current instance needs to be upgraded to HTTPS

    Note that all Heroku instances are available by default over HTTPS, but
    do not automatically set up a redirect when visited over HTTP.

    The HTTPS_ONLY environment variable is read as a flag: unset, empty,
    '0', 'false', 'no' and 'off' (case-insensitive) leave it disabled, any
    other value enables it.

    Args:
        url: The instance url

    Returns:
        bool: True/False representing the need to upgrade

    """
    # Environment values are always strings, so bool() on the raw value
    # would treat HTTPS_ONLY=0 as enabled. Compare against the explicit
    # "off" spellings instead.
    flag = os.getenv('HTTPS_ONLY', '').strip().lower()
    https_only = flag not in ('', '0', 'false', 'no', 'off')
    is_heroku = url.endswith('.herokuapp.com')
    is_http = url.startswith('http://')

    # Only plain-HTTP requests can need an upgrade
    return is_http and (is_heroku or https_only)
35 |
36 |
def has_captcha(results: str) -> bool:
    """Reports whether the search results page is blocked by a captcha

    Args:
        results: The search page html as a string

    Returns:
        bool: True if the captcha marker appears anywhere in the html,
              False otherwise

    """
    captcha_found = CAPTCHA in results
    return captcha_found
48 |
49 |
class Search:
    """Search query preprocessor - used before submitting the query or
    redirecting to another site

    Instances also support dict-style access (``search['query']``), which
    is forwarded to regular attribute access.

    Attributes:
        request: the incoming flask request
        config: the current user config settings
        session_key: the flask user fernet key
    """
    def __init__(self, request, config, session_key, cookies_disabled=False):
        method = request.method
        self.request = request
        # GET requests carry the parameters in the query string, everything
        # else in the submitted form
        self.request_params = request.args if method == 'GET' else request.form
        # May be None when the client sends no User-Agent header
        self.user_agent = request.headers.get('User-Agent')
        self.feeling_lucky = False
        self.config = config
        self.session_key = session_key
        self.query = ''
        self.cookies_disabled = cookies_disabled
        self.search_type = self.request_params.get('tbm', '')

    def __getitem__(self, name) -> Any:
        return getattr(self, name)

    def __setitem__(self, name, value) -> None:
        return setattr(self, name, value)

    def __delitem__(self, name) -> None:
        return delattr(self, name)

    def __contains__(self, name) -> bool:
        return hasattr(self, name)

    def new_search_query(self) -> str:
        """Parses a plaintext query into a valid string for submission

        Also decrypts the query string, if encrypted (in the case of
        paginated results).

        Returns:
            str: A valid query string, or an empty string when the request
                 carries no query

        """
        q = self.request_params.get('q')

        if not q:
            return ''

        # Attempt to decrypt if this is an internal link; a plaintext
        # query is not a valid token and is used as-is
        try:
            q = Fernet(self.session_key).decrypt(q.encode()).decode()
        except InvalidToken:
            pass

        # Strip leading '! ' for "feeling lucky" queries
        self.feeling_lucky = q.startswith('! ')
        self.query = q[2:] if self.feeling_lucky else q
        return self.query

    def generate_response(self) -> str:
        """Generates a response for the user's query

        Returns:
            str: A string response to the search query, in the form of a URL
                 or string representation of HTML content.

        """
        # The header may be missing entirely, in which case the stored
        # user agent is None and cannot be searched with `in`
        user_agent = self.user_agent or ''
        mobile = 'Android' in user_agent or 'iPhone' in user_agent
        # reconstruct url if X-Forwarded-Host header present
        root_url = get_proxy_host_url(
            self.request,
            self.request.url_root,
            root=True)

        content_filter = Filter(self.session_key,
                                root_url=root_url,
                                mobile=mobile,
                                config=self.config,
                                query=self.query)
        full_query = gen_query(self.query,
                               self.request_params,
                               self.config)
        self.full_query = full_query

        # force mobile search when view image is true and
        # the request is not already made by a mobile
        view_image = ('tbm=isch' in full_query
                      and self.config.view_image
                      and not g.user_request.mobile)

        get_body = g.user_request.send(query=full_query,
                                       force_mobile=view_image)

        # Produce cleanable html soup from response
        html_soup = bsoup(get_body.text, 'html.parser')

        # Replace current soup if view_image is active
        if view_image:
            html_soup = content_filter.view_image(html_soup)

        # Indicate whether or not a Tor connection is active
        if g.user_request.tor_valid:
            html_soup.insert(0, bsoup(TOR_BANNER, 'html.parser'))

        if self.feeling_lucky:
            return get_first_link(html_soup)

        formatted_results = content_filter.clean(html_soup)

        # Append user config to all search links, if available
        param_str = ''.join('&{}={}'.format(k, v)
                            for k, v in
                            self.request_params.to_dict(flat=True).items()
                            if self.config.is_safe_key(k))
        for link in formatted_results.find_all('a', href=True):
            link['rel'] = "nofollow noopener noreferrer"
            # Only links whose 'search?' starts at index 0 or 1 of the href
            # receive the extra parameters
            position = link['href'].find('search?')
            if position < 0 or position > 1:
                continue
            link['href'] += param_str

        return str(formatted_results)

    def check_kw_ip(self) -> 're.Match | None':
        """Checks for keywords related to 'my ip' in the query

        Returns:
            The regex match object when the lowercased query contains a
            'my ip' style phrase, otherwise None. The result is intended
            to be used in a boolean context.

        """
        return re.search("([^a-z0-9]|^)my *[^a-z0-9] *(ip|internet protocol)" +
                         "($|( *[^a-z0-9] *(((addres|address|adres|" +
                         "adress)|a)? *$)))", self.query.lower())
184 |
--------------------------------------------------------------------------------
/app/templates/logo.html:
--------------------------------------------------------------------------------
1 |
19 |
--------------------------------------------------------------------------------
/app/__init__.py:
--------------------------------------------------------------------------------
1 | from app.filter import clean_query
2 | from app.request import send_tor_signal
3 | from app.utils.session import generate_user_key
4 | from app.utils.bangs import gen_bangs_json
5 | from app.utils.misc import gen_file_hash, read_config_bool
6 | from base64 import b64encode
7 | from datetime import datetime, timedelta
8 | from flask import Flask
9 | import json
10 | import logging.config
11 | import os
12 | from stem import Signal
13 | import threading
14 | from dotenv import load_dotenv
15 |
16 | from werkzeug.middleware.proxy_fix import ProxyFix
17 |
18 | from app.utils.misc import read_config_bool
19 | from app.version import __version__
20 |
def _load_static_json(static_folder, relative_path):
    """Parse a UTF-8 JSON file located under the static folder.

    The file is opened in a context manager so the handle is closed as
    soon as parsing finishes, rather than being left open.
    """
    json_path = os.path.join(static_folder, relative_path)
    with open(json_path, encoding='utf-8') as json_file:
        return json.load(json_file)


app = Flask(__name__, static_folder=os.path.dirname(
    os.path.abspath(__file__)) + '/static')

app.wsgi_app = ProxyFix(app.wsgi_app)

dot_env_path = (
    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                 '../whoogle.env'))

# Load .env file if enabled
if read_config_bool('WHOOGLE_DOTENV'):
    load_dotenv(dot_env_path)

app.default_key = generate_user_key()

if read_config_bool('HTTPS_ONLY'):
    app.config['SESSION_COOKIE_NAME'] = '__Secure-session'
    app.config['SESSION_COOKIE_SECURE'] = True

app.config['VERSION_NUMBER'] = __version__
app.config['APP_ROOT'] = os.getenv(
    'APP_ROOT',
    os.path.dirname(os.path.abspath(__file__)))
app.config['STATIC_FOLDER'] = os.getenv(
    'STATIC_FOLDER',
    os.path.join(app.config['APP_ROOT'], 'static'))
app.config['BUILD_FOLDER'] = os.path.join(
    app.config['STATIC_FOLDER'], 'build')
app.config['CACHE_BUSTING_MAP'] = {}

# Static settings files, each parsed once at startup
app.config['LANGUAGES'] = _load_static_json(
    app.config['STATIC_FOLDER'], 'settings/languages.json')
app.config['COUNTRIES'] = _load_static_json(
    app.config['STATIC_FOLDER'], 'settings/countries.json')
app.config['TRANSLATIONS'] = _load_static_json(
    app.config['STATIC_FOLDER'], 'settings/translations.json')
app.config['THEMES'] = _load_static_json(
    app.config['STATIC_FOLDER'], 'settings/themes.json')
app.config['HEADER_TABS'] = _load_static_json(
    app.config['STATIC_FOLDER'], 'settings/header_tabs.json')

app.config['CONFIG_PATH'] = os.getenv(
    'CONFIG_VOLUME',
    os.path.join(app.config['STATIC_FOLDER'], 'config'))
app.config['DEFAULT_CONFIG'] = os.path.join(
    app.config['CONFIG_PATH'],
    'config.json')
app.config['CONFIG_DISABLE'] = read_config_bool('WHOOGLE_CONFIG_DISABLE')
app.config['SESSION_FILE_DIR'] = os.path.join(
    app.config['CONFIG_PATH'],
    'session')
app.config['MAX_SESSION_SIZE'] = 4000  # Sessions won't exceed 4KB
# The bang directory shares the CONFIG_VOLUME override with CONFIG_PATH
app.config['BANG_PATH'] = os.getenv(
    'CONFIG_VOLUME',
    os.path.join(app.config['STATIC_FOLDER'], 'bangs'))
app.config['BANG_FILE'] = os.path.join(
    app.config['BANG_PATH'],
    'bangs.json')
82 |
# Ensure all necessary directories exist. exist_ok avoids the gap between
# checking for a directory and creating it, during which another process
# could create the same directory and make makedirs fail.
for _required_dir in (app.config['CONFIG_PATH'],
                      app.config['SESSION_FILE_DIR'],
                      app.config['BANG_PATH'],
                      app.config['BUILD_FOLDER']):
    os.makedirs(_required_dir, exist_ok=True)
95 |
# Session values
app_key_path = os.path.join(app.config['CONFIG_PATH'], 'whoogle.key')
if os.path.exists(app_key_path):
    # Reuse the persisted key; the context manager closes the file handle
    with open(app_key_path, 'r') as key_file:
        app.config['SECRET_KEY'] = key_file.read()
else:
    # First start: generate a key and persist it for later runs.
    # str() of the bytes object is kept as-is (it yields the "b'...'"
    # form) so newly written keys keep the same format as before.
    app.config['SECRET_KEY'] = str(b64encode(os.urandom(32)))
    with open(app_key_path, 'w') as key_file:
        key_file.write(app.config['SECRET_KEY'])
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=365)
106 |
# NOTE: SESSION_COOKIE_SAMESITE has to stay 'lax' so that a user's existing
# session survives when the instance is reached through an external link.
# With 'strict', Whoogle tries to revalidate a new session and fails, which
# ends with cookies being treated as disabled.
app.config['SESSION_COOKIE_SAMESITE'] = 'Lax'

# Config fields that are used to check for updates
app.config['RELEASES_URL'] = (
    'https://github.com/benbusby/whoogle-search/releases')
# Start out 24 hours in the past
app.config['LAST_UPDATE_CHECK'] = datetime.now() - timedelta(hours=24)
app.config['HAS_UPDATE'] = ''

# Unlike the other site alternatives, the Google Translate replacement is
# used for any translation related search.
translate_url = os.getenv('WHOOGLE_ALT_TL', 'https://farside.link/lingva')
if not translate_url.startswith('http'):
    # A bare host was configured; default to https
    translate_url = 'https://' + translate_url
app.config['TRANSLATE_URL'] = translate_url

# Content security policy: deny by default, then allow same-origin
# resources plus frames from the translation service
app.config['CSP'] = ''.join([
    "default-src 'none';",
    'frame-src ' + translate_url + ';',
    "manifest-src 'self';",
    "img-src 'self' data:;",
    "style-src 'self' 'unsafe-inline';",
    "script-src 'self';",
    "media-src 'self';",
    "connect-src 'self';",
])
135 |
# Generate DDG bang filter
# NOTE(review): the original indentation was not recoverable here; the
# thread start is assumed to sit inside this conditional -- confirm.
if not os.path.exists(app.config['BANG_FILE']):
    # Write an empty placeholder and close it before the worker thread
    # is started with the same path
    with open(app.config['BANG_FILE'], 'w') as bang_file:
        json.dump({}, bang_file)
    bangs_thread = threading.Thread(
        target=gen_bangs_json,
        args=(app.config['BANG_FILE'],))
    bangs_thread.start()
143 |
# Build new mapping of static files for cache busting
cache_busting_dirs = ['css', 'js']
for cb_dir in cache_busting_dirs:
    full_cb_dir = os.path.join(app.config['STATIC_FOLDER'], cb_dir)
    for cb_file in os.listdir(full_cb_dir):
        # Derive the hashed link name from the file's current state
        cb_file_link = gen_file_hash(full_cb_dir, cb_file)
        full_cb_path = os.path.join(full_cb_dir, cb_file)
        build_path = os.path.join(app.config['BUILD_FOLDER'], cb_file_link)

        try:
            os.symlink(full_cb_path, build_path)
        except FileExistsError:
            # A link with this name already exists, nothing to do
            pass

        # Store the path relative to the app root, without a leading slash
        map_path = build_path.replace(app.config['APP_ROOT'], '')
        app.config['CACHE_BUSTING_MAP'][cb_file] = (
            map_path[1:] if map_path.startswith('/') else map_path)
165 |
# Templating functions: expose query cleaning and cache-busted URL lookup
# to all templates
app.jinja_env.globals.update(
    clean_query=clean_query,
    cb_url=lambda f: app.config['CACHE_BUSTING_MAP'][f])

# Attempt to acquire tor identity, to determine if Tor config is available
send_tor_signal(Signal.HEARTBEAT)

# Imported here, after the app setup above has run
from app import routes  # noqa

# Disable logging from imported modules
logging.config.dictConfig({
    'disable_existing_loggers': True,
    'version': 1,
})
181 |
--------------------------------------------------------------------------------
/app.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Whoogle Search",
3 | "description": "A lightweight, privacy-oriented, containerized Google search proxy for desktop/mobile that removes Javascript, AMP links, tracking, and ads/sponsored content",
4 | "repository": "https://github.com/benbusby/whoogle-search",
5 | "logo": "https://raw.githubusercontent.com/benbusby/whoogle-search/master/app/static/img/favicon/ms-icon-150x150.png",
6 | "keywords": [
7 | "search",
8 | "metasearch",
9 | "flask",
10 | "docker",
11 | "heroku",
12 | "adblock",
13 | "degoogle",
14 | "privacy"
15 | ],
16 | "stack": "container",
17 | "env": {
18 | "WHOOGLE_URL_PREFIX": {
19 | "description": "The URL prefix to use for the whoogle instance (i.e. \"/whoogle\")",
20 | "value": "",
21 | "required": false
22 | },
23 | "WHOOGLE_USER": {
24 | "description": "The username for basic auth. WHOOGLE_PASS must also be set if used. Leave empty to disable.",
25 | "value": "",
26 | "required": false
27 | },
28 | "WHOOGLE_PASS": {
29 | "description": "The password for basic auth. WHOOGLE_USER must also be set if used. Leave empty to disable.",
30 | "value": "",
31 | "required": false
32 | },
33 | "WHOOGLE_PROXY_USER": {
34 | "description": "The username of the proxy server. Leave empty to disable.",
35 | "value": "",
36 | "required": false
37 | },
38 | "WHOOGLE_PROXY_PASS": {
39 | "description": "The password of the proxy server. Leave empty to disable.",
40 | "value": "",
41 | "required": false
42 | },
43 | "WHOOGLE_PROXY_TYPE": {
44 | "description": "The type of the proxy server. For example \"socks5\". Leave empty to disable.",
45 | "value": "",
46 | "required": false
47 | },
48 | "WHOOGLE_PROXY_LOC": {
49 | "description": "The location of the proxy server (host or ip). Leave empty to disable.",
50 | "value": "",
51 | "required": false
52 | },
53 | "WHOOGLE_ALT_TW": {
54 | "description": "The site to use as a replacement for twitter.com when site alternatives are enabled in the config.",
55 | "value": "farside.link/nitter",
56 | "required": false
57 | },
58 | "WHOOGLE_ALT_YT": {
59 | "description": "The site to use as a replacement for youtube.com when site alternatives are enabled in the config.",
60 | "value": "farside.link/invidious",
61 | "required": false
62 | },
63 | "WHOOGLE_ALT_IG": {
64 | "description": "The site to use as a replacement for instagram.com when site alternatives are enabled in the config.",
65 | "value": "farside.link/bibliogram/u",
66 | "required": false
67 | },
68 | "WHOOGLE_ALT_RD": {
69 | "description": "The site to use as a replacement for reddit.com when site alternatives are enabled in the config.",
70 | "value": "farside.link/libreddit",
71 | "required": false
72 | },
73 | "WHOOGLE_ALT_MD": {
74 | "description": "The site to use as a replacement for medium.com when site alternatives are enabled in the config.",
75 | "value": "farside.link/scribe",
76 | "required": false
77 | },
78 | "WHOOGLE_ALT_TL": {
79 | "description": "The Google Translate alternative to use for all searches following the 'translate ___' structure.",
80 | "value": "farside.link/lingva",
81 | "required": false
82 | },
83 | "WHOOGLE_ALT_IMG": {
84 | "description": "The site to use as a replacement for imgur.com when site alternatives are enabled in the config.",
85 | "value": "farside.link/rimgo",
86 | "required": false
87 | },
88 | "WHOOGLE_ALT_WIKI": {
89 | "description": "The site to use as a replacement for wikipedia.org when site alternatives are enabled in the config.",
90 | "value": "farside.link/wikiless",
91 | "required": false
92 | },
93 | "WHOOGLE_ALT_IMDB": {
94 | "description": "The site to use as a replacement for imdb.com when site alternatives are enabled in the config.",
95 | "value": "farside.link/libremdb",
96 | "required": false
97 | },
98 | "WHOOGLE_ALT_QUORA": {
99 | "description": "The site to use as a replacement for quora.com when site alternatives are enabled in the config.",
100 | "value": "farside.link/quetre",
101 | "required": false
102 | },
103 | "WHOOGLE_MINIMAL": {
104 | "description": "Remove everything except basic result cards from all search queries (set to 1 or leave blank)",
105 | "value": "",
106 | "required": false
107 | },
108 | "WHOOGLE_CONFIG_COUNTRY": {
109 | "description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)",
110 | "value": "",
111 | "required": false
112 | },
113 | "WHOOGLE_CONFIG_LANGUAGE": {
114 | "description": "[CONFIG] The language to use for the interface (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/languages.json)",
115 | "value": "",
116 | "required": false
117 | },
118 | "WHOOGLE_CONFIG_SEARCH_LANGUAGE": {
119 | "description": "[CONFIG] The language to use for search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/languages.json)",
120 | "value": "",
121 | "required": false
122 | },
123 | "WHOOGLE_CONFIG_DISABLE": {
124 | "description": "[CONFIG] Disable ability for client to change config (set to 1 or leave blank)",
125 | "value": "",
126 | "required": false
127 | },
128 | "WHOOGLE_CONFIG_BLOCK": {
129 | "description": "[CONFIG] Block websites from search results (comma-separated list)",
130 | "value": "",
131 | "required": false
132 | },
133 | "WHOOGLE_CONFIG_THEME": {
134 | "description": "[CONFIG] Set theme to 'dark', 'light', or 'system'",
135 | "value": "system",
136 | "required": false
137 | },
138 | "WHOOGLE_CONFIG_SAFE": {
139 | "description": "[CONFIG] Use safe mode for searches (set to 1 or leave blank)",
140 | "value": "",
141 | "required": false
142 | },
143 | "WHOOGLE_CONFIG_ALTS": {
144 | "description": "[CONFIG] Use social media alternatives (set to 1 or leave blank)",
145 | "value": "",
146 | "required": false
147 | },
148 | "WHOOGLE_CONFIG_NEAR": {
149 | "description": "[CONFIG] Restrict results to only those near a particular city",
150 | "value": "",
151 | "required": false
152 | },
153 | "WHOOGLE_CONFIG_TOR": {
154 | "description": "[CONFIG] Use Tor, if available (set to 1 or leave blank)",
155 | "value": "",
156 | "required": false
157 | },
158 | "WHOOGLE_CONFIG_NEW_TAB": {
159 | "description": "[CONFIG] Always open results in new tab (set to 1 or leave blank)",
160 | "value": "",
161 | "required": false
162 | },
163 | "WHOOGLE_CONFIG_VIEW_IMAGE": {
164 | "description": "[CONFIG] Enable View Image option (set to 1 or leave blank)",
165 | "value": "",
166 | "required": false
167 | },
168 | "WHOOGLE_CONFIG_GET_ONLY": {
169 | "description": "[CONFIG] Search using GET requests only (set to 1 or leave blank)",
170 | "value": "",
171 | "required": false
172 | },
173 | "WHOOGLE_CONFIG_STYLE": {
174 | "description": "[CONFIG] Custom CSS styling (paste in CSS or leave blank)",
175 | "value": ":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }",
176 | "required": false
177 | },
178 | "WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED": {
179 | "description": "[CONFIG] Encrypt preferences token, requires WHOOGLE_CONFIG_PREFERENCES_KEY to be set",
180 | "value": "",
181 | "required": false
182 | },
183 | "WHOOGLE_CONFIG_PREFERENCES_KEY": {
184 | "description": "[CONFIG] Key to encrypt preferences",
185 | "value": "NEEDS_TO_BE_MODIFIED",
186 | "required": false
187 | }
188 | }
189 | }
190 |
--------------------------------------------------------------------------------
/app/models/config.py:
--------------------------------------------------------------------------------
1 | from inspect import Attribute
2 | from app.utils.misc import read_config_bool
3 | from flask import current_app
4 | import os
5 | import re
6 | from base64 import urlsafe_b64encode, urlsafe_b64decode
7 | import pickle
8 | from cryptography.fernet import Fernet
9 | import hashlib
10 | import brotli
11 |
12 |
class Config:
    """User configuration for a Whoogle session.

    Defaults are read from WHOOGLE_CONFIG_* environment variables and can
    be overridden by keyword arguments, by URL parameters (see
    from_params) or by an encoded preferences token. Instances also
    support dict-style access, which is forwarded to attribute access.
    """
    def __init__(self, **kwargs):
        app_config = current_app.config
        self.url = os.getenv('WHOOGLE_CONFIG_URL', '')
        self.lang_search = os.getenv('WHOOGLE_CONFIG_SEARCH_LANGUAGE', '')
        self.lang_interface = os.getenv('WHOOGLE_CONFIG_LANGUAGE', '')
        # Only read the bundled stylesheet when no override is configured,
        # and close the file once it has been read
        style = os.getenv('WHOOGLE_CONFIG_STYLE')
        if style is None:
            with open(os.path.join(app_config['STATIC_FOLDER'],
                                   'css/variables.css')) as css_file:
                style = css_file.read()
        self.style = style
        self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '')
        self.block_title = os.getenv('WHOOGLE_CONFIG_BLOCK_TITLE', '')
        self.block_url = os.getenv('WHOOGLE_CONFIG_BLOCK_URL', '')
        self.country = os.getenv('WHOOGLE_CONFIG_COUNTRY', '')
        self.theme = os.getenv('WHOOGLE_CONFIG_THEME', 'system')
        self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE')
        self.dark = read_config_bool('WHOOGLE_CONFIG_DARK')  # deprecated
        self.alts = read_config_bool('WHOOGLE_CONFIG_ALTS')
        self.nojs = read_config_bool('WHOOGLE_CONFIG_NOJS')
        self.tor = read_config_bool('WHOOGLE_CONFIG_TOR')
        self.near = os.getenv('WHOOGLE_CONFIG_NEAR', '')
        self.new_tab = read_config_bool('WHOOGLE_CONFIG_NEW_TAB')
        self.view_image = read_config_bool('WHOOGLE_CONFIG_VIEW_IMAGE')
        self.get_only = read_config_bool('WHOOGLE_CONFIG_GET_ONLY')
        self.anon_view = read_config_bool('WHOOGLE_CONFIG_ANON_VIEW')
        self.preferences_encrypted = read_config_bool(
            'WHOOGLE_CONFIG_PREFERENCES_ENCRYPTED')
        self.preferences_key = os.getenv('WHOOGLE_CONFIG_PREFERENCES_KEY', '')

        self.accept_language = False

        # Keys that may be set through URL parameters
        self.safe_keys = [
            'lang_search',
            'lang_interface',
            'country',
            'theme',
            'alts',
            'new_tab',
            'view_image',
            'block',
            'safe',
            'nojs',
            'anon_view',
            'preferences_encrypted'
        ]

        # Skip setting custom config if there isn't one
        if kwargs:
            for attr, attr_type in self.get_mutable_attrs().items():
                if attr in kwargs:
                    setattr(self, attr, kwargs[attr])
                elif attr_type is bool:
                    # A boolean missing from a supplied config is reset
                    # to False
                    setattr(self, attr, False)

    def __getitem__(self, name):
        return getattr(self, name)

    def __setitem__(self, name, value):
        return setattr(self, name, value)

    def __delitem__(self, name):
        return delattr(self, name)

    def __contains__(self, name):
        return hasattr(self, name)

    def get_mutable_attrs(self):
        """Maps each bool/str attribute name to its type."""
        return {name: type(attr) for name, attr in self.__dict__.items()
                if not name.startswith("__")
                and type(attr) in (bool, str)}

    def get_attrs(self):
        """Maps each bool/str attribute name to its current value."""
        return {name: attr for name, attr in self.__dict__.items()
                if not name.startswith("__")
                and type(attr) in (bool, str)}

    @property
    def preferences(self) -> str:
        """The current settings encoded as a preferences token.

        Note that reading this property switches preferences_encrypted off
        when no preferences_key is set.
        """
        # if encryption key is not set will uncheck preferences encryption
        if self.preferences_encrypted:
            self.preferences_encrypted = bool(self.preferences_key)

        # add a tag for visibility if preferences token startswith 'e' it means
        # the token is encrypted, 'u' means the token is unencrypted and can be
        # used by other whoogle instances
        encrypted_flag = "e" if self.preferences_encrypted else 'u'
        preferences_digest = self._encode_preferences()
        return f"{encrypted_flag}{preferences_digest}"

    def is_safe_key(self, key) -> bool:
        """Establishes a group of config options that are safe to set
        in the url.

        Args:
            key (str) -- the key to check against

        Returns:
            bool -- True/False depending on if the key is in the "safe"
            array
        """

        return key in self.safe_keys

    def get_localization_lang(self):
        """Returns the correct language to use for localization, but falls
        back to english if not set.

        Returns:
            str -- the localization language string
        """
        if (self.lang_interface and
                self.lang_interface in current_app.config['TRANSLATIONS']):
            return self.lang_interface

        return 'lang_en'

    def from_params(self, params) -> 'Config':
        """Modify user config with search parameters. This is primarily
        used for specifying configuration on a search-by-search basis on
        public instances.

        Args:
            params -- the url arguments (can be any deemed safe by is_safe())

        Returns:
            Config -- a modified config object
        """
        if 'preferences' in params:
            params_new = self._decode_preferences(params['preferences'])
            # if preferences leads to an empty dictionary it means preferences
            # parameter was not decrypted successfully
            if len(params_new):
                params = params_new

        for param_key in params.keys():
            if not self.is_safe_key(param_key):
                continue
            param_val = params.get(param_key)

            if param_val == 'off':
                param_val = False
            elif isinstance(param_val, str) and param_val.isdigit():
                param_val = int(param_val)

            self[param_key] = param_val
        return self

    def to_params(self, keys: list = None) -> str:
        """Generates a set of safe params for using in Whoogle URLs

        Args:
            keys (list) -- optional list of keys of URL parameters; when
                           omitted or empty, all safe keys are used

        Returns:
            str -- a set of URL parameters
        """
        if not keys:
            keys = self.safe_keys

        # Settings with a falsy value are left out of the URL
        return ''.join(f'&{safe_key}={self[safe_key]}'
                       for safe_key in keys
                       if self[safe_key])

    def _get_fernet_key(self, password: str) -> bytes:
        """Derives the Fernet key from a password: the url-safe base64
        encoding of the password's MD5 hex digest.

        NOTE: changing this derivation would invalidate tokens that were
        encrypted with the current one.
        """
        hash_object = hashlib.md5(password.encode())
        key = urlsafe_b64encode(hash_object.hexdigest().encode())
        return key

    def _encode_preferences(self) -> str:
        """Serializes the bool/str settings into a url-safe token body.

        The settings are pickled and brotli-compressed. When encryption is
        enabled and a key is set, the result is also Fernet-encrypted and
        compressed a second time.
        """
        encoded_preferences = brotli.compress(pickle.dumps(self.get_attrs()))
        if self.preferences_encrypted and self.preferences_key != '':
            key = self._get_fernet_key(self.preferences_key)
            encoded_preferences = Fernet(key).encrypt(encoded_preferences)
            encoded_preferences = brotli.compress(encoded_preferences)

        return urlsafe_b64encode(encoded_preferences).decode()

    def _decode_preferences(self, preferences: str) -> dict:
        """Decodes a preferences token produced by the preferences property.

        Args:
            preferences -- the token; its first character is the mode
                           ('e' encrypted, 'u' unencrypted)

        Returns:
            dict -- the decoded settings, or an empty dict when the token
            is empty, has an unknown mode, or cannot be decoded
        """
        if not preferences:
            return {}

        mode, payload = preferences[0], preferences[1:]
        if mode not in ('e', 'u'):  # preferences are incorrectly formatted
            return {}

        try:
            data = brotli.decompress(urlsafe_b64decode(payload.encode()))
            if mode == 'e':  # preferences are encrypted
                key = self._get_fernet_key(self.preferences_key)
                data = brotli.decompress(Fernet(key).decrypt(data))

            # NOTE(security): the token can arrive through a URL parameter
            # and unpickling untrusted data can execute arbitrary code.
            # Unencrypted ('u') tokens are not authenticated at all; a
            # safer serialization format should be considered.
            config = pickle.loads(data)
        except Exception:
            # Any failure (bad base64, bad compression, wrong key, bad
            # pickle) is treated as "no preferences" in both modes
            config = {}

        return config if isinstance(config, dict) else {}
217 |
--------------------------------------------------------------------------------
/app/templates/imageresults.html:
--------------------------------------------------------------------------------
1 |
2 |
322 |
323 |
332 |
333 | {% for i in range((length // 4) + 1) %}
334 |
335 | {% for j in range([length - (i*4), 4]|min) %}
336 | |
337 |
380 | |
381 | {% endfor %}
382 |
383 | {% endfor %}
384 |
385 |
386 |
389 |
390 |
391 |
--------------------------------------------------------------------------------
/app/static/settings/countries.json:
--------------------------------------------------------------------------------
1 | [
2 | {"name": "-------", "value": ""},
3 | {"name": "Afghanistan", "value": "AF"},
4 | {"name": "Albania", "value": "AL"},
5 | {"name": "Algeria", "value": "DZ"},
6 | {"name": "American Samoa", "value": "AS"},
7 | {"name": "Andorra", "value": "AD"},
8 | {"name": "Angola", "value": "AO"},
9 | {"name": "Anguilla", "value": "AI"},
10 | {"name": "Antarctica", "value": "AQ"},
11 | {"name": "Antigua and Barbuda", "value": "AG"},
12 | {"name": "Argentina", "value": "AR"},
13 | {"name": "Armenia", "value": "AM"},
14 | {"name": "Aruba", "value": "AW"},
15 | {"name": "Australia", "value": "AU"},
16 | {"name": "Austria", "value": "AT"},
17 | {"name": "Azerbaijan", "value": "AZ"},
18 | {"name": "Bahamas", "value": "BS"},
19 | {"name": "Bahrain", "value": "BH"},
20 | {"name": "Bangladesh", "value": "BD"},
21 | {"name": "Barbados", "value": "BB"},
22 | {"name": "Belarus", "value": "BY"},
23 | {"name": "Belgium", "value": "BE"},
24 | {"name": "Belize", "value": "BZ"},
25 | {"name": "Benin", "value": "BJ"},
26 | {"name": "Bermuda", "value": "BM"},
27 | {"name": "Bhutan", "value": "BT"},
28 | {"name": "Bolivia", "value": "BO"},
29 | {"name": "Bosnia and Herzegovina", "value": "BA"},
30 | {"name": "Botswana", "value": "BW"},
31 | {"name": "Bouvet Island", "value": "BV"},
32 | {"name": "Brazil", "value": "BR"},
33 | {"name": "British Indian Ocean Territory", "value": "IO"},
34 | {"name": "Brunei Darussalam", "value": "BN"},
35 | {"name": "Bulgaria", "value": "BG"},
36 | {"name": "Burkina Faso", "value": "BF"},
37 | {"name": "Burundi", "value": "BI"},
38 | {"name": "Cambodia", "value": "KH"},
39 | {"name": "Cameroon", "value": "CM"},
40 | {"name": "Canada", "value": "CA"},
41 | {"name": "Cape Verde", "value": "CV"},
42 | {"name": "Cayman Islands", "value": "KY"},
43 | {"name": "Central African Republic", "value": "CF"},
44 | {"name": "Chad", "value": "TD"},
45 | {"name": "Chile", "value": "CL"},
46 | {"name": "China", "value": "CN"},
47 | {"name": "Christmas Island", "value": "CX"},
48 | {"name": "Cocos (Keeling) Islands", "value": "CC"},
49 | {"name": "Colombia", "value": "CO"},
50 | {"name": "Comoros", "value": "KM"},
51 | {"name": "Congo", "value": "CG"},
52 | {"name": "Congo, Democratic Republic of the", "value": "CD"},
53 | {"name": "Cook Islands", "value": "CK"},
54 | {"name": "Costa Rica", "value": "CR"},
55 | {"name": "Cote D'ivoire", "value": "CI"},
56 | {"name": "Croatia (Hrvatska)", "value": "HR"},
57 | {"name": "Cuba", "value": "CU"},
58 | {"name": "Cyprus", "value": "CY"},
59 | {"name": "Czech Republic", "value": "CZ"},
60 | {"name": "Denmark", "value": "DK"},
61 | {"name": "Djibouti", "value": "DJ"},
62 | {"name": "Dominica", "value": "DM"},
63 | {"name": "Dominican Republic", "value": "DO"},
64 | {"name": "East Timor", "value": "TP"},
65 | {"name": "Ecuador", "value": "EC"},
66 | {"name": "Egypt", "value": "EG"},
67 | {"name": "El Salvador", "value": "SV"},
68 | {"name": "Equatorial Guinea", "value": "GQ"},
69 | {"name": "Eritrea", "value": "ER"},
70 | {"name": "Estonia", "value": "EE"},
71 | {"name": "Ethiopia", "value": "ET"},
72 | {"name": "European Union", "value": "EU"},
73 | {"name": "Falkland Islands (Malvinas)", "value": "FK"},
74 | {"name": "Faroe Islands", "value": "FO"},
75 | {"name": "Fiji", "value": "FJ"},
76 | {"name": "Finland", "value": "FI"},
77 | {"name": "France", "value": "FR"},
78 | {"name": "France, Metropolitan", "value": "FX"},
79 | {"name": "French Guiana", "value": "GF"},
80 | {"name": "French Polynesia", "value": "PF"},
81 | {"name": "French Southern Territories", "value": "TF"},
82 | {"name": "Gabon", "value": "GA"},
83 | {"name": "Gambia", "value": "GM"},
84 | {"name": "Georgia", "value": "GE"},
85 | {"name": "Germany", "value": "DE"},
86 | {"name": "Ghana", "value": "GH"},
87 | {"name": "Gibraltar", "value": "GI"},
88 | {"name": "Greece", "value": "GR"},
89 | {"name": "Greenland", "value": "GL"},
90 | {"name": "Grenada", "value": "GD"},
91 | {"name": "Guadeloupe", "value": "GP"},
92 | {"name": "Guam", "value": "GU"},
93 | {"name": "Guatemala", "value": "GT"},
94 | {"name": "Guinea", "value": "GN"},
95 | {"name": "Guinea-Bissau", "value": "GW"},
96 | {"name": "Guyana", "value": "GY"},
97 | {"name": "Haiti", "value": "HT"},
98 | {"name": "Heard Island and Mcdonald Islands", "value": "HM"},
99 | {"name": "Holy See (Vatican City State)", "value": "VA"},
100 | {"name": "Honduras", "value": "HN"},
101 | {"name": "Hong Kong", "value": "HK"},
102 | {"name": "Hungary", "value": "HU"},
103 | {"name": "Iceland", "value": "IS"},
104 | {"name": "India", "value": "IN"},
105 | {"name": "Indonesia", "value": "ID"},
106 | {"name": "Iran, Islamic Republic of", "value": "IR"},
107 | {"name": "Iraq", "value": "IQ"},
108 | {"name": "Ireland", "value": "IE"},
109 | {"name": "Israel", "value": "IL"},
110 | {"name": "Italy", "value": "IT"},
111 | {"name": "Jamaica", "value": "JM"},
112 | {"name": "Japan", "value": "JP"},
113 | {"name": "Jordan", "value": "JO"},
114 | {"name": "Kazakhstan", "value": "KZ"},
115 | {"name": "Kenya", "value": "KE"},
116 | {"name": "Kiribati", "value": "KI"},
117 | {"name": "Korea, Democratic People's Republic of", "value": "KP"},
118 | {"name": "Korea, Republic of", "value": "KR"},
119 | {"name": "Kuwait", "value": "KW"},
120 | {"name": "Kyrgyzstan", "value": "KG"},
121 | {"name": "Lao People's Democratic Republic", "value": "LA"},
122 | {"name": "Latvia", "value": "LV"},
123 | {"name": "Lebanon", "value": "LB"},
124 | {"name": "Lesotho", "value": "LS"},
125 | {"name": "Liberia", "value": "LR"},
126 | {"name": "Libyan Arab Jamahiriya", "value": "LY"},
127 | {"name": "Liechtenstein", "value": "LI"},
128 | {"name": "Lithuania", "value": "LT"},
129 | {"name": "Luxembourg", "value": "LU"},
130 | {"name": "Macao", "value": "MO"},
131 | {"name": "Macedonia, the Former Yugoslav Republic of",
132 | "value": "MK"},
133 | {"name": "Madagascar", "value": "MG"},
134 | {"name": "Malawi", "value": "MW"},
135 | {"name": "Malaysia", "value": "MY"},
136 | {"name": "Maldives", "value": "MV"},
137 | {"name": "Mali", "value": "ML"},
138 | {"name": "Malta", "value": "MT"},
139 | {"name": "Marshall Islands", "value": "MH"},
140 | {"name": "Martinique", "value": "MQ"},
141 | {"name": "Mauritania", "value": "MR"},
142 | {"name": "Mauritius", "value": "MU"},
143 | {"name": "Mayotte", "value": "YT"},
144 | {"name": "Mexico", "value": "MX"},
145 | {"name": "Micronesia, Federated States of", "value": "FM"},
146 | {"name": "Moldova, Republic of", "value": "MD"},
147 | {"name": "Monaco", "value": "MC"},
148 | {"name": "Mongolia", "value": "MN"},
149 | {"name": "Montserrat", "value": "MS"},
150 | {"name": "Morocco", "value": "MA"},
151 | {"name": "Mozambique", "value": "MZ"},
152 | {"name": "Myanmar", "value": "MM"},
153 | {"name": "Namibia", "value": "NA"},
154 | {"name": "Nauru", "value": "NR"},
155 | {"name": "Nepal", "value": "NP"},
156 | {"name": "Netherlands", "value": "NL"},
157 | {"name": "Netherlands Antilles", "value": "AN"},
158 | {"name": "New Caledonia", "value": "NC"},
159 | {"name": "New Zealand", "value": "NZ"},
160 | {"name": "Nicaragua", "value": "NI"},
161 | {"name": "Niger", "value": "NE"},
162 | {"name": "Nigeria", "value": "NG"},
163 | {"name": "Niue", "value": "NU"},
164 | {"name": "Norfolk Island", "value": "NF"},
165 | {"name": "Northern Mariana Islands", "value": "MP"},
166 | {"name": "Norway", "value": "NO"},
167 | {"name": "Oman", "value": "OM"},
168 | {"name": "Pakistan", "value": "PK"},
169 | {"name": "Palau", "value": "PW"},
170 | {"name": "Palestinian Territory", "value": "PS"},
171 | {"name": "Panama", "value": "PA"},
172 | {"name": "Papua New Guinea", "value": "PG"},
173 | {"name": "Paraguay", "value": "PY"},
174 | {"name": "Peru", "value": "PE"},
175 | {"name": "Philippines", "value": "PH"},
176 | {"name": "Pitcairn", "value": "PN"},
177 | {"name": "Poland", "value": "PL"},
178 | {"name": "Portugal", "value": "PT"},
179 | {"name": "Puerto Rico", "value": "PR"},
180 | {"name": "Qatar", "value": "QA"},
181 | {"name": "Reunion", "value": "RE"},
182 | {"name": "Romania", "value": "RO"},
183 | {"name": "Russian Federation", "value": "RU"},
184 | {"name": "Rwanda", "value": "RW"},
185 | {"name": "Saint Helena", "value": "SH"},
186 | {"name": "Saint Kitts and Nevis", "value": "KN"},
187 | {"name": "Saint Lucia", "value": "LC"},
188 | {"name": "Saint Pierre and Miquelon", "value": "PM"},
189 | {"name": "Saint Vincent and the Grenadines", "value": "VC"},
190 | {"name": "Samoa", "value": "WS"},
191 | {"name": "San Marino", "value": "SM"},
192 | {"name": "Sao Tome and Principe", "value": "ST"},
193 | {"name": "Saudi Arabia", "value": "SA"},
194 | {"name": "Senegal", "value": "SN"},
195 | {"name": "Serbia and Montenegro", "value": "CS"},
196 | {"name": "Seychelles", "value": "SC"},
197 | {"name": "Sierra Leone", "value": "SL"},
198 | {"name": "Singapore", "value": "SG"},
199 | {"name": "Slovakia", "value": "SK"},
200 | {"name": "Slovenia", "value": "SI"},
201 | {"name": "Solomon Islands", "value": "SB"},
202 | {"name": "Somalia", "value": "SO"},
203 | {"name": "South Africa", "value": "ZA"},
204 | {"name": "South Georgia and the South Sandwich Islands",
205 | "value": "GS"},
206 | {"name": "Spain", "value": "ES"},
207 | {"name": "Sri Lanka", "value": "LK"},
208 | {"name": "Sudan", "value": "SD"},
209 | {"name": "Suriname", "value": "SR"},
210 | {"name": "Svalbard and Jan Mayen", "value": "SJ"},
211 | {"name": "Swaziland", "value": "SZ"},
212 | {"name": "Sweden", "value": "SE"},
213 | {"name": "Switzerland", "value": "CH"},
214 | {"name": "Syrian Arab Republic", "value": "SY"},
215 | {"name": "Taiwan", "value": "TW"},
216 | {"name": "Tajikistan", "value": "TJ"},
217 | {"name": "Tanzania, United Republic of", "value": "TZ"},
218 | {"name": "Thailand", "value": "TH"},
219 | {"name": "Togo", "value": "TG"},
220 | {"name": "Tokelau", "value": "TK"},
221 | {"name": "Tonga", "value": "TO"},
222 | {"name": "Trinidad and Tobago", "value": "TT"},
223 | {"name": "Tunisia", "value": "TN"},
224 | {"name": "Turkey", "value": "TR"},
225 | {"name": "Turkmenistan", "value": "TM"},
226 | {"name": "Turks and Caicos Islands", "value": "TC"},
227 | {"name": "Tuvalu", "value": "TV"},
228 | {"name": "Uganda", "value": "UG"},
229 | {"name": "Ukraine", "value": "UA"},
230 | {"name": "United Arab Emirates", "value": "AE"},
231 | {"name": "United Kingdom", "value": "UK"},
232 | {"name": "United States", "value": "US"},
233 | {"name": "United States Minor Outlying Islands", "value": "UM"},
234 | {"name": "Uruguay", "value": "UY"},
235 | {"name": "Uzbekistan", "value": "UZ"},
236 | {"name": "Vanuatu", "value": "VU"},
237 | {"name": "Venezuela", "value": "VE"},
238 | {"name": "Vietnam", "value": "VN"},
239 | {"name": "Virgin Islands, British", "value": "VG"},
240 | {"name": "Virgin Islands, U.S.", "value": "VI"},
241 | {"name": "Wallis and Futuna", "value": "WF"},
242 | {"name": "Western Sahara", "value": "EH"},
243 | {"name": "Yemen", "value": "YE"},
244 | {"name": "Yugoslavia", "value": "YU"},
245 | {"name": "Zambia", "value": "ZM"},
246 | {"name": "Zimbabwe", "value": "ZW"}
247 | ]
248 |
--------------------------------------------------------------------------------
/app/request.py:
--------------------------------------------------------------------------------
1 | from app.models.config import Config
2 | from app.utils.misc import read_config_bool
3 | from datetime import datetime
4 | from defusedxml import ElementTree as ET
5 | import random
6 | import requests
7 | from requests import Response, ConnectionError
8 | import urllib.parse as urlparse
9 | import os
10 | from stem import Signal, SocketError
11 | from stem.connection import AuthenticationFailure
12 | from stem.control import Controller
13 | from stem.connection import authenticate_cookie, authenticate_password
14 |
# Google endpoints used for map links and search suggestions
MAPS_URL = 'https://maps.google.com/maps'
AUTOCOMPLETE_URL = (
    'https://suggestqueries.google.com/complete/search?client=toolbar&'
)

# User agent templates. gen_user_agent fills the placeholders: the mobile
# template takes (product, browser), the desktop template takes
# (product, platform, browser).
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'

# Valid query params (used by gen_query to seed its param dict, which also
# fixes the order in which these params are appended to the query)
VALID_PARAMS = [
    'tbs',
    'tbm',
    'start',
    'near',
    'source',
    'nfpr',
]
24 |
25 |
class TorError(Exception):
    """Raised when a request that relies on Tor cannot be completed.

    Attributes:
        message: human-readable description of what went wrong
        disable: when True, indicates that Tor should be switched off in
            the user config (meant for cases where the connection has
            been dropped altogether)
    """

    def __init__(self, message, disable=False) -> None:
        # Let the base Exception record the message first so that str(err)
        # and err.args reflect it, then attach the extra fields.
        super().__init__(message)
        self.disable = disable
        self.message = message
40 |
41 |
def send_tor_signal(signal: Signal) -> bool:
    """Authenticates with the Tor control port and sends it a signal

    When WHOOGLE_TOR_USE_PASS is enabled, the password is read from the
    last line of the control config file (WHOOGLE_TOR_CONF if that path is
    an existing file, otherwise ./misc/tor/control.conf). Otherwise the
    control auth cookie at /var/lib/tor/control_auth_cookie is used.

    The outcome is recorded in the TOR_AVAILABLE environment variable
    ('1' on success, '0' on failure).

    Args:
        signal: The stem Signal to send to the Tor controller

    Returns:
        bool: True if the signal was sent, False if the controller could
              not be reached, authentication failed, or the password file
              could not be read

    """
    use_pass = read_config_bool('WHOOGLE_TOR_USE_PASS')

    confloc = './misc/tor/control.conf'
    # Check that the custom location of conf is real.
    temp = os.getenv('WHOOGLE_TOR_CONF', '')
    if os.path.isfile(temp):
        confloc = temp

    # Attempt to authenticate and send signal.
    try:
        with Controller.from_port(port=9051) as c:
            if use_pass:
                # The password is the last line of the file. Starting from
                # an empty secret means an empty file results in a failed
                # authentication rather than an unbound variable.
                secret = ''
                with open(confloc, "r") as conf:
                    for line in conf:
                        secret = line.strip('\n')
                authenticate_password(c, password=secret)
            else:
                cookie_path = '/var/lib/tor/control_auth_cookie'
                authenticate_cookie(c, cookie_path=cookie_path)
            c.signal(signal)
            os.environ['TOR_AVAILABLE'] = '1'
            return True
    except (SocketError, AuthenticationFailure,
            ConnectionRefusedError, ConnectionError, OSError):
        # OSError also covers a missing or unreadable password file, which
        # is reported as "Tor unavailable" like any other failure here.
        # TODO: Handle Tor authentication (password and cookie)
        os.environ['TOR_AVAILABLE'] = '0'

    return False
73 |
74 |
def gen_user_agent(is_mobile) -> str:
    """Builds a randomized user agent string from the UA templates

    The browser and platform names are made-up words formed by picking a
    random prefix and appending 'fox' / 'ux' respectively.

    Args:
        is_mobile: Truthy to use the mobile template (MOBILE_UA), falsy to
                   use the desktop template (DESKTOP_UA)

    Returns:
        str: The formatted user agent

    """
    browser_prefixes = ['Choir', 'Squier', 'Higher', 'Wire']
    platform_prefixes = ['Win', 'Sin', 'Gin', 'Fin', 'Kin']

    # Both names are always drawn (browser first, then platform), even
    # though the mobile template only uses the browser name.
    browser = ''.join((random.choice(browser_prefixes), 'fox'))
    platform = ''.join((random.choice(platform_prefixes), 'ux'))

    if not is_mobile:
        return DESKTOP_UA.format("Mozilla", platform, browser)

    return MOBILE_UA.format("Mozilla", browser)
83 |
84 |
def gen_query(query, args, config) -> str:
    """Builds the query string (search terms plus url params) for a search

    The search terms are url-quoted, followed by a ' -site:<site>'
    exclusion for each blocked site in the config that is not already in
    the query, followed by every non-empty '&key=value' param.

    Args:
        query: The raw search query
        args: The incoming request args; only membership tests and
              .get() are used (the keys read are tbs, tbm, start, source,
              nfpr and chips)
        config: The user config; the near, lang_search, country,
                lang_interface, safe and block fields are read

    Returns:
        str: The query string to append to the search url

    """
    param_dict = {key: '' for key in VALID_PARAMS}

    # Use :past(hour/day/week/month/year) if available
    # example search "new restaurants :past month"
    lang = ''
    if ':past' in query and 'tbs' not in args:
        time_range = str.strip(query.split(':past', 1)[-1])
        # A query that ends with ':past' has no range to read, so the time
        # filter is skipped instead of indexing into an empty string
        if time_range:
            param_dict['tbs'] = '&tbs=' + ('qdr:' + str.lower(time_range[0]))
    elif 'tbs' in args:
        result_tbs = args.get('tbs')
        param_dict['tbs'] = '&tbs=' + result_tbs

        # Occasionally the 'tbs' param provided by google also contains a
        # field for 'lr', but formatted strangely. This is a rough solution
        # for this.
        #
        # Example:
        # &tbs=qdr:h,lr:lang_1pl
        # -- the lr param needs to be extracted and remove the leading '1'
        result_params = [_ for _ in result_tbs.split(',') if 'lr:' in _]
        if result_params:
            result_param = result_params[0]
            lang = result_param[result_param.find('lr:') + 3:]

    # Ensure search query is parsable
    query = urlparse.quote(query)

    # Pass along type of results (news, images, books, etc)
    if 'tbm' in args:
        param_dict['tbm'] = '&tbm=' + args.get('tbm')

    # Get results page start value (10 per page, ie page 2 start val = 20)
    if 'start' in args:
        param_dict['start'] = '&start=' + args.get('start')

    # Search for results near a particular city, if available
    if config.near:
        param_dict['near'] = '&near=' + urlparse.quote(config.near)

    # Set language for results (lr) if source isn't set, otherwise use the
    # result language param provided in the results
    if 'source' in args:
        param_dict['source'] = '&source=' + args.get('source')
        # Digits are dropped to undo the 'lang_1pl' style formatting
        param_dict['lr'] = ('&lr=' + ''.join(
            [_ for _ in lang if not _.isdigit()]
        )) if lang else ''
    else:
        param_dict['lr'] = (
            '&lr=' + config.lang_search
        ) if config.lang_search else ''

    # 'nfpr' defines the exclusion of results from an auto-corrected query
    if 'nfpr' in args:
        param_dict['nfpr'] = '&nfpr=' + args.get('nfpr')

    # 'chips' is used in image tabs to pass the optional 'filter' to add to the
    # given search term
    if 'chips' in args:
        param_dict['chips'] = '&chips=' + args.get('chips')

    param_dict['gl'] = (
        '&gl=' + config.country
    ) if config.country else ''
    param_dict['hl'] = (
        '&hl=' + config.lang_interface.replace('lang_', '')
    ) if config.lang_interface else ''
    param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')

    # Block all sites specified in the user config
    unquoted_query = urlparse.unquote(query)
    for blocked_site in config.block.replace(' ', '').split(','):
        if not blocked_site:
            continue
        block = (' -site:' + blocked_site)
        query += block if block not in unquoted_query else ''

    # Unset params are empty strings, so joining every value appends only
    # the params that were actually populated (in insertion order)
    query += ''.join(param_dict.values())

    return query
168 |
169 |
class Request:
    """Class used for handling all outbound requests, including search queries,
    search suggestions, and loading of external content (images, audio, etc).

    Attributes:
        search_url: the base search url that queries are appended to
        language: the configured search language ('' if unset)
        country: the configured country ('' if unset)
        lang_interface: the configured interface language, only populated
            when the config enables accept_language ('' otherwise)
        mobile: True if the incoming user agent mentions Android or iPhone
        modified_user_agent: the generated user agent sent with requests
        modified_user_agent_mobile: a generated mobile user agent (only
            set when the incoming user agent is not mobile)
        proxies: the proxy mapping passed along to requests
        tor: the Tor setting from the user config
        tor_valid: whether the last Tor validation check succeeded
        root_path: the root path of the whoogle instance
    """

    def __init__(self, normal_ua, root_path, config: Config):
        """Sets up user agents, language settings and proxies for requests

        Args:
            normal_ua: the user's current user agent
            root_path: the root path of the whoogle instance
            config: the user's current whoogle configuration
        """
        self.search_url = 'https://www.google.com/search?gbv=1&num=' + str(
            os.getenv('WHOOGLE_RESULTS_PER_PAGE', 10)) + '&q='
        # Send heartbeat to Tor, used in determining if the user can or cannot
        # enable Tor for future requests
        send_tor_signal(Signal.HEARTBEAT)

        self.language = (
            config.lang_search if config.lang_search else ''
        )

        self.country = config.country if config.country else ''

        # For setting Accept-language Header
        self.lang_interface = ''
        if config.accept_language:
            self.lang_interface = config.lang_interface

        self.mobile = bool(normal_ua) and ('Android' in normal_ua
                                           or 'iPhone' in normal_ua)
        self.modified_user_agent = gen_user_agent(self.mobile)
        if not self.mobile:
            self.modified_user_agent_mobile = gen_user_agent(True)

        # Set up proxy, if previously configured
        proxy_path = os.environ.get('WHOOGLE_PROXY_LOC', '')
        if proxy_path:
            proxy_type = os.environ.get('WHOOGLE_PROXY_TYPE', '')
            proxy_user = os.environ.get('WHOOGLE_PROXY_USER', '')
            proxy_pass = os.environ.get('WHOOGLE_PROXY_PASS', '')
            auth_str = ''
            if proxy_user:
                auth_str = proxy_user + ':' + proxy_pass
            self.proxies = {
                'https': proxy_type + '://' +
                ((auth_str + '@') if auth_str else '') + proxy_path,
            }

            # Need to ensure both HTTP and HTTPS are in the proxy dict,
            # regardless of underlying protocol
            if proxy_type == 'https':
                # Only the leading scheme is swapped (count=1), so an
                # 'https' substring inside the credentials or host is
                # left intact
                self.proxies['http'] = self.proxies['https'].replace(
                    'https', 'http', 1)
            else:
                self.proxies['http'] = self.proxies['https']
        else:
            self.proxies = {
                'http': 'socks5://127.0.0.1:9050',
                'https': 'socks5://127.0.0.1:9050'
            } if config.tor else {}
        self.tor = config.tor
        self.tor_valid = False
        self.root_path = root_path

    def __getitem__(self, name):
        """Allows attributes to be read with subscript syntax

        Args:
            name: The name of the attribute to look up

        Returns:
            The value of the attribute (AttributeError if it is missing)

        """
        return getattr(self, name)

    def autocomplete(self, query) -> list:
        """Sends a query to Google's search suggestion service

        Args:
            query: The in-progress query to send

        Returns:
            list: The list of matches for possible search suggestions

        """
        ac_query = dict(q=query)
        if self.language:
            ac_query['lr'] = self.language
        if self.country:
            ac_query['gl'] = self.country
        if self.lang_interface:
            # Strip the 'lang_' prefix, matching how the interface language
            # is sent for 'hl' in gen_query and in the Accept-Language
            # header
            ac_query['hl'] = self.lang_interface.replace('lang_', '')

        response = self.send(base_url=AUTOCOMPLETE_URL,
                             query=urlparse.urlencode(ac_query)).text

        if not response:
            return []

        try:
            root = ET.fromstring(response)
            return [_.attrib['data'] for _ in
                    root.findall('.//suggestion/[@data]')]
        except ET.ParseError:
            # Malformed XML response
            return []

    def send(self, base_url='', query='', attempt=0,
             force_mobile=False) -> Response:
        """Sends an outbound request to a URL. Optionally sends the request
        using Tor, if enabled by the user.

        Args:
            base_url: The URL to use in the request
            query: The optional query string for the request
            attempt: The number of attempts made for the request
                     (used for cycling through Tor identities, if enabled)
            force_mobile: Optional flag to enable a mobile user agent
                          (used for fetching full size images in search results)

        Returns:
            Response: The Response object returned by the requests call

        Raises:
            TorError: If Tor is enabled but the connection is unavailable,
                      cannot be validated, or still hits a captcha after
                      the maximum number of attempts

        """
        if force_mobile and not self.mobile:
            modified_user_agent = self.modified_user_agent_mobile
        else:
            modified_user_agent = self.modified_user_agent

        headers = {
            'User-Agent': modified_user_agent
        }

        # Adding the Accept-Language to the Header if possible
        if self.lang_interface:
            headers.update({'Accept-Language':
                            self.lang_interface.replace('lang_', '')
                            + ';q=1.0'})

        # Send a CONSENT cookie stamped with today's date, intended to keep
        # the consent view suppressed.
        # TODO: confirm that the view is suppressed correctly
        now = datetime.now()
        cookies = {
            'CONSENT': 'YES+cb.{:d}{:02d}{:02d}-17-p0.de+F+678'.format(
                now.year, now.month, now.day
            )
        }

        # Validate Tor conn and request new identity if the last one failed
        if self.tor and not send_tor_signal(
                Signal.NEWNYM if attempt > 0 else Signal.HEARTBEAT):
            raise TorError(
                "Tor was previously enabled, but the connection has been "
                "dropped. Please check your Tor configuration and try again.",
                disable=True)

        # Make sure that the tor connection is valid, if enabled
        if self.tor:
            try:
                tor_check = requests.get('https://check.torproject.org/',
                                         proxies=self.proxies, headers=headers)
                self.tor_valid = 'Congratulations' in tor_check.text

                if not self.tor_valid:
                    raise TorError(
                        "Tor connection succeeded, but the connection could "
                        "not be validated by torproject.org",
                        disable=True)
            except ConnectionError as err:
                raise TorError(
                    "Error raised during Tor connection validation",
                    disable=True) from err

        response = requests.get(
            (base_url or self.search_url) + query,
            proxies=self.proxies,
            headers=headers,
            cookies=cookies)

        # Retry query with new identity if using Tor (max 10 attempts)
        if 'form id="captcha-form"' in response.text and self.tor:
            attempt += 1
            if attempt > 10:
                raise TorError("Tor query failed -- max attempts exceeded 10")
            # Carry force_mobile through so the retry keeps the same user
            # agent as the original request
            return self.send((base_url or self.search_url), query, attempt,
                             force_mobile)

        return response
348 |
--------------------------------------------------------------------------------