├── .gitignore
├── LICENSE
├── README.md
├── hackq_trivia
├── __init__.py
├── bearer_finder.py
├── config.py
├── hq_config.conf
├── hq_main.py
├── live_show.py
├── logging_config.json
├── question_handler.py
└── searcher.py
├── requirements.txt
├── resources
├── 1.png
└── hackq.png
└── tests
├── __init__.py
├── test_logger.py
├── test_question_handler.py
├── test_question_handler_answers.py
└── test_searcher.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | *.log
104 | .idea/
105 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Kevin Wu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | HackQ-Trivia is a Python HQ Trivia bot.
6 | It receives HQ Trivia questions through their WebSocket
7 | connection and answers the questions automatically.
8 |
9 | ## Installation
10 |
11 | Requires Python 3.7 or above.
12 |
13 | ### Code and dependencies
14 |
15 | ```console
16 | $ git clone https://github.com/Exaphis/HackQ-Trivia.git
17 | $ cd HackQ-Trivia
18 | $ pip install -r requirements.txt
19 | ```
20 |
21 | ### Bearer token
22 |
23 | The easiest way to find your bearer token is to run `bearer_finder.py`.
24 |
25 | Make sure you are in the `HackQ-Trivia` folder, not `hackq_trivia`.
26 |
27 | ```console
28 | $ python3 -m hackq_trivia.bearer_finder
29 | ```
30 |
31 | Alternatively, the token can be found by sniffing the traffic
32 | on your phone. It is easiest to capture on an
33 | emulator, since emulators are easy to root and most run Android
34 | versions without certificate pinning. Popular tools for
35 | capturing bearer tokens are Charles, Fiddler, and Burp Suite.
36 |
37 | Paste your bearer token after `Bearer` in `hq_config.conf`,
38 | all within one line.
39 |
40 | ### Search APIs
41 |
42 | HackQ-Trivia can utilize either Google or Bing search APIs.
43 |
44 | The search settings are under the `[SEARCH]` section in `hq_config.conf`.
45 |
46 | * To use the Google Custom Search Engine API, set `Service = Google`.
47 | * To use the Bing Web Search API, set `Service = Bing`.
48 |
49 | ### Google Search
50 |
51 | #### Google Custom Search Engine API Key
52 |
53 | CAUTION — The first 100 queries per day are free;
54 | additional requests cost $5 per 1,000 queries.
55 |
56 | * Obtain an API key from the [Custom Search JSON API overview](https://developers.google.com/custom-search/v1/overview)
57 | * Paste it after `GoogleApiKey` in `hq_config.conf`
58 |
59 | #### Google Custom Search Engine ID
60 |
61 | * Create a new custom search engine at <https://cse.google.com/cse/all>
62 | * Name your custom search engine and type in any valid URL in `Sites to search`
63 | * Click `Control Panel`
64 | * Enable `Search the entire web`
65 | * Delete the site you added initially in `Sites to search`
66 | * Copy the `Search engine ID` to clipboard
67 | * Paste it after `GoogleCseId` in `hq_config.conf`
68 |
69 | ### Bing Search
70 |
71 | * Create a free account at <https://azure.microsoft.com/free/>
72 | * Enter the Azure portal
73 | * Create a `Bing Search` resource from the Marketplace
74 | * Wait for setup...
75 | * Open the service from your dashboard
76 | * Open `Keys and Endpoint`
77 | * Copy `Key 1` or `Key 2` to clipboard
78 | * Paste it after `BingApiKey` in `hq_config.conf`
79 |
80 | ## Usage
81 |
82 | Make sure you are in the `HackQ-Trivia` folder, not `hackq_trivia`.
83 |
84 | ```console
85 | $ python3 -m hackq_trivia.hq_main
86 | ```
87 |
88 | ## Screenshots
89 |
90 | 
91 |
--------------------------------------------------------------------------------
/hackq_trivia/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Exaphis/HackQ-Trivia/e42314e63295d53018471514a46ea7febea6db19/hackq_trivia/__init__.py
--------------------------------------------------------------------------------
/hackq_trivia/bearer_finder.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from datetime import datetime
3 | import logging
4 | from time import time
5 |
6 | from hackq_trivia.hq_main import init_root_logger
7 |
# Base URL that hq_post() prefixes to every endpoint path.
HQ_URL = "https://api-quiz.hype.space/"
# Headers sent with every request issued by hq_post().
HQ_REQUEST_HEADERS = {"x-hq-client": "Android/1.40.0"}
10 |
11 |
class HQResponseError(Exception):
    """Signals that an HQ API response carried an ``errorCode`` field."""
14 |
15 |
def hq_post(endpoint, data, timeout=10):
    """
    POSTs form data to an HQ API endpoint and returns the decoded JSON body.

    :param endpoint: Path appended to HQ_URL (e.g. "verifications")
    :param data: Dict of form fields sent as the request body
    :param timeout: Seconds to wait for the server before giving up
    :return: Parsed JSON response
    :raises HQResponseError: If the response contains an "errorCode" field
    """
    # Resolve the logger here rather than relying on a module global that is
    # only bound when this file is executed as a script; otherwise importing
    # the module and calling hq_post() fails with a NameError.
    log = logging.getLogger(__name__)

    url = f"{HQ_URL}{endpoint}"
    log.debug(f"POST to {url} w/ data {data}")

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    resp = requests.post(
        url, headers=HQ_REQUEST_HEADERS, data=data, timeout=timeout
    ).json()

    log.debug(f"resp: {resp}")

    if "errorCode" in resp:
        raise HQResponseError(
            f'Error code {resp["errorCode"]}: {resp.get("error", "unknown error")}'
        )

    return resp
29 |
30 |
def main():
    """
    Interactively obtains an HQ bearer token through SMS verification.

    Prompts for a phone number (or a previously issued verification ID),
    requests an SMS code when a phone number is given, then exchanges the
    code the user types in for a bearer token and prints it.

    :raises HQResponseError: If HQ reports an error for either request
    """
    print("Enter your phone number, including + and country code (e.g. +14155552671)")
    print("Example: (415) 555-0171 (U.S. number) -> +14155550171")
    print("Alternatively, enter a previous verification ID: ")
    phone = input("? ").strip()

    # A "+" marks a phone number; anything else is treated as a verification ID
    if "+" in phone:
        verify_resp = hq_post("verifications", {"phone": phone, "method": "sms"})

        verification_id = verify_resp["verificationId"]

        # The expiry is parsed as a naive UTC timestamp; shift it by the local
        # UTC offset so it is displayed in local time.
        local_utc_offset = datetime.now().astimezone().utcoffset()
        exp_time = datetime.strptime(verify_resp["expires"], "%Y-%m-%dT%H:%M:%S.%fZ")
        exp_time += local_utc_offset

        print("Your verification ID is:")
        print(verification_id)
        print(f'Code expires at {exp_time.strftime("%Y-%m-%d %I:%M %p")}.')
    else:
        verification_id = phone

    print("Enter the code received via SMS: ")
    # Keep the code as text: converting to int would drop leading zeros
    # (e.g. "0123" -> 123) and crash on stray non-digit input.
    code = input("? ").strip()

    auth_resp = hq_post(f"verifications/{verification_id}", {"code": code})

    print("Your bearer token is:")
    print(auth_resp["auth"]["authToken"])
60 |
61 |
if __name__ == "__main__":
    # Apply the project's logging configuration before anything is logged.
    init_root_logger()
    # Module-level logger; it is bound only when this file runs as a script.
    logger = logging.getLogger(__name__)
    main()
66 |
--------------------------------------------------------------------------------
/hackq_trivia/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from configparser import ConfigParser
3 |
# Shared parser, populated from hq_config.conf located next to this module.
_CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
config = ConfigParser()
config.read(os.path.join(_CONFIG_DIR, "hq_config.conf"))
6 |
--------------------------------------------------------------------------------
/hackq_trivia/hq_config.conf:
--------------------------------------------------------------------------------
1 | [CONNECTION]
2 | Bearer = INSERT_BEARER_HERE
3 | Timeout = 3
4 |
5 | [SEARCH]
6 | Service = Google
7 | GoogleApiKey = INSERT_GOOGLE_API_KEY_HERE
8 | GoogleCseId = INSERT_GOOGLE_CSE_ID_HERE
9 | BingApiKey = INSERT_BING_API_KEY_HERE
10 | NumSitesToSearch = 5
11 |
12 | [LOGGING]
13 | File = data.log
14 | # If IncrementFileNames is True, File must contain a filename with
15 | # a format() replacement field.
16 | # e.g. File = data{}.log will check data1.log, data2.log, etc.
17 | # until an unused file name is found.
18 | IncrementFileNames = False
19 |
20 | [LIVE]
21 | ShowQuestionSummary = True
22 | ShowChat = True
23 | SimplifiedOutput = False
24 |
25 | [MAIN]
26 | DownloadNLTKResources = True
27 | ShowNextShowInfo = True
28 | ShowBearerInfo = True
29 | ExitIfShowOffline = False
--------------------------------------------------------------------------------
/hackq_trivia/hq_main.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json.decoder
3 | import time
4 | from typing import Optional
5 | from datetime import datetime
6 | import os
7 |
8 | import colorama
9 | import jwt
10 | import nltk
11 | import requests
12 | import logging
13 | import logging.config
14 |
15 | from hackq_trivia.config import config
16 | from hackq_trivia.live_show import LiveShow
17 |
18 |
class BearerError(Exception):
    """Signals a bearer token that cannot be decoded, has expired, or was rejected by HQ."""
21 |
22 |
def next_available_name(base_name: str) -> str:
    """
    Returns the first file name, counting up from 1, that does not exist yet.
    :param base_name: File name containing a format placeholder ({}) for the number
    :return: base_name with the lowest unused number inserted
    """
    index = 1
    while True:
        candidate = base_name.format(index)
        if not os.path.exists(candidate):
            return candidate
        index += 1
36 |
37 |
def init_root_logger() -> None:
    """
    Configures logging from logging_config.json (located next to this module).

    The log file name comes from the LOGGING section of the config; relative
    names are resolved against this module's directory, and a numbered name is
    chosen when IncrementFileNames is enabled and the name has a placeholder.
    """
    import os

    class LogFilterColor(logging.Filter):
        """Keeps only records from this project and guarantees pre/post attributes."""

        def filter(self, record):
            from_project = "hackq" in record.name or "__main__" in record.name
            if not from_project:
                return None

            # The console format string references both %(pre)s and %(post)s
            if hasattr(record, "pre"):
                if not hasattr(record, "post"):
                    record.post = colorama.Style.RESET_ALL
            else:
                record.pre = ""
                record.post = ""

            return record

    here = os.path.dirname(os.path.abspath(__file__))

    log_path = config.get("LOGGING", "File")
    if not os.path.isabs(log_path):
        log_path = os.path.join(here, log_path)

    # Numbering is only possible when formatting actually changes the name,
    # i.e. when the name contains a format placeholder
    increment = config.getboolean("LOGGING", "IncrementFileNames")
    if increment and log_path.format(0) != log_path:
        log_path = next_available_name(log_path)

    with open(os.path.join(here, "logging_config.json")) as conf_file:
        settings = json.load(conf_file)

    settings["handlers"]["fileHandler"]["filename"] = log_path
    settings["filters"]["LogFilterColor"]["()"] = LogFilterColor

    logging.config.dictConfig(settings)
72 |
73 |
def download_nltk_resources() -> None:
    """Downloads the NLTK "stopwords" and "punkt" packages with raise_on_error=True."""
    for resource in ("stopwords", "punkt"):
        nltk.download(resource, raise_on_error=True)
77 |
78 |
class HackQ:
    """
    Command-line HQ Trivia client: validates the configured bearer token, polls
    the HQ schedule endpoint and connects a LiveShow to a show once it is live.
    """

    HQ_SCHEDULE_URL = "https://api-quiz.hype.space/shows/schedule?type=hq"

    def __init__(self):
        """
        Reads settings from the shared config, prepares the HTTP session and
        logging, then validates the bearer token.
        :raises BearerError: If the bearer token cannot be decoded or has expired
        """
        if config.getboolean("MAIN", "DownloadNLTKResources"):
            download_nltk_resources()
        colorama.init()

        self.bearer = config.get("CONNECTION", "Bearer")
        self.timeout = config.getfloat("CONNECTION", "Timeout")
        self.show_next_info = config.getboolean("MAIN", "ShowNextShowInfo")
        self.exit_if_offline = config.getboolean("MAIN", "ExitIfShowOffline")
        self.show_bearer_info = config.getboolean("MAIN", "ShowBearerInfo")
        self.headers = {
            "User-Agent": "Android/1.40.0",
            "x-hq-client": "Android/1.40.0",
            "x-hq-country": "US",
            "x-hq-lang": "en",
            "x-hq-timezone": "America/New_York",
            "Authorization": f"Bearer {self.bearer}",
        }

        self.session = requests.Session()
        self.session.headers.update(self.headers)

        init_root_logger()
        self.logger = logging.getLogger(__name__)

        # Find local UTC offset
        now = time.time()
        self.local_utc_offset = datetime.fromtimestamp(now) - datetime.utcfromtimestamp(
            now
        )

        self.validate_bearer()
        self.logger.info(
            "HackQ-Trivia initialized.\n", extra={"pre": colorama.Fore.GREEN}
        )

    def validate_bearer(self) -> None:
        """
        Decodes the bearer token (without verifying its signature) and checks
        that it has not expired; logs its details if ShowBearerInfo is True.
        :raises BearerError: If the token cannot be decoded or has expired
        """
        try:
            bearer_info = jwt.decode(self.bearer, options={"verify_signature": False})
        except jwt.exceptions.DecodeError as e:
            raise BearerError(
                "Bearer token decode failed. Please check your hq_config.conf."
            ) from e

        expiration_time = datetime.utcfromtimestamp(bearer_info["exp"])
        issue_time = datetime.utcfromtimestamp(bearer_info["iat"])

        if datetime.utcnow() > expiration_time:
            raise BearerError(
                "Bearer token expired. Please obtain another from your device."
            )

        if self.show_bearer_info:
            exp_local = expiration_time + self.local_utc_offset
            iat_local = issue_time + self.local_utc_offset

            self.logger.info("Bearer token details:")
            self.logger.info(f' Username: {bearer_info["username"]}')
            self.logger.info(
                f' Issuing time: {iat_local.strftime("%Y-%m-%d %I:%M %p")}'
            )
            self.logger.info(
                f' Expiration time: {exp_local.strftime("%Y-%m-%d %I:%M %p")}'
            )

    async def __connect_show(self, uri) -> None:
        """Opens a LiveShow on the given WebSocket URI and runs it until it returns."""
        async with LiveShow(self.headers) as show:
            await show.connect(uri)

    def connect(self) -> None:
        """
        Polls for the next show forever, joining it whenever it is live.
        Stops when interrupted with Ctrl+C.
        """
        while True:
            try:
                websocket_uri = self.get_next_show_info()

                if websocket_uri is not None:
                    self.logger.info(
                        "Found WebSocket, connecting...\n",
                        extra={"pre": colorama.Fore.GREEN},
                    )
                    self.logger.debug(websocket_uri)
                    asyncio.run(self.__connect_show(websocket_uri))
            except KeyboardInterrupt:
                self.logger.error("Interrupted, exiting...")
                break

    def get_next_show_info(self) -> Optional[str]:
        """
        Gets info of upcoming shows from HQ, prints it out if ShowNextShowInfo is True
        :return: The show's WebSocket URI if it is live, else None
        :raises BearerError: If HQ answers with "Auth not valid"
        :raises SystemExit: If no show is live and ExitIfShowOffline is True
        """
        try:
            response = self.session.get(
                self.HQ_SCHEDULE_URL, timeout=self.timeout
            ).json()
            self.logger.debug(response)
        except json.decoder.JSONDecodeError:
            self.logger.info(
                "Server response not JSON, retrying...",
                extra={"pre": colorama.Fore.RED},
            )
            time.sleep(1)
            return None
        except requests.exceptions.RequestException as e:
            # Timeouts and dropped connections are transient; keep polling
            # instead of letting them terminate the whole program.
            self.logger.warning(f"Request to HQ failed, retrying: {e}")
            time.sleep(1)
            return None

        if "error" in response:
            if response["error"] == "Auth not valid":
                raise BearerError(
                    "Bearer token rejected. Please check your hq_config.conf or use a VPN."
                )
            else:
                self.logger.warning(f'Error in server response: {response["error"]}')
                time.sleep(1)
                return None

        # An empty or missing schedule is handled like a show that is not live
        shows = response.get("shows") or []
        next_show = shows[0] if shows else None

        if next_show is not None:
            if self.show_next_info:  # If desired, print info of next show
                self._log_next_show(next_show)

            if "live" in next_show:  # Return found WebSocket URI
                # Swap only the leading scheme; later "https" substrings stay intact
                return next_show["live"]["socketUrl"].replace("https", "wss", 1)

        self.logger.info("Show not live.\n", extra={"pre": colorama.Fore.RED})
        if self.exit_if_offline:
            # Same effect as exit(), without depending on the site module
            raise SystemExit

        time.sleep(5)
        return None

    def _log_next_show(self, next_show) -> None:
        """Logs title, description, prize and local start time of a schedule entry."""
        start_time = datetime.strptime(
            next_show["startTime"], "%Y-%m-%dT%H:%M:%S.%fZ"
        )
        start_time_local = start_time + self.local_utc_offset

        self.logger.info("Upcoming show:")
        self.logger.info(
            f'{next_show["display"]["title"]} - {next_show["display"]["summary"]}'
        )
        self.logger.info(next_show["display"]["description"])
        if "subtitle" in next_show["display"]:
            self.logger.info(f'Subtitle: {next_show["display"]["subtitle"]}')
        self.logger.info(
            f'Prize: ${(next_show["prizeCents"] / 100):0,.2f} {next_show["currency"]}'
        )
        self.logger.info(
            f'Show start time: {start_time_local.strftime("%Y-%m-%d %I:%M %p")}'
        )
226 |
if __name__ == "__main__":
    # Script entry point: build the bot, then poll and connect until interrupted.
    bot = HackQ()
    bot.connect()
229 |
--------------------------------------------------------------------------------
/hackq_trivia/live_show.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | from typing import Dict
4 |
5 | import aiohttp
6 | import colorama
7 | from anyascii import anyascii
8 |
9 | from hackq_trivia.config import config
10 | from hackq_trivia.question_handler import QuestionHandler
11 |
12 |
class LiveShow:
    """
    Handles one live show: reads messages from the show's WebSocket, logs chat
    and question summaries, and passes questions to a QuestionHandler.
    Use as an async context manager so the QuestionHandler is closed on exit.
    """

    async def __aenter__(self):
        self.question_handler = QuestionHandler()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.question_handler.close()

    def __init__(self, headers):
        """
        :param headers: HTTP headers sent when opening the WebSocket connection
        """
        self.headers = headers
        self.show_question_summary = config.getboolean("LIVE", "ShowQuestionSummary")
        self.show_chat = config.getboolean("LIVE", "ShowChat")
        self.block_chat = False  # Block chat while question is active
        self.logger = logging.getLogger(__name__)
        self.logger.info("LiveShow initialized.")

    async def connect(self, uri: str) -> None:
        """
        Connects to the show's WebSocket and handles messages until it closes,
        reconnecting whenever the server asks the client to join again.
        :param uri: WebSocket URI of the live show
        """
        # The context manager closes the HTTP session even when a handler
        # raises; previously the session was never closed.
        async with aiohttp.ClientSession() as session:
            rejoin = True
            while rejoin:
                async with session.ws_connect(
                    uri, headers=self.headers, heartbeat=5
                ) as ws:
                    async for msg in ws:
                        # suppress incorrect type warning for msg in PyCharm
                        if msg.type != aiohttp.WSMsgType.TEXT:  # noqa
                            continue
                        message = json.loads(msg.data)  # noqa

                        await self.handle_msg(message)

                        rejoin = self.should_rejoin(message)
                        if rejoin:
                            break

        self.logger.info("Disconnected.")

    @staticmethod
    def should_rejoin(message: Dict) -> bool:
        """
        Tells whether a message is the server's request to join the show again.
        :param message: Decoded WebSocket message
        :return: True only for a broadcastEnded message carrying the rejoin reason
        """
        # .get() keeps messages without a "type" key from raising KeyError
        if message.get("type") != "broadcastEnded":
            return False

        return (
            message.get("reason", "")
            == "You are no longer in the game. Please join again."
        )

    async def handle_msg(self, message: Dict) -> None:
        """
        Logs a WebSocket message and, for questions, triggers the answer search.
        Messages of other types (or without a type) are only debug-logged.
        :param message: Decoded WebSocket message
        :raises ConnectionRefusedError: If the server reports "Auth not valid"
        """
        self.logger.debug(message)

        if "error" in message and message["error"] == "Auth not valid":
            raise ConnectionRefusedError(
                "User ID/Bearer invalid. Please check your hq_config.conf."
            )

        # .get() so that an untyped message is ignored instead of raising KeyError
        message_type = message.get("type")

        if message_type == "broadcastEnded":
            if "reason" in message:
                reason = message["reason"]
                self.logger.info(f"Disconnected: {reason}")
            else:
                self.logger.info("Disconnected.")

        elif message_type == "interaction" and self.show_chat and not self.block_chat:
            self.logger.info(
                f'{message["metadata"]["username"]}: {message["metadata"]["message"]}'
            )

        elif message_type == "question":
            question = anyascii(message["question"])
            choices = [anyascii(choice["text"]) for choice in message["answers"]]

            self.logger.info("\n" * 5)
            self.logger.info(
                f'Question {message["questionNumber"]} out of {message["questionCount"]}'
            )
            self.logger.info(question, extra={"pre": colorama.Fore.BLUE})
            self.logger.info(
                f'Choices: {", ".join(choices)}', extra={"pre": colorama.Fore.BLUE}
            )

            await self.question_handler.answer_question(question, choices)

            # Chat stays hidden until the matching questionClosed message
            self.block_chat = True

        elif message_type == "questionSummary" and self.show_question_summary:
            question = anyascii(message["question"])
            self.logger.info(
                f"Question summary: {question}", extra={"pre": colorama.Fore.BLUE}
            )

            for answer in message["answerCounts"]:
                ans_str = anyascii(answer["answer"])

                self.logger.info(
                    f'{ans_str}:{answer["count"]}:{answer["correct"]}',
                    extra={
                        "pre": colorama.Fore.GREEN
                        if answer["correct"]
                        else colorama.Fore.RED
                    },
                )

            self.logger.info(f'{message["advancingPlayersCount"]} players advancing')
            self.logger.info(
                f'{message["eliminatedPlayersCount"]} players eliminated\n'
            )

        elif message_type == "questionClosed" and self.block_chat:
            self.block_chat = False
            if self.show_chat:
                self.logger.info("\n" * 5)
125 |
--------------------------------------------------------------------------------
/hackq_trivia/logging_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": 1,
3 | "disable_existing_loggers": true,
4 | "formatters": {
5 | "fileFormatter": {
6 | "format": "%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
7 | "datefmt": "%m-%d %H:%M:%S"
8 | },
9 | "consoleFormatter": {
10 | "format": "%(pre)s%(message)s%(post)s"
11 | }
12 | },
13 | "handlers": {
14 | "fileHandler": {
15 | "class": "logging.FileHandler",
16 | "formatter": "fileFormatter",
17 | "filename": "data.log",
18 | "mode": "w"
19 | },
20 | "consoleHandler": {
21 | "level": "INFO",
22 | "class": "logging.StreamHandler",
23 | "formatter": "consoleFormatter",
24 | "stream": "ext://sys.stdout",
25 | "filters": ["LogFilterColor"]
26 | }
27 | },
28 | "filters": {
29 | "LogFilterColor": {
30 | "()": "LogFilterColor"
31 | }
32 | },
33 | "loggers": {
34 | "": {
35 | "handlers": ["consoleHandler", "fileHandler"],
36 | "level": "DEBUG"
37 | }
38 | }
39 | }
--------------------------------------------------------------------------------
/hackq_trivia/question_handler.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import re
3 | import string
4 | from time import time
5 | from typing import Dict, List, Match
6 |
7 | import nltk
8 | import colorama
9 |
10 | from hackq_trivia.config import config
11 | from hackq_trivia.searcher import Searcher
12 |
13 |
class QuestionHandler:
    """
    Answers a multiple-choice question by searching the web for the question's
    keywords and counting how often each choice appears in the fetched pages.
    """

    # Translation table that strips all punctuation except quotes; built once
    # instead of on every find_keywords() call
    _PUNCTUATION_EXCEPT_QUOTES = str.maketrans(
        {key: None for key in set(string.punctuation) - {'"', "'"}}
    )

    def __init__(self):
        self.simplified_output = config.getboolean("LIVE", "SimplifiedOutput")
        self.num_sites = config.getint("SEARCH", "NumSitesToSearch")

        self.searcher = Searcher()
        self.search_methods_to_use = [self._method1, self._method2]
        self.logger = logging.getLogger(__name__)

        # "most"/"least" are kept because they carry meaning in trivia questions
        self.stopwords = set(nltk.corpus.stopwords.words("english")) - {"most", "least"}
        self.punctuation_to_none = str.maketrans(
            {key: None for key in string.punctuation}
        )
        self.punctuation_to_space = str.maketrans(
            {key: " " for key in string.punctuation}
        )

    async def close(self):
        """Closes the underlying Searcher."""
        await self.searcher.close()

    async def answer_question(self, question: str, original_choices: List[str]):
        """
        Searches the web for the question and scores every choice with each search method.
        :param question: Question text
        :param original_choices: Answer choices as shown to the player
        :return: List with one answer per search method (empty string for a tie)
        """
        self.logger.info("Searching...")
        start_time = time()

        question_lower = question.lower()

        reverse = (
            "NOT" in question
            or "NEVER" in question
            or "NEITHER" in question
            or ("least" in question_lower and "at least" not in question_lower)
        )

        # Each choice is searched in up to two spellings (punctuation removed /
        # replaced by spaces). dict.fromkeys drops the second spelling when both
        # are identical, so choices without punctuation are not counted twice.
        choice_groups = [
            list(
                dict.fromkeys(
                    (
                        choice.translate(self.punctuation_to_none),
                        choice.translate(self.punctuation_to_space),
                    )
                )
            )
            for choice in original_choices
        ]
        choices: List[str] = [
            variant for group in choice_groups for variant in group
        ]

        # Step 1: Search web for results
        keyword_start_time = time()
        question_keywords = self.find_keywords(question)
        if not self.simplified_output:
            self.logger.info(f"Question keywords: {question_keywords}")
        self.logger.debug(
            f"Keywords took {round(time() - keyword_start_time, 2)} seconds"
        )

        search_start_time = time()
        links = await self.searcher.get_search_links(
            " ".join(question_keywords), self.num_sites
        )
        self.logger.debug(
            f"Web search took {round(time() - search_start_time, 2)} seconds"
        )
        self.logger.debug(f"Found links: {links}")

        # Step 2: Fetch links and clean up text
        fetch_start_time = time()
        link_texts = [
            Searcher.html_to_visible_text(html).translate(self.punctuation_to_none)
            for html in await self.searcher.fetch_multiple(links)
        ]
        self.logger.debug(
            f"Fetching took {round(time() - fetch_start_time, 2)} seconds"
        )

        # Step 3: Find best answer for all search methods
        post_process_start_time = time()
        answers = []
        for search_method in self.search_methods_to_use:
            answer = await search_method(link_texts, choices, choice_groups, reverse)
            answers.append(answer)
            if answer:
                self.logger.info(answer, extra={"pre": colorama.Fore.BLUE})
            else:
                self.logger.info("Tie", extra={"pre": colorama.Fore.BLUE})

        self.logger.debug(
            f"Post-processing took {round(time() - post_process_start_time, 2)} seconds"
        )

        self.logger.info(f"Search took {round(time() - start_time, 2)} seconds")
        return answers

    async def _method1(
        self,
        texts: List[str],
        answers: List[str],
        answer_groups: List[List[str]],
        reverse: bool,
    ) -> str:
        """
        Returns the answer with the best number of exact occurrences in texts.
        :param texts: List of webpages (strings) to analyze
        :param answers: List of answers
        :param answer_groups: Groupings of different ways of writing the answer
        :param reverse: True if the best answer occurs the least, False otherwise
        :return: Answer that occurs the most/least in the texts, empty string if there is a tie
        """
        self.logger.info("Running method 1")

        counts = {answer: 0 for answer in answers}
        for text in texts:
            # Iterate the dict's unique keys so a spelling listed twice in
            # answers is not counted twice
            for answer in counts:
                counts[answer] += text.count(f" {answer.lower()} ")

        self.logger.info(counts)
        return self.__get_best_answer(counts, answer_groups, reverse)

    async def _method2(
        self,
        texts: List[str],
        answers: List[str],
        answer_groups: List[List[str]],
        reverse: bool,
    ) -> str:
        """
        Returns the answers with the best number of occurrences of the answer's keywords in texts.
        :param texts: List of webpages (strings) to analyze
        :param answers: List of answers
        :param answer_groups: Groupings of different ways of writing the answer
        :param reverse: True if the best answer occurs the least, False otherwise
        :return: Answer that occurs the most/least in the texts, empty string if there is a tie
        """
        self.logger.info("Running method 2")

        counts = {answer: 0 for answer in answers}
        # Keywords depend only on the answer, so extract them once per unique
        # answer instead of once per (text, answer) pair
        needles = {
            answer: [
                f" {keyword.lower()} "
                for keyword in self.find_keywords(answer, sentences=False)
            ]
            for answer in counts
        }
        for text in texts:
            for answer, answer_needles in needles.items():
                for needle in answer_needles:
                    counts[answer] += text.count(needle)

        self.logger.info(counts)
        return self.__get_best_answer(counts, answer_groups, reverse)

    def find_keywords(self, text: str, sentences: bool = True) -> List[str]:
        """
        Returns the keywords from a string containing text, in the order they appear.
        Keywords:
        - Words within quotes
        - Consecutively capitalized words
        - Words that aren't stopwords
        :param text: Text to analyze
        :param sentences: Whether or not text is comprised of sentences
        :return: List of keywords of text
        """
        keyword_indices = {}

        if sentences:
            # Remove capitalization at start of sentences
            sent_tokenized = nltk.tokenize.sent_tokenize(text)
            text = " ".join(
                sentence[0].lower() + sentence[1:] for sentence in sent_tokenized
            )

        # Remove all punctuation except quotes
        text = text.translate(self._PUNCTUATION_EXCEPT_QUOTES)

        # If a match is encountered:
        # Add entry to keyword_indices
        # Return string containing spaces of same length as the match to replace match with
        def process_match(match: Match[str]):
            keyword_indices[match[1]] = match.start()
            return " " * len(match[0])

        # Find words in quotes and replace words in quotes with whitespace
        # of same length to avoid matching words multiple times
        text = re.sub('"([^"]*)"', process_match, text)

        # Find and replace consecutively capitalized words (includes single
        # apostrophe to match possessives). Slightly modified from this accepted answer:
        # https://stackoverflow.com/a/9526027/6686559
        text = re.sub(
            r"([A-Z][a-z]+(?=\s[A-Z])(?:\s[A-Z][a-z']+)+)", process_match, text
        )

        # Find remaining words that are not stopwords
        for m in re.finditer(r"\S+", text):
            if m[0] not in self.stopwords:
                keyword_indices[m[0]] = m.start()

        # Return keywords, sorted by index of occurrence
        keywords = sorted(keyword_indices, key=keyword_indices.get)
        # TODO: handle plural and singular, see test_question_handler.py
        return keywords

    @staticmethod
    def __get_best_answer(
        all_scores: Dict, choice_groups: List[List[str]], reverse: bool = False
    ):
        """
        Returns best answer based on scores for each choice and groups of choices.
        :param all_scores: Dict mapping choices to scores
        :param choice_groups: List of lists (groups) of choices
        :param reverse: If True, return lowest scoring choice group, otherwise return highest
        :return: String (first entry in group) of the group with the highest/lowest total score,
                 empty string if there is a tie, every score is 0, or there are no groups
        """
        if not choice_groups:
            # min()/max() of an empty sequence would raise ValueError
            return ""

        # Add scores of the same answer together due to two ways of removing punctuation.
        # dict.fromkeys removes repeated spellings so none is summed more than once.
        scores = {
            choices[0]: sum(all_scores[choice] for choice in dict.fromkeys(choices))
            for choices in choice_groups
        }

        values = list(scores.values())
        best_value = min(values) if reverse else max(values)

        # Make sure the scores are not all 0 and the best value doesn't occur more than once
        if any(value != 0 for value in values) and values.count(best_value) == 1:
            return (
                min(scores, key=scores.get) if reverse else max(scores, key=scores.get)
            )
        return ""
234 |
--------------------------------------------------------------------------------
/hackq_trivia/searcher.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import logging
3 | from html import unescape
4 | from typing import Iterable, List
5 |
6 | import aiohttp
7 | import bs4
8 | from anyascii import anyascii
9 |
10 | from hackq_trivia.config import config
11 |
12 |
class InvalidSearchServiceError(Exception):
    """Signals that the configured SEARCH/Service value is neither "Bing" nor "Google"."""
15 |
16 |
class Searcher:
    """
    Queries a web search API (Bing or Google) and downloads result pages.

    Two aiohttp sessions are kept: ``search_session`` for the search API
    calls and ``fetch_session`` (custom User-Agent, total timeout) for
    downloading pages. Call :meth:`close` when the instance is no longer needed.
    """

    HEADERS = {"User-Agent": "HQbot"}
    BING_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search"
    GOOGLE_ENDPOINT = "https://www.googleapis.com/customsearch/v1"

    def __init__(self):
        """
        Read connection/search settings from the config and open the sessions.

        :raises InvalidSearchServiceError: if the configured service is neither
            "Bing" nor "Google"
        """
        self.logger = logging.getLogger(__name__)
        self.timeout = config.getfloat("CONNECTION", "Timeout")
        self.search_service = config.get("SEARCH", "Service")

        bing_api_key = config.get("SEARCH", "BingApiKey")
        self.bing_headers = {"Ocp-Apim-Subscription-Key": bing_api_key}

        self.google_cse_id = config.get("SEARCH", "GoogleCseId")
        self.google_api_key = config.get("SEARCH", "GoogleApiKey")

        # Validate the service before opening any session so that a bad
        # config does not leave an unclosed ClientSession behind.
        if self.search_service == "Bing":
            self.search_func = self.get_bing_links
        elif self.search_service == "Google":
            self.search_func = self.get_google_links
        else:
            raise InvalidSearchServiceError(
                f"Search service type {self.search_service} was not recognized."
            )

        # don't use default headers for Bing search so searcher tests
        # can run get_bing_links/get_google_links on its own
        # without depending on search_service being set correctly
        self.search_session = aiohttp.ClientSession()

        client_timeout = aiohttp.ClientTimeout(total=self.timeout)
        self.fetch_session = aiohttp.ClientSession(
            headers=Searcher.HEADERS, timeout=client_timeout
        )

    async def close(self) -> None:
        """Close both sessions; the search session is closed even if the first close fails."""
        try:
            await self.fetch_session.close()
        finally:
            await self.search_session.close()

    async def fetch(self, url: str) -> str:
        """
        Download ``url`` and return the response body as text.

        Failures are deliberately best-effort: a timeout or any other error is
        logged and an empty string is returned instead of raising.
        """
        try:
            async with self.fetch_session.get(url, timeout=self.timeout) as response:
                return await response.text()
        except asyncio.TimeoutError:
            self.logger.error(f"Server timeout to {url}")
        except Exception as e:
            self.logger.error(f"Server error to {url}")
            self.logger.error(e)

        return ""

    # no typing info for return value because https://github.com/python/typeshed/issues/2652
    async def fetch_multiple(self, urls: Iterable[str]):
        """Fetch all ``urls`` concurrently; results are in the same order as ``urls``."""
        coroutines = [self.fetch(url) for url in urls]
        responses = await asyncio.gather(*coroutines)
        return responses

    async def get_search_links(self, query: str, num_results: int) -> List[str]:
        """Return result links for ``query`` using the search service chosen in ``__init__``."""
        return await self.search_func(query, num_results)

    async def get_google_links(self, query: str, num_results: int) -> List[str]:
        """
        Return result links for ``query`` from the Google endpoint.

        An empty list is returned when the request does not answer with
        status 200 or when the response carries no ``items``.
        """
        search_params = {
            "key": self.google_api_key,
            "cx": self.google_cse_id,
            "q": query,
            "num": num_results,
        }

        async with self.search_session.get(
            self.GOOGLE_ENDPOINT, params=search_params
        ) as resp:
            resp_status = resp.status
            resp_data = await resp.json()

        if resp_status != 200:
            # Use the module logger (not the root logger), consistent with fetch()
            self.logger.error(f"Google search failed with status code {resp_status}")
            self.logger.error(resp_data)
            return []

        self.logger.debug(f"google: {query}, n={num_results}")
        self.logger.debug(resp_data)

        # "items" can be absent from the response (e.g. a query with no results)
        return [item["link"] for item in resp_data.get("items", [])]

    async def get_bing_links(self, query: str, num_results: int) -> List[str]:
        """
        Return at most ``num_results`` result links for ``query`` from the Bing endpoint.

        An empty list is returned when the request does not answer with
        status 200 or when the response carries no web page results.
        """
        # why does Bing consistently deliver 1 fewer result than requested?
        search_params = {"q": query, "count": num_results + 1}

        async with self.search_session.get(
            self.BING_ENDPOINT, params=search_params, headers=self.bing_headers
        ) as resp:
            resp_status = resp.status
            resp_data = await resp.json()

        if resp_status != 200:
            # Use the module logger (not the root logger), consistent with fetch()
            self.logger.error(f"Bing search failed with status code {resp_status}")
            self.logger.error(resp_data)
            return []

        self.logger.debug(f"bing: {query}, n={num_results}")
        self.logger.debug(resp_data)

        # "webPages" can be absent from the response (e.g. no web results)
        pages = resp_data.get("webPages", {}).get("value", [])
        # One extra result was requested above; never hand back more than asked for
        return [item["url"] for item in pages][:num_results]

    @staticmethod
    def html_to_visible_text(html: str) -> str:
        """
        Reduce an HTML document to lowercase ASCII text.

        style/script/head/title elements are dropped before the text is
        extracted, HTML entities are unescaped and the result is passed
        through anyascii.
        """
        soup = bs4.BeautifulSoup(html, features="html.parser")
        for s in soup(["style", "script", "[document]", "head", "title"]):
            s.extract()

        return anyascii(unescape(soup.get_text())).lower()
128 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | colorama~=0.4.4
2 | requests~=2.25.1
3 | aiohttp~=3.7.4
4 | beautifulsoup4~=4.9.3
5 | nltk~=3.5
6 | anyascii~=0.1.7
7 | pyjwt~=2.0.1
--------------------------------------------------------------------------------
/resources/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Exaphis/HackQ-Trivia/e42314e63295d53018471514a46ea7febea6db19/resources/1.png
--------------------------------------------------------------------------------
/resources/hackq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Exaphis/HackQ-Trivia/e42314e63295d53018471514a46ea7febea6db19/resources/hackq.png
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Exaphis/HackQ-Trivia/e42314e63295d53018471514a46ea7febea6db19/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_logger.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import logging
3 |
4 | from hackq_trivia.hq_main import init_root_logger
5 |
6 |
class MyTestCase(unittest.TestCase):
    """Checks that logging an emoji-only message through the project logger setup does not raise."""

    def setUp(self) -> None:
        # Install the project's root logger configuration before obtaining a logger
        init_root_logger()
        self.logger = logging.getLogger(__name__)

    def test_emojis(self):
        """Log a message made of emoji; the test passes when no exception is raised."""
        emoji_message = "👁 👃🏾👄👁"
        self.logger.info(emoji_message)
14 |
15 |
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
18 |
--------------------------------------------------------------------------------
/tests/test_question_handler.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import unittest
3 |
4 | from hackq_trivia.question_handler import QuestionHandler
5 |
6 |
class MyTestCase(unittest.IsolatedAsyncioTestCase):
    """
    Tests for QuestionHandler keyword extraction and question answering.

    IsolatedAsyncioTestCase supplies and cleans up the event loop, so the
    handler is created and closed inside a running loop without calling the
    deprecated asyncio.get_event_loop() by hand (same pattern as the
    Searcher tests).
    """

    async def asyncSetUp(self) -> None:
        self.qh = QuestionHandler()

    async def asyncTearDown(self) -> None:
        await self.qh.close()

    def test_find_keywords_consecutive_capitals(self):
        """Consecutive capitalized words are kept together as one keyword."""
        self.assertEqual(
            self.qh.find_keywords("Do you love Nathaniel Hawthorne's books?"),
            ["love", "Nathaniel Hawthorne's", "books"],
        )

    def test_find_keywords_quotations(self):
        """A quoted phrase is kept together as one keyword."""
        self.assertEqual(
            self.qh.find_keywords('I do love "The Scarlet Letter".'),
            ["love", "The Scarlet Letter"],
        )

    async def test_answer_question(self):
        """Answering a full question runs to completion without raising."""
        await self.qh.answer_question(
            "What is the word for a landmass like Florida that is "
            "surrounded on three sides by water?",
            ["Peninsula", "Piñata", "Trifecta"],
        )
38 |
39 |
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
42 |
--------------------------------------------------------------------------------
/tests/test_question_handler_answers.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from hackq_trivia.question_handler import QuestionHandler
4 | from hackq_trivia.hq_main import init_root_logger
5 |
6 |
async def test():
    """
    Manually run QuestionHandler.answer_question on a sample question.

    The commented-out calls are kept as notes on other sample questions.
    The handler is always closed, even when answering raises.
    """
    qh = QuestionHandler()
    try:
        # fails because all pages say foot/footwear instead of feet
        # await qh.answer_question('In the 19th century, where were spats typically worn?',
        #                          ['Ears', 'Arms', 'Feet'])

        # await qh.answer_question('Which of these games is played on a court?',
        #                          ['Basketball', 'Super Mario Kart', 'Uno'])

        # for is removed as a stopword
        await qh.answer_question(
            "What do NEITHER of the N's in CNN stand for?", ["News", "Netflix", "Network"]
        )
    finally:
        # Release the handler's resources regardless of how answering ended
        await qh.close()
21 |
22 |
# Script entry point: set up the project's root logger, then run the manual check.
if __name__ == "__main__":
    init_root_logger()

    # asyncio.run creates the event loop, runs test() to completion and closes the loop
    asyncio.run(test())
27 |
--------------------------------------------------------------------------------
/tests/test_searcher.py:
--------------------------------------------------------------------------------
1 | import json
2 | import unittest
3 | from urllib.parse import urlparse
4 | import warnings
5 |
6 | from hackq_trivia.searcher import Searcher
7 |
8 |
class SearcherFetchTest(unittest.IsolatedAsyncioTestCase):
    """Exercises Searcher.fetch and Searcher.fetch_multiple against live URLs (httpbin.org)."""

    async def asyncSetUp(self) -> None:
        self._searcher = Searcher()

    async def asyncTearDown(self) -> None:
        await self._searcher.close()

    async def test_fetch_single(self):
        """A single fetch sends the Searcher's User-Agent header."""
        body = await self._searcher.fetch("http://httpbin.org/user-agent")
        echoed = json.loads(body)
        self.assertEqual(echoed["user-agent"], Searcher.HEADERS["User-Agent"])

    async def test_fetch_multiple(self):
        """Five fetches of the same URL yield five bodies, each with the expected User-Agent."""
        urls = ["http://httpbin.org/user-agent" for _ in range(5)]
        bodies = await self._searcher.fetch_multiple(urls)
        self.assertEqual(len(bodies), 5)
        # map() is lazy, so each body is parsed right before it is checked
        for echoed in map(json.loads, bodies):
            self.assertEqual(echoed["user-agent"], Searcher.HEADERS["User-Agent"])

    async def test_fetch_error(self):
        """Fetching an unreachable host logs a server error."""
        with self.assertLogs() as log_cm:
            await self._searcher.fetch("http://aaaa.aaa")
        expected_line = "ERROR:hackq_trivia.searcher:Server error to http://aaaa.aaa"
        self.assertIn(expected_line, log_cm.output)

    async def test_fetch_delay(self):
        """A response slower than the configured timeout yields an empty body and a single timeout log."""
        max_timeout = self._searcher.timeout
        fail_url = f"http://httpbin.org/delay/{max_timeout + 1}"
        urls = ["http://httpbin.org/delay/0", fail_url]

        with self.assertLogs() as log_cm:
            quick_body, slow_body = await self._searcher.fetch_multiple(urls)
            self.assertTrue(quick_body)
            self.assertFalse(slow_body)

        expected_output = [f"ERROR:hackq_trivia.searcher:Server timeout to {fail_url}"]
        self.assertEqual(expected_output, log_cm.output)
51 |
52 |
class SearcherSearchEngineTest(unittest.IsolatedAsyncioTestCase):
    """Checks that the Google and Bing link lookups each return five well-formed URLs."""

    def setUp(self) -> None:
        # google-api-python-client raises benign ResourceWarnings, ignore for now
        warnings.simplefilter("ignore", ResourceWarning)

    async def asyncSetUp(self) -> None:
        self._searcher = Searcher()

    async def asyncTearDown(self) -> None:
        await self._searcher.close()

    def _print_and_check_links(self, header, links):
        # Shared verification: print the links, require scheme + host on each, expect five
        print(header)
        for link in links:
            print(link)
            parts = urlparse(link)
            self.assertTrue(all((parts.scheme, parts.netloc)))
        self.assertEqual(len(links), 5)

    async def test_get_google_links(self):
        """Google lookup returns five links that each have a scheme and a network location."""
        found = await self._searcher.get_google_links("test test test test", 5)
        self._print_and_check_links("Google links:", found)

    async def test_get_bing_links(self):
        """Bing lookup returns five links that each have a scheme and a network location."""
        found = await self._searcher.get_bing_links("test test test test", 5)
        self._print_and_check_links("Bing links:", found)
81 |
82 |
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
85 |
--------------------------------------------------------------------------------