├── .gitignore ├── LICENSE ├── README.md ├── hackq_trivia ├── __init__.py ├── bearer_finder.py ├── config.py ├── hq_config.conf ├── hq_main.py ├── live_show.py ├── logging_config.json ├── question_handler.py └── searcher.py ├── requirements.txt ├── resources ├── 1.png └── hackq.png └── tests ├── __init__.py ├── test_logger.py ├── test_question_handler.py ├── test_question_handler_answers.py └── test_searcher.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | *.log 104 | .idea/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Kevin Wu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 | HackQ-Trivia is a Python HQ Trivia bot. 6 | It receives HQ Trivia questions through their WebSocket 7 | connection and answers the questions automatically. 8 | 9 | ## Installation 10 | 11 | Requires Python 3.7 or above. 12 | 13 | ### Code and dependencies 14 | 15 | ```console 16 | $ git clone https://github.com/Exaphis/HackQ-Trivia.git 17 | $ cd HackQ-Trivia 18 | $ pip install -r requirements.txt 19 | ``` 20 | 21 | ### Bearer token 22 | 23 | The easiest way to find your bearer token is to run `bearer_finder.py`. 24 | 25 | Make sure you are in the `HackQ-Trivia` folder, not `hackq_trivia`. 26 | 27 | ```console 28 | $ python3 -m hackq_trivia.bearer_finder 29 | ``` 30 | 31 | Alternatively, it can be found by sniffing the traffic 32 | on your phone. The bearer token is easily found on an 33 | emulator, since they are easy to root and most use Android 34 | versions without certificate pinning. Popular tools used 35 | to obtain bearer tokens are Charles, Fiddler, and Burp Suite. 36 | 37 | Paste your bearer token after `Bearer` in `hq_config.conf`, 38 | all within one line. 39 | 40 | ### Search APIs 41 | 42 | HackQ-Trivia can utilize either Google or Bing search APIs. 43 | 44 | The search settings are under the `[SEARCH]` section in `hq_config.conf`. 45 | 46 | * To use the Google Custom Search Engine API, set `Service = Google`. 47 | * To use the Bing Web Search API, set `Service = Bing`. 48 | 49 | ### Google Search 50 | 51 | #### Google Custom Search Engine API Key 52 | 53 | CAUTION — First 100 queries per day are free, 54 | additional requests cost $5 per 1000 queries. 55 | 56 | * Obtain an API key from 57 | * Paste it after `GoogleApiKey` in `hq_config.conf` 58 | 59 | #### Google Custom Search Engine ID 60 | 61 | * Create a new custom search engine at 62 | * Name your custom search engine and type in any valid URL in `Sites to search` 63 | * Click `Control Panel` 64 | * Enable `Search the entire web` 65 | * Delete the site you added initially in `Sites to search` 66 | * Copy the `Search engine ID` to clipboard 67 | * Paste it after `GoogleCseId` in `hq_config.conf` 68 | 69 | ### Bing Search 70 | 71 | * Create a free account at 72 | * Enter the Azure portal 73 | * Create a `Bing Search` resource from the Marketplace 74 | * Wait for setup... 75 | * Open the service from your dashboard 76 | * Open `Keys and Endpoint` 77 | * Copy `Key 1` or `Key 2` to clipboard 78 | * Paste it after `BingApiKey` in `hq_config.conf` 79 | 80 | ## Usage 81 | 82 | Make sure you are in the `HackQ-Trivia` folder, not `hackq_trivia`. 83 | 84 | ```console 85 | $ python3 -m hackq_trivia.hq_main 86 | ``` 87 | 88 | ## Screenshots 89 | 90 | ![Screenshot when HQ is not live](https://raw.githubusercontent.com/Exaphis/HackQ-Trivia/master/resources/1.png) 91 | -------------------------------------------------------------------------------- /hackq_trivia/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Exaphis/HackQ-Trivia/e42314e63295d53018471514a46ea7febea6db19/hackq_trivia/__init__.py -------------------------------------------------------------------------------- /hackq_trivia/bearer_finder.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from datetime import datetime 3 | import logging 4 | from time import time 5 | 6 | from hackq_trivia.hq_main import init_root_logger 7 | 8 | HQ_URL = "https://api-quiz.hype.space/" 9 | HQ_REQUEST_HEADERS = {"x-hq-client": "Android/1.40.0"} 10 | 11 | 12 | class HQResponseError(Exception): 13 | """Raise when the HQ verifications endpoint returns an error code.""" 14 | 15 | 16 | def hq_post(endpoint, data): 17 | logger.debug(f"POST to {HQ_URL}{endpoint} w/ data {data}") 18 | 19 | resp = requests.post( 20 | f"{HQ_URL}{endpoint}", headers=HQ_REQUEST_HEADERS, data=data 21 | ).json() 22 | 23 | logger.debug(f"resp: {resp}") 24 | 25 | if "errorCode" in resp: 26 | raise HQResponseError(f'Error code {resp["errorCode"]}: {resp["error"]}') 27 | 28 | return resp 29 | 30 | 31 | def main(): 32 | print("Enter your phone number, including + and country code (e.g. +14155552671)") 33 | print("Example: (415) 555-0171 (U.S. number) -> +14155550171") 34 | print("Alternatively, enter a previous verification ID: ") 35 | phone = input("? ") 36 | 37 | if "+" in phone: 38 | verify_resp = hq_post("verifications", {"phone": phone, "method": "sms"}) 39 | 40 | verification_id = verify_resp["verificationId"] 41 | 42 | now = time() 43 | local_utc_offset = datetime.fromtimestamp(now) - datetime.utcfromtimestamp(now) 44 | exp_time = datetime.strptime(verify_resp["expires"], "%Y-%m-%dT%H:%M:%S.%fZ") 45 | exp_time += local_utc_offset 46 | 47 | print("Your verification ID is:") 48 | print(verification_id) 49 | print(f'Code expires at {exp_time.strftime("%Y-%m-%d %I:%M %p")}.') 50 | else: 51 | verification_id = phone 52 | 53 | print("Enter the code received via SMS: ") 54 | code = int(input("? ")) 55 | 56 | auth_resp = hq_post(f"verifications/{verification_id}", {"code": code}) 57 | 58 | print("Your bearer token is:") 59 | print(auth_resp["auth"]["authToken"]) 60 | 61 | 62 | if __name__ == "__main__": 63 | init_root_logger() 64 | logger = logging.getLogger(__name__) 65 | main() 66 | -------------------------------------------------------------------------------- /hackq_trivia/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from configparser import ConfigParser 3 | 4 | config = ConfigParser() 5 | config.read(os.path.join(os.path.dirname(os.path.abspath(__file__)), "hq_config.conf")) 6 | -------------------------------------------------------------------------------- /hackq_trivia/hq_config.conf: -------------------------------------------------------------------------------- 1 | [CONNECTION] 2 | Bearer = INSERT_BEARER_HERE 3 | Timeout = 3 4 | 5 | [SEARCH] 6 | Service = Google 7 | GoogleApiKey = INSERT_GOOGLE_API_KEY_HERE 8 | GoogleCseId = INSERT_GOOGLE_CSE_ID_HERE 9 | BingApiKey = INSERT_BING_API_KEY_HERE 10 | NumSitesToSearch = 5 11 | 12 | [LOGGING] 13 | File = data.log 14 | # If IncrementFileNames is True, File must contain a filename with 15 | # a format() replacement field. 16 | # e.g. File = data{}.log will check data1.log, data2.log, etc. 17 | # until an unused file name is found. 18 | IncrementFileNames = False 19 | 20 | [LIVE] 21 | ShowQuestionSummary = True 22 | ShowChat = True 23 | SimplifiedOutput = False 24 | 25 | [MAIN] 26 | DownloadNLTKResources = True 27 | ShowNextShowInfo = True 28 | ShowBearerInfo = True 29 | ExitIfShowOffline = False -------------------------------------------------------------------------------- /hackq_trivia/hq_main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json.decoder 3 | import time 4 | from typing import Optional 5 | from datetime import datetime 6 | import os 7 | 8 | import colorama 9 | import jwt 10 | import nltk 11 | import requests 12 | import logging 13 | import logging.config 14 | 15 | from hackq_trivia.config import config 16 | from hackq_trivia.live_show import LiveShow 17 | 18 | 19 | class BearerError(Exception): 20 | """Raise when bearer token is invalid/expired""" 21 | 22 | 23 | def next_available_name(base_name: str) -> str: 24 | """ 25 | Finds lowest available file name using .format() to insert numbers (starts at 1). 26 | :param base_name: File name containing format placeholder ({}) 27 | :return: File name with lowest number inserted. 28 | """ 29 | num = 1 30 | curr_name = base_name.format(num) 31 | while os.path.exists(curr_name): 32 | num += 1 33 | curr_name = base_name.format(num) 34 | 35 | return curr_name 36 | 37 | 38 | def init_root_logger() -> None: 39 | import os 40 | 41 | class LogFilterColor(logging.Filter): 42 | def filter(self, record): 43 | if "hackq" not in record.name and "__main__" not in record.name: 44 | return None 45 | 46 | if not hasattr(record, "pre"): 47 | record.pre = "" 48 | record.post = "" 49 | elif not hasattr(record, "post"): 50 | record.post = colorama.Style.RESET_ALL 51 | 52 | return record 53 | 54 | log_filename = config.get("LOGGING", "File") 55 | script_dir = os.path.dirname(os.path.abspath(__file__)) 56 | if not os.path.isabs(log_filename): 57 | log_filename = os.path.join(script_dir, log_filename) 58 | 59 | inc_filenames = config.getboolean("LOGGING", "IncrementFileNames") 60 | # check if name contains format string placeholder 61 | if inc_filenames and log_filename.format(0) == log_filename: 62 | inc_filenames = False 63 | if inc_filenames: 64 | log_filename = next_available_name(log_filename) 65 | 66 | with open(os.path.join(script_dir, "logging_config.json")) as log_conf_file: 67 | log_conf_dict = json.load(log_conf_file) 68 | log_conf_dict["handlers"]["fileHandler"]["filename"] = log_filename 69 | log_conf_dict["filters"]["LogFilterColor"]["()"] = LogFilterColor 70 | 71 | logging.config.dictConfig(log_conf_dict) 72 | 73 | 74 | def download_nltk_resources() -> None: 75 | nltk.download("stopwords", raise_on_error=True) 76 | nltk.download("punkt", raise_on_error=True) 77 | 78 | 79 | class HackQ: 80 | HQ_SCHEDULE_URL = f"https://api-quiz.hype.space/shows/schedule?type=hq" 81 | 82 | def __init__(self): 83 | if config.getboolean("MAIN", "DownloadNLTKResources"): 84 | download_nltk_resources() 85 | colorama.init() 86 | 87 | self.bearer = config.get("CONNECTION", "Bearer") 88 | self.timeout = config.getfloat("CONNECTION", "Timeout") 89 | self.show_next_info = config.getboolean("MAIN", "ShowNextShowInfo") 90 | self.exit_if_offline = config.getboolean("MAIN", "ExitIfShowOffline") 91 | self.show_bearer_info = config.getboolean("MAIN", "ShowBearerInfo") 92 | self.headers = { 93 | "User-Agent": "Android/1.40.0", 94 | "x-hq-client": "Android/1.40.0", 95 | "x-hq-country": "US", 96 | "x-hq-lang": "en", 97 | "x-hq-timezone": "America/New_York", 98 | "Authorization": f"Bearer {self.bearer}", 99 | } 100 | 101 | self.session = requests.Session() 102 | self.session.headers.update(self.headers) 103 | 104 | init_root_logger() 105 | self.logger = logging.getLogger(__name__) 106 | 107 | # Find local UTC offset 108 | now = time.time() 109 | self.local_utc_offset = datetime.fromtimestamp(now) - datetime.utcfromtimestamp( 110 | now 111 | ) 112 | 113 | self.validate_bearer() 114 | self.logger.info( 115 | "HackQ-Trivia initialized.\n", extra={"pre": colorama.Fore.GREEN} 116 | ) 117 | 118 | def validate_bearer(self) -> None: 119 | try: 120 | bearer_info = jwt.decode(self.bearer, options={"verify_signature": False}) 121 | except jwt.exceptions.DecodeError as e: 122 | raise BearerError( 123 | "Bearer token decode failed. Please check your settings.ini." 124 | ) from e 125 | 126 | expiration_time = datetime.utcfromtimestamp(bearer_info["exp"]) 127 | issue_time = datetime.utcfromtimestamp(bearer_info["iat"]) 128 | 129 | if datetime.utcnow() > expiration_time: 130 | raise BearerError( 131 | "Bearer token expired. Please obtain another from your device." 132 | ) 133 | 134 | if self.show_bearer_info: 135 | exp_local = expiration_time + self.local_utc_offset 136 | iat_local = issue_time + self.local_utc_offset 137 | 138 | self.logger.info("Bearer token details:") 139 | self.logger.info(f' Username: {bearer_info["username"]}') 140 | self.logger.info( 141 | f' Issuing time: {iat_local.strftime("%Y-%m-%d %I:%M %p")}' 142 | ) 143 | self.logger.info( 144 | f' Expiration time: {exp_local.strftime("%Y-%m-%d %I:%M %p")}' 145 | ) 146 | 147 | async def __connect_show(self, uri) -> None: 148 | async with LiveShow(self.headers) as show: 149 | await show.connect(uri) 150 | 151 | def connect(self) -> None: 152 | while True: 153 | try: 154 | websocket_uri = self.get_next_show_info() 155 | 156 | if websocket_uri is not None: 157 | self.logger.info( 158 | "Found WebSocket, connecting...\n", 159 | extra={"pre": colorama.Fore.GREEN}, 160 | ) 161 | self.logger.debug(websocket_uri) 162 | asyncio.run(self.__connect_show(websocket_uri)) 163 | except KeyboardInterrupt: 164 | self.logger.error("Interrupted, exiting...") 165 | break 166 | 167 | def get_next_show_info(self) -> Optional[str]: 168 | """ 169 | Gets info of upcoming shows from HQ, prints it out if ShowNextShowInfo is True 170 | :return: The show's WebSocket URI if it is live, else None 171 | """ 172 | try: 173 | response = self.session.get( 174 | self.HQ_SCHEDULE_URL, timeout=self.timeout 175 | ).json() 176 | self.logger.debug(response) 177 | except json.decoder.JSONDecodeError: 178 | self.logger.info( 179 | "Server response not JSON, retrying...", 180 | extra={"pre": colorama.Fore.RED}, 181 | ) 182 | time.sleep(1) 183 | return None 184 | 185 | if "error" in response: 186 | if response["error"] == "Auth not valid": 187 | raise BearerError( 188 | "Bearer token rejected. Please check your settings.ini or use a VPN." 189 | ) 190 | else: 191 | self.logger.warning(f'Error in server response: {response["error"]}') 192 | time.sleep(1) 193 | return None 194 | 195 | next_show = response["shows"][0] 196 | if self.show_next_info: # If desired, print info of next show 197 | start_time = datetime.strptime( 198 | next_show["startTime"], "%Y-%m-%dT%H:%M:%S.%fZ" 199 | ) 200 | start_time_local = start_time + self.local_utc_offset 201 | 202 | self.logger.info("Upcoming show:") 203 | self.logger.info( 204 | f'{next_show["display"]["title"]} - {next_show["display"]["summary"]}' 205 | ) 206 | self.logger.info(next_show["display"]["description"]) 207 | if "subtitle" in next_show["display"]: 208 | self.logger.info(f'Subtitle: {next_show["display"]["subtitle"]}') 209 | self.logger.info( 210 | f'Prize: ${(next_show["prizeCents"] / 100):0,.2f} {next_show["currency"]}' 211 | ) 212 | self.logger.info( 213 | f'Show start time: {start_time_local.strftime("%Y-%m-%d %I:%M %p")}' 214 | ) 215 | 216 | if "live" in next_show: # Return found WebSocket URI 217 | return next_show["live"]["socketUrl"].replace("https", "wss") 218 | else: 219 | self.logger.info("Show not live.\n", extra={"pre": colorama.Fore.RED}) 220 | if self.exit_if_offline: 221 | exit() 222 | 223 | time.sleep(5) 224 | return None 225 | 226 | 227 | if __name__ == "__main__": 228 | HackQ().connect() 229 | -------------------------------------------------------------------------------- /hackq_trivia/live_show.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from typing import Dict 4 | 5 | import aiohttp 6 | import colorama 7 | from anyascii import anyascii 8 | 9 | from hackq_trivia.config import config 10 | from hackq_trivia.question_handler import QuestionHandler 11 | 12 | 13 | class LiveShow: 14 | async def __aenter__(self): 15 | self.question_handler = QuestionHandler() 16 | return self 17 | 18 | async def __aexit__(self, exc_type, exc_val, exc_tb): 19 | await self.question_handler.close() 20 | 21 | def __init__(self, headers): 22 | self.headers = headers 23 | self.show_question_summary = config.getboolean("LIVE", "ShowQuestionSummary") 24 | self.show_chat = config.getboolean("LIVE", "ShowChat") 25 | self.block_chat = False # Block chat while question is active 26 | self.logger = logging.getLogger(__name__) 27 | self.logger.info("LiveShow initialized.") 28 | 29 | async def connect(self, uri: str) -> None: 30 | session = aiohttp.ClientSession() 31 | 32 | rejoin = True 33 | while rejoin: 34 | async with session.ws_connect(uri, headers=self.headers, heartbeat=5) as ws: 35 | async for msg in ws: 36 | # suppress incorrect type warning for msg in PyCharm 37 | if msg.type != aiohttp.WSMsgType.TEXT: # noqa 38 | continue 39 | message = json.loads(msg.data) # noqa 40 | 41 | await self.handle_msg(message) 42 | 43 | rejoin = self.should_rejoin(message) 44 | if rejoin: 45 | break 46 | 47 | self.logger.info("Disconnected.") 48 | 49 | @staticmethod 50 | def should_rejoin(message: Dict) -> bool: 51 | if message["type"] != "broadcastEnded": 52 | return False 53 | 54 | return ( 55 | message.get("reason", "") 56 | == "You are no longer in the game. Please join again." 57 | ) 58 | 59 | async def handle_msg(self, message: Dict) -> None: 60 | self.logger.debug(message) 61 | 62 | if "error" in message and message["error"] == "Auth not valid": 63 | raise ConnectionRefusedError( 64 | "User ID/Bearer invalid. Please check your settings.ini." 65 | ) 66 | 67 | message_type = message["type"] 68 | 69 | if message_type == "broadcastEnded": 70 | if "reason" in message: 71 | reason = message["reason"] 72 | self.logger.info(f"Disconnected: {reason}") 73 | else: 74 | self.logger.info("Disconnected.") 75 | 76 | elif message_type == "interaction" and self.show_chat and not self.block_chat: 77 | self.logger.info( 78 | f'{message["metadata"]["username"]}: {message["metadata"]["message"]}' 79 | ) 80 | 81 | elif message_type == "question": 82 | question = anyascii(message["question"]) 83 | choices = [anyascii(choice["text"]) for choice in message["answers"]] 84 | 85 | self.logger.info("\n" * 5) 86 | self.logger.info( 87 | f'Question {message["questionNumber"]} out of {message["questionCount"]}' 88 | ) 89 | self.logger.info(question, extra={"pre": colorama.Fore.BLUE}) 90 | self.logger.info( 91 | f'Choices: {", ".join(choices)}', extra={"pre": colorama.Fore.BLUE} 92 | ) 93 | 94 | await self.question_handler.answer_question(question, choices) 95 | 96 | self.block_chat = True 97 | 98 | elif message_type == "questionSummary" and self.show_question_summary: 99 | question = anyascii(message["question"]) 100 | self.logger.info( 101 | f"Question summary: {question}", extra={"pre": colorama.Fore.BLUE} 102 | ) 103 | 104 | for answer in message["answerCounts"]: 105 | ans_str = anyascii(answer["answer"]) 106 | 107 | self.logger.info( 108 | f'{ans_str}:{answer["count"]}:{answer["correct"]}', 109 | extra={ 110 | "pre": colorama.Fore.GREEN 111 | if answer["correct"] 112 | else colorama.Fore.RED 113 | }, 114 | ) 115 | 116 | self.logger.info(f'{message["advancingPlayersCount"]} players advancing') 117 | self.logger.info( 118 | f'{message["eliminatedPlayersCount"]} players eliminated\n' 119 | ) 120 | 121 | elif message_type == "questionClosed" and self.block_chat: 122 | self.block_chat = False 123 | if self.show_chat: 124 | self.logger.info("\n" * 5) 125 | -------------------------------------------------------------------------------- /hackq_trivia/logging_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "disable_existing_loggers": true, 4 | "formatters": { 5 | "fileFormatter": { 6 | "format": "%(asctime)s %(name)-12s %(levelname)-8s %(message)s", 7 | "datefmt": "%m-%d %H:%M:%S" 8 | }, 9 | "consoleFormatter": { 10 | "format": "%(pre)s%(message)s%(post)s" 11 | } 12 | }, 13 | "handlers": { 14 | "fileHandler": { 15 | "class": "logging.FileHandler", 16 | "formatter": "fileFormatter", 17 | "filename": "data.log", 18 | "mode": "w" 19 | }, 20 | "consoleHandler": { 21 | "level": "INFO", 22 | "class": "logging.StreamHandler", 23 | "formatter": "consoleFormatter", 24 | "stream": "ext://sys.stdout", 25 | "filters": ["LogFilterColor"] 26 | } 27 | }, 28 | "filters": { 29 | "LogFilterColor": { 30 | "()": "LogFilterColor" 31 | } 32 | }, 33 | "loggers": { 34 | "": { 35 | "handlers": ["consoleHandler", "fileHandler"], 36 | "level": "DEBUG" 37 | } 38 | } 39 | } -------------------------------------------------------------------------------- /hackq_trivia/question_handler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | import string 4 | from time import time 5 | from typing import Dict, List, Match 6 | 7 | import nltk 8 | import colorama 9 | 10 | from hackq_trivia.config import config 11 | from hackq_trivia.searcher import Searcher 12 | 13 | 14 | class QuestionHandler: 15 | def __init__(self): 16 | self.simplified_output = config.getboolean("LIVE", "SimplifiedOutput") 17 | self.num_sites = config.getint("SEARCH", "NumSitesToSearch") 18 | 19 | self.searcher = Searcher() 20 | self.search_methods_to_use = [self._method1, self._method2] 21 | self.logger = logging.getLogger(__name__) 22 | 23 | self.stopwords = set(nltk.corpus.stopwords.words("english")) - {"most", "least"} 24 | self.punctuation_to_none = str.maketrans( 25 | {key: None for key in string.punctuation} 26 | ) 27 | self.punctuation_to_space = str.maketrans( 28 | {key: " " for key in string.punctuation} 29 | ) 30 | 31 | async def close(self): 32 | await self.searcher.close() 33 | 34 | async def answer_question(self, question: str, original_choices: List[str]): 35 | self.logger.info("Searching...") 36 | start_time = time() 37 | 38 | question_lower = question.lower() 39 | 40 | reverse = ( 41 | "NOT" in question 42 | or "NEVER" in question 43 | or "NEITHER" in question 44 | or ("least" in question_lower and "at least" not in question_lower) 45 | ) 46 | 47 | choice_groups = [ 48 | [ 49 | choice.translate(self.punctuation_to_none), 50 | choice.translate(self.punctuation_to_space), 51 | ] 52 | for choice in original_choices 53 | ] 54 | choices: List[str] = sum(choice_groups, []) 55 | 56 | # Step 1: Search web for results 57 | keyword_start_time = time() 58 | question_keywords = self.find_keywords(question) 59 | if not self.simplified_output: 60 | self.logger.info(f"Question keywords: {question_keywords}") 61 | self.logger.debug( 62 | f"Keywords took {round(time() - keyword_start_time, 2)} seconds" 63 | ) 64 | 65 | search_start_time = time() 66 | links = await self.searcher.get_search_links( 67 | " ".join(question_keywords), self.num_sites 68 | ) 69 | self.logger.debug( 70 | f"Web search took {round(time() - search_start_time, 2)} seconds" 71 | ) 72 | self.logger.debug(f"Found links: {links}") 73 | 74 | # Step 2: Fetch links and clean up text 75 | fetch_start_time = time() 76 | link_texts = [ 77 | Searcher.html_to_visible_text(html).translate(self.punctuation_to_none) 78 | for html in await self.searcher.fetch_multiple(links) 79 | ] 80 | self.logger.debug( 81 | f"Fetching took {round(time() - fetch_start_time, 2)} seconds" 82 | ) 83 | 84 | # Step 3: Find best answer for all search methods 85 | post_process_start_time = time() 86 | answers = [] 87 | for search_method in self.search_methods_to_use: 88 | answer = await search_method(link_texts, choices, choice_groups, reverse) 89 | answers.append(answer) 90 | if answer: 91 | self.logger.info(answer, extra={"pre": colorama.Fore.BLUE}) 92 | else: 93 | self.logger.info("Tie", extra={"pre": colorama.Fore.BLUE}) 94 | 95 | self.logger.debug( 96 | f"Post-processing took {round(time() - post_process_start_time, 2)} seconds" 97 | ) 98 | 99 | self.logger.info(f"Search took {round(time() - start_time, 2)} seconds") 100 | return answers 101 | 102 | async def _method1( 103 | self, 104 | texts: List[str], 105 | answers: List[str], 106 | answer_groups: List[List[str]], 107 | reverse: bool, 108 | ) -> str: 109 | """ 110 | Returns the answer with the best number of exact occurrences in texts. 111 | :param texts: List of webpages (strings) to analyze 112 | :param answers: List of answers 113 | :param answer_groups: Groupings of different ways of writing the answer 114 | :param reverse: True if the best answer occurs the least, False otherwise 115 | :return: Answer that occurs the most/least in the texts, empty string if there is a tie 116 | """ 117 | self.logger.info("Running method 1") 118 | 119 | counts = {answer: 0 for answer in answers} 120 | for text in texts: 121 | for answer in answers: 122 | counts[answer] += text.count(f" {answer.lower()} ") 123 | 124 | self.logger.info(counts) 125 | return self.__get_best_answer(counts, answer_groups, reverse) 126 | 127 | async def _method2( 128 | self, 129 | texts: List[str], 130 | answers: List[str], 131 | answer_groups: List[List[str]], 132 | reverse: bool, 133 | ) -> str: 134 | """ 135 | Returns the answers with the best number of occurrences of the answer's keywords in texts. 136 | :param texts: List of webpages (strings) to analyze 137 | :param answers: List of answers 138 | :param answer_groups: Groupings of different ways of writing the answer 139 | :param reverse: True if the best answer occurs the least, False otherwise 140 | :return: Answer that occurs the most/least in the texts, empty string if there is a tie 141 | """ 142 | self.logger.info("Running method 2") 143 | 144 | counts = {answer: 0 for answer in answers} 145 | for text in texts: 146 | for answer in answers: 147 | for keyword in self.find_keywords(answer, sentences=False): 148 | counts[answer] += text.count(f" {keyword.lower()} ") 149 | 150 | self.logger.info(counts) 151 | return self.__get_best_answer(counts, answer_groups, reverse) 152 | 153 | def find_keywords(self, text: str, sentences: bool = True) -> List[str]: 154 | """ 155 | Returns the keywords from a string containing text, in the order they appear. 156 | Keywords: 157 | - Words within quotes 158 | - Consecutively capitalized words 159 | - Words that aren't stopwords 160 | :param text: Text to analyze 161 | :param sentences: Whether or not text is comprised of sentences 162 | :return: List of keywords of text 163 | """ 164 | keyword_indices = {} 165 | 166 | if sentences: 167 | # Remove capitalization at start of sentences 168 | sent_tokenized = nltk.tokenize.sent_tokenize(text) 169 | text = " ".join( 170 | sentence[0].lower() + sentence[1:] for sentence in sent_tokenized 171 | ) 172 | 173 | # Remove all punctuation except quotes 174 | text = text.translate( 175 | str.maketrans({key: None for key in set(string.punctuation) - {'"', "'"}}) 176 | ) 177 | 178 | # If a match is encountered: 179 | # Add entry to keyword_indices 180 | # Return string containing spaces of same length as the match to replace match with 181 | def process_match(match: Match[str]): 182 | keyword_indices[match[1]] = match.start() 183 | return " " * len(match[0]) 184 | 185 | # Find words in quotes and replace words in quotes with whitespace 186 | # of same length to avoid matching words multiple times 187 | text = re.sub('"([^"]*)"', process_match, text) 188 | 189 | # Find and replace consecutively capitalized words (includes single 190 | # apostrophe to match possessives). Slightly modified from this accepted answer: 191 | # https://stackoverflow.com/a/9526027/6686559 192 | text = re.sub( 193 | r"([A-Z][a-z]+(?=\s[A-Z])(?:\s[A-Z][a-z']+)+)", process_match, text 194 | ) 195 | 196 | # Find remaining words that are not stopwords 197 | for m in re.finditer(r"\S+", text): 198 | if m[0] not in self.stopwords: 199 | keyword_indices[m[0]] = m.start() 200 | 201 | # Return keywords, sorted by index of occurrence 202 | keywords = list(sorted(keyword_indices, key=keyword_indices.get)) 203 | # TODO: handle plural and singular, see test_question_handler.py 204 | return keywords 205 | 206 | @staticmethod 207 | def __get_best_answer( 208 | all_scores: Dict, choice_groups: List[List[str]], reverse: bool = False 209 | ): 210 | """ 211 | Returns best answer based on scores for each choice and groups of choices. 212 | :param all_scores: Dict mapping choices to scores 213 | :param choice_groups: List of lists (groups) of choices 214 | :param reverse: If True, return lowest scoring choice group, otherwise return highest 215 | :return: String (first entry in group) of the group with the highest/lowest total score 216 | """ 217 | # Add scores of the same answer together due to two ways of removing punctuation 218 | scores = { 219 | choices[0]: sum(all_scores[choice] for choice in choices) 220 | for choices in choice_groups 221 | } 222 | 223 | best_value = min(scores.values()) if reverse else max(scores.values()) 224 | 225 | # Make sure the scores are not all 0 and the best value doesn't occur more than once 226 | if ( 227 | not all(c == 0 for c in scores.values()) 228 | and list(scores.values()).count(best_value) == 1 229 | ): 230 | return ( 231 | min(scores, key=scores.get) if reverse else max(scores, key=scores.get) 232 | ) 233 | return "" 234 | -------------------------------------------------------------------------------- /hackq_trivia/searcher.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from html import unescape 4 | from typing import Iterable, List 5 | 6 | import aiohttp 7 | import bs4 8 | from anyascii import anyascii 9 | 10 | from hackq_trivia.config import config 11 | 12 | 13 | class InvalidSearchServiceError(Exception): 14 | """Raise when search service specified in config is not recognized.""" 15 | 16 | 17 | class Searcher: 18 | HEADERS = {"User-Agent": "HQbot"} 19 | BING_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search" 20 | GOOGLE_ENDPOINT = "https://www.googleapis.com/customsearch/v1" 21 | 22 | def __init__(self): 23 | self.timeout = config.getfloat("CONNECTION", "Timeout") 24 | self.search_service = config.get("SEARCH", "Service") 25 | 26 | bing_api_key = config.get("SEARCH", "BingApiKey") 27 | self.bing_headers = {"Ocp-Apim-Subscription-Key": bing_api_key} 28 | 29 | self.google_cse_id = config.get("SEARCH", "GoogleCseId") 30 | self.google_api_key = config.get("SEARCH", "GoogleApiKey") 31 | 32 | # don't use default headers for Bing search so searcher tests 33 | # can run get_bing_links/get_google_links on its own 34 | # without depending on search_service being set correctly 35 | self.search_session = aiohttp.ClientSession() 36 | 37 | if self.search_service == "Bing": 38 | self.search_func = self.get_bing_links 39 | elif self.search_service == "Google": 40 | self.search_func = self.get_google_links 41 | else: 42 | raise InvalidSearchServiceError( 43 | f"Search service type {self.search_service} was not recognized." 44 | ) 45 | 46 | client_timeout = aiohttp.ClientTimeout(total=self.timeout) 47 | self.fetch_session = aiohttp.ClientSession( 48 | headers=Searcher.HEADERS, timeout=client_timeout 49 | ) 50 | self.logger = logging.getLogger(__name__) 51 | 52 | async def close(self) -> None: 53 | await self.fetch_session.close() 54 | await self.search_session.close() 55 | 56 | async def fetch(self, url: str) -> str: 57 | try: 58 | async with self.fetch_session.get(url, timeout=self.timeout) as response: 59 | return await response.text() 60 | except asyncio.TimeoutError: 61 | self.logger.error(f"Server timeout to {url}") 62 | except Exception as e: 63 | self.logger.error(f"Server error to {url}") 64 | self.logger.error(e) 65 | 66 | return "" 67 | 68 | # no typing info for return value because https://github.com/python/typeshed/issues/2652 69 | async def fetch_multiple(self, urls: Iterable[str]): 70 | coroutines = [self.fetch(url) for url in urls] 71 | responses = await asyncio.gather(*coroutines) 72 | return responses 73 | 74 | async def get_search_links(self, query: str, num_results: int) -> List[str]: 75 | return await self.search_func(query, num_results) 76 | 77 | async def get_google_links(self, query: str, num_results: int) -> List[str]: 78 | search_params = { 79 | "key": self.google_api_key, 80 | "cx": self.google_cse_id, 81 | "q": query, 82 | "num": num_results, 83 | } 84 | 85 | async with self.search_session.get( 86 | self.GOOGLE_ENDPOINT, params=search_params 87 | ) as resp: 88 | resp_status = resp.status 89 | resp_data = await resp.json() 90 | 91 | if resp_status != 200: 92 | logging.error(f"Google search failed with status code {resp_status}") 93 | logging.error(resp_data) 94 | return [] 95 | 96 | self.logger.debug(f"google: {query}, n={num_results}") 97 | self.logger.debug(resp_data) 98 | 99 | return [item["link"] for item in resp_data["items"]] 100 | 101 | async def get_bing_links(self, query: str, num_results: int) -> List[str]: 102 | # why does Bing consistently deliver 1 fewer result than requested? 103 | search_params = {"q": query, "count": num_results + 1} 104 | 105 | async with self.search_session.get( 106 | self.BING_ENDPOINT, params=search_params, headers=self.bing_headers 107 | ) as resp: 108 | resp_status = resp.status 109 | resp_data = await resp.json() 110 | 111 | if resp_status != 200: 112 | logging.error(f"Bing search failed with status code {resp_status}") 113 | logging.error(resp_data) 114 | return [] 115 | 116 | self.logger.debug(f"bing: {query}, n={num_results}") 117 | self.logger.debug(resp_data) 118 | 119 | return [item["url"] for item in resp_data["webPages"]["value"]] 120 | 121 | @staticmethod 122 | def html_to_visible_text(html): 123 | soup = bs4.BeautifulSoup(html, features="html.parser") 124 | for s in soup(["style", "script", "[document]", "head", "title"]): 125 | s.extract() 126 | 127 | return anyascii(unescape(soup.get_text())).lower() 128 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | colorama~=0.4.4 2 | requests~=2.25.1 3 | aiohttp~=3.7.4 4 | beautifulsoup4~=4.9.3 5 | nltk~=3.5 6 | anyascii~=0.1.7 7 | pyjwt~=2.0.1 -------------------------------------------------------------------------------- /resources/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Exaphis/HackQ-Trivia/e42314e63295d53018471514a46ea7febea6db19/resources/1.png -------------------------------------------------------------------------------- /resources/hackq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Exaphis/HackQ-Trivia/e42314e63295d53018471514a46ea7febea6db19/resources/hackq.png -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Exaphis/HackQ-Trivia/e42314e63295d53018471514a46ea7febea6db19/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_logger.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import logging 3 | 4 | from hackq_trivia.hq_main import init_root_logger 5 | 6 | 7 | class MyTestCase(unittest.TestCase): 8 | def setUp(self) -> None: 9 | init_root_logger() 10 | self.logger = logging.getLogger(__name__) 11 | 12 | def test_emojis(self): 13 | self.logger.info("👁 👃🏾👄👁") 14 | 15 | 16 | if __name__ == "__main__": 17 | unittest.main() 18 | -------------------------------------------------------------------------------- /tests/test_question_handler.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import unittest 3 | 4 | from hackq_trivia.question_handler import QuestionHandler 5 | 6 | 7 | class MyTestCase(unittest.TestCase): 8 | async def setUpAsync(self): 9 | self.qh = QuestionHandler() 10 | 11 | def setUp(self) -> None: 12 | self.loop = asyncio.get_event_loop() 13 | self.loop.run_until_complete(self.setUpAsync()) 14 | 15 | def tearDown(self) -> None: 16 | self.loop.run_until_complete(self.qh.close()) 17 | 18 | def test_find_keywords_consecutive_capitals(self): 19 | self.assertEqual( 20 | self.qh.find_keywords("Do you love Nathaniel Hawthorne's books?"), 21 | ["love", "Nathaniel Hawthorne's", "books"], 22 | ) 23 | 24 | def test_find_keywords_quotations(self): 25 | self.assertEqual( 26 | self.qh.find_keywords('I do love "The Scarlet Letter".'), 27 | ["love", "The Scarlet Letter"], 28 | ) 29 | 30 | def test_answer_question(self): 31 | self.loop.run_until_complete( 32 | self.qh.answer_question( 33 | "What is the word for a landmass like Florida that is " 34 | "surrounded on three sides by water?", 35 | ["Peninsula", "Piñata", "Trifecta"], 36 | ) 37 | ) 38 | 39 | 40 | if __name__ == "__main__": 41 | unittest.main() 42 | -------------------------------------------------------------------------------- /tests/test_question_handler_answers.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from hackq_trivia.question_handler import QuestionHandler 4 | from hackq_trivia.hq_main import init_root_logger 5 | 6 | 7 | async def test(): 8 | qh = QuestionHandler() 9 | # fails because all pages say foot/footwear instead of feet 10 | # await qh.answer_question('In the 19th century, where were spats typically worn?', 11 | # ['Ears', 'Arms', 'Feet']) 12 | 13 | # await qh.answer_question('Which of these games is played on a court?', 14 | # ['Basketball', 'Super Mario Kart', 'Uno']) 15 | 16 | # for is removed as a stopword 17 | await qh.answer_question( 18 | "What do NEITHER of the N's in CNN stand for?", ["News", "Netflix", "Network"] 19 | ) 20 | await qh.close() 21 | 22 | 23 | if __name__ == "__main__": 24 | init_root_logger() 25 | 26 | asyncio.run(test()) 27 | -------------------------------------------------------------------------------- /tests/test_searcher.py: -------------------------------------------------------------------------------- 1 | import json 2 | import unittest 3 | from urllib.parse import urlparse 4 | import warnings 5 | 6 | from hackq_trivia.searcher import Searcher 7 | 8 | 9 | class SearcherFetchTest(unittest.IsolatedAsyncioTestCase): 10 | async def asyncSetUp(self) -> None: 11 | self._searcher = Searcher() 12 | 13 | async def asyncTearDown(self) -> None: 14 | await self._searcher.close() 15 | 16 | async def test_fetch_single(self): 17 | resp = await self._searcher.fetch("http://httpbin.org/user-agent") 18 | resp = json.loads(resp) 19 | self.assertEqual(resp["user-agent"], Searcher.HEADERS["User-Agent"]) 20 | 21 | async def test_fetch_multiple(self): 22 | resps = await self._searcher.fetch_multiple( 23 | ["http://httpbin.org/user-agent"] * 5 24 | ) 25 | self.assertEqual(len(resps), 5) 26 | for resp in resps: 27 | resp = json.loads(resp) 28 | self.assertEqual(resp["user-agent"], Searcher.HEADERS["User-Agent"]) 29 | 30 | async def test_fetch_error(self): 31 | with self.assertLogs() as log_cm: 32 | await self._searcher.fetch("http://aaaa.aaa") 33 | self.assertIn( 34 | "ERROR:hackq_trivia.searcher:Server error to http://aaaa.aaa", log_cm.output 35 | ) 36 | 37 | async def test_fetch_delay(self): 38 | max_timeout = self._searcher.timeout 39 | fail_url = f"http://httpbin.org/delay/{max_timeout + 1}" 40 | 41 | with self.assertLogs() as log_cm: 42 | resps = await self._searcher.fetch_multiple( 43 | ["http://httpbin.org/delay/0", fail_url] 44 | ) 45 | self.assertTrue(resps[0]) 46 | self.assertFalse(resps[1]) 47 | 48 | self.assertEqual( 49 | [f"ERROR:hackq_trivia.searcher:Server timeout to {fail_url}"], log_cm.output 50 | ) 51 | 52 | 53 | class SearcherSearchEngineTest(unittest.IsolatedAsyncioTestCase): 54 | async def asyncSetUp(self) -> None: 55 | self._searcher = Searcher() 56 | 57 | async def asyncTearDown(self) -> None: 58 | await self._searcher.close() 59 | 60 | def setUp(self) -> None: 61 | # google-api-python-client raises benign ResourceWarnings, ignore for now 62 | warnings.simplefilter("ignore", ResourceWarning) 63 | 64 | async def test_get_google_links(self): 65 | links = await self._searcher.get_google_links("test test test test", 5) 66 | print("Google links:") 67 | for link in links: 68 | print(link) 69 | parsed = urlparse(link) 70 | self.assertTrue(all((parsed.scheme, parsed.netloc))) 71 | self.assertEqual(len(links), 5) 72 | 73 | async def test_get_bing_links(self): 74 | links = await self._searcher.get_bing_links("test test test test", 5) 75 | print("Bing links:") 76 | for link in links: 77 | print(link) 78 | parsed = urlparse(link) 79 | self.assertTrue(all((parsed.scheme, parsed.netloc))) 80 | self.assertEqual(len(links), 5) 81 | 82 | 83 | if __name__ == "__main__": 84 | unittest.main() 85 | --------------------------------------------------------------------------------