├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── custom-response-request.md ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── bot ├── __init__.py ├── account.py └── worker.py ├── config.py ├── parsers ├── __init__.py ├── css_parser.py └── wiki_parser.py ├── requirements.txt ├── run.py ├── setup.py ├── test.py ├── tests ├── test_bot.py └── test_wiki_parser.py └── util ├── __init__.py ├── caching ├── __init__.py ├── caching.py ├── db_cache.py ├── memory_cache.py └── redis_cache.py ├── database ├── __init__.py ├── database.py └── models.py ├── logger.py ├── response_info.py ├── response_request.py └── str_utils.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: MePsyDuck 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Link to the comment** 14 | If the bug happened in a comment, provide a link to it. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Additional context** 20 | Add any other context about the problem here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom-response-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom response request 3 | about: Suggest an custom response for this bot 4 | title: '' 5 | labels: enhancement 6 | assignees: MePsyDuck 7 | 8 | --- 9 | 10 | **Describe the custom response you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Link to the response** 14 | Link to the response/wiki page. 15 | 16 | **Custom format** 17 | Format for the response, if any. 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | # User-specific stuff 7 | .idea/**/tasks.xml 8 | .idea/**/usage.statistics.xml 9 | .idea/**/dictionaries 10 | .idea/**/shelf 11 | 12 | # Sensitive or high-churn files 13 | .idea/**/dataSources/ 14 | .idea/**/dataSources.ids 15 | .idea/**/dataSources.local.xml 16 | .idea/**/sqlDataSources.xml 17 | .idea/**/dynamic.xml 18 | .idea/**/uiDesigner.xml 19 | .idea/**/dbnavigator.xml 20 | 21 | # Gradle 22 | .idea/**/gradle.xml 23 | .idea/**/libraries 24 | 25 | # Gradle and Maven with auto-import 26 | # When using Gradle or Maven with auto-import, you should exclude module files, 27 | # since they will be recreated, and may cause churn. Uncomment if using 28 | # auto-import. 29 | # .idea/modules.xml 30 | # .idea/*.iml 31 | # .idea/modules 32 | 33 | # CMake 34 | cmake-build-*/ 35 | 36 | # Mongo Explorer plugin 37 | .idea/**/mongoSettings.xml 38 | 39 | # File-based project format 40 | *.iws 41 | 42 | # IntelliJ 43 | out/ 44 | 45 | # mpeltonen/sbt-idea plugin 46 | .idea_modules/ 47 | 48 | # JIRA plugin 49 | atlassian-ide-plugin.xml 50 | 51 | # Cursive Clojure plugin 52 | .idea/replstate.xml 53 | 54 | # Crashlytics plugin (for Android Studio and IntelliJ) 55 | com_crashlytics_export_strings.xml 56 | crashlytics.properties 57 | crashlytics-build.properties 58 | fabric.properties 59 | 60 | # Editor-based Rest Client 61 | .idea/httpRequests 62 | ### Example user template template 63 | ### Example user template 64 | 65 | # IntelliJ project files 66 | .idea 67 | out 68 | gen### Python template 69 | # Byte-compiled / optimized / DLL files 70 | __pycache__/ 71 | *$py.class 72 | 73 | # C extensions 74 | *.so 75 | 76 | # Distribution / packaging 77 | .Python 78 | build/ 79 | develop-eggs/ 80 | dist/ 81 | downloads/ 82 | eggs/ 83 | .eggs/ 84 | lib/ 85 | lib64/ 86 | parts/ 87 | sdist/ 88 | var/ 89 | wheels/ 90 | *.egg-info/ 91 | .installed.cfg 92 | MANIFEST 93 | 94 | # PyInstaller 95 | # Usually these files are written by a python script from a template 96 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 97 | *.manifest 98 | *.spec 99 | 100 | # Installer logs 101 | pip-log.txt 102 | pip-delete-this-directory.txt 103 | 104 | # Unit test / coverage reports 105 | htmlcov/ 106 | .tox/ 107 | .coverage 108 | .coverage.* 109 | .cache 110 | nosetests.xml 111 | coverage.xml 112 | *.cover 113 | .hypothesis/ 114 | .pytest_cache/ 115 | 116 | # Translations 117 | *.mo 118 | *.pot 119 | 120 | # Django stuff: 121 | *.log 122 | local_settings.py 123 | db.sqlite3 124 | 125 | # Flask stuff: 126 | instance/ 127 | .webassets-cache 128 | 129 | # Scrapy stuff: 130 | .scrapy 131 | 132 | # Sphinx documentation 133 | docs/_build/ 134 | 135 | # PyBuilder 136 | target/ 137 | 138 | # Jupyter Notebook 139 | .ipynb_checkpoints 140 | 141 | # pyenv 142 | .python-version 143 | 144 | # celery beat schedule file 145 | celerybeat-schedule 146 | 147 | # SageMath parsed files 148 | *.sage.py 149 | 150 | # Environments 151 | .env 152 | .venv 153 | env/ 154 | venv/ 155 | ENV/ 156 | env.bak/ 157 | venv.bak/ 158 | 159 | # Spyder project settings 160 | .spyderproject 161 | .spyproject 162 | 163 | # Rope project settings 164 | .ropeproject 165 | 166 | # mkdocs documentation 167 | /site 168 | 169 | # mypy 170 | .mypy_cache/ 171 | 172 | # bot sqlite local db 173 | bot.db 174 | 175 | # bot sqlite local db 176 | cache.json 177 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.6.4" 5 | 6 | before_install: 7 | - sudo apt-get update 8 | 9 | install: 10 | - pip install codecov 11 | - pip install -r requirements.txt 12 | 13 | virtualenv: 14 | system_site_package: true 15 | 16 | script: 17 | - coverage run test.py 18 | 19 | after_success: 20 | - codecov 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Jonarzz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Dota Responses Reddit Bot 2 | [![Python 3.6.4](https://img.shields.io/badge/python-3.6.4-blue.svg)](https://www.python.org/downloads/release/python-364/) 3 | [![Build Status](https://api.travis-ci.org/Jonarzz/DotaResponsesRedditBot.svg?branch=master)](https://travis-ci.org/Jonarzz/DotaResponsesRedditBot) 4 | [![Maintainability](https://api.codeclimate.com/v1/badges/de2c724018076b34064f/maintainability)](https://codeclimate.com/github/Jonarzz/DotaResponsesRedditBot/maintainability) 5 | [![codecov](https://codecov.io/gh/Jonarzz/DotaResponsesRedditBot/branch/master/graph/badge.svg)](https://codecov.io/gh/Jonarzz/DotaResponsesRedditBot) 6 | 7 | > Bot adding reply comments with links to appropriate responses found on [/r/dota2](https://www.reddit.com/r/DotA2). 8 | 9 | ![Example](https://i.imgur.com/PAcg57z.png) 10 | 11 | The bot replies only for the comments that **are** responses. 12 | 13 | For example: `"Selemene commands"` will return a Luna response *(like on the screenshot above)*. 14 | 15 | **The original bot account is no longer active, but over 4 years of running [it gained over 100,000 karma points on Reddit](https://www.reddit.com/user/dota_responses_bot/comments/?t=all&sort=top)**. 16 | **Currently the bot's account is [dota2_responses_bot](https://www.reddit.com/user/dota2_responses_bot)**. 17 | 18 | ### Active maintainer 19 | [MePsyDuck](https://github.com/MePsyDuck) 20 | 21 | ### Comment/Submission text processing 22 | * All the body text is transformed into lowercase 23 | * Any punctuation is replaced with spaces 24 | * Multiple spaces are trimmed to single space. 25 | * If comment has blockquote, first blockquote is considered for matching. 26 | 27 | 30 | 33 | 34 | --- 35 | ### TODO 36 | * (If possible) Add hero flair in responses (Waiting on reddit to support this in reddit redesign). 37 | 38 | --- 39 | ### Some stats and general info 40 | [/r/dota2](https://www.reddit.com/r/DotA2) subreddit generates around 3.5k comments/day, 41 | peaking around 12.5k during December (stats via [subbreditstats](https://subredditstats.com/r/dota2). 42 | Bot should be able to handle more than 15k comments/day(10 comments/minute) easily (Just an estimate, actual performance not yet tested). 43 | 44 | --- 45 | ### Environment variables 46 | Config variables needed to be set in environment for running the bot: 47 | 48 | | Variable | Required? | Default | Description | 49 | |-------------------|-----------|--------------|--------------------------------------------------------------------------------------------------------| 50 | | CLIENT_ID | Required | None. | `client_id` generated by Reddit. | 51 | | CLIENT_SECRET | Required | None. | `secret` generated by Reddit. | 52 | | SUBREDDIT | Optional | `dota2` | Subreddit the bot is going to work on. | 53 | | REDDIT_USERNAME | Required | None. | Username for the Reddit account being used. | 54 | | REDDIT_PASSWORD | Required | None. | Password for the Reddit account being used. | 55 | | CACHE_PROVIDER | Optional | `memory` | Caching module to be used. Valid choices : `redis`, `memory`, `db`. | 56 | | CACHE_URL | Optional | `cache.json` | URL path to redis instance/database/file in memory. Based on `CACHE_PROVIDER`. | 57 | | DATABASE_PROVIDER | Optional | `sqlite` | DBMS to be used. Valid choices : `sqlite`, `mysql`, `postgres` | 58 | | DATABASE_URL | Optional | `bot.db` | URL to the database. | 59 | | LOGGING_LEVEL | Optional | `INFO` | Logging level. Valid choices : [Logging levels](https://docs.python.org/3/library/logging.html#levels) | 60 | 61 | --- 62 | ### Changelog 63 | 64 | #### 3.1 65 | * User(OP) can now request to update the response using another comment under bot's comment. 66 | The comment should be in the format ```Try ``` 67 | * Users can now request for a hero specific response by adding ``` ::``` prefix to the response. 68 | Has more priority than user's flair. 69 | 70 | #### 3.0 71 | Major revamp for the bot. 72 | Things that are new: 73 | * Bot can reply to responses that are in blockquotes and ignore rest of comment. 74 | * Added support for TI chat wheel sounds. 75 | * Comment on post submission if title is a response. 76 | 77 | Things updated: 78 | * Support sqlite, MySQL and PostgreSQL dbs via Pony-ORM. 79 | * Added caching for comment ids (redis, db and in memory/file based). 80 | * Revamped parsing of responses from wiki (now directly from the sources). 81 | * Revamped parsing flair css and image directories from subreddit css. 82 | * Better parsing for comments. 83 | * Added better support for custom responses. 84 | * Updated excluded responses. 85 | * Updated docs. 86 | * Updated tests. 87 | 88 | #### 2.7 89 | * Now hero portraits (flairs) are added before the response 90 | * The bot tries to match the hero response with the hero in the comment's author flair first 91 | 92 | #### 2.6 93 | * Added a few Io and Phoenix responses 94 | 95 | #### 2.5 96 | * Fixed random responses for the "Shitty wizard" line - now it's working properly 97 | * Added special treatment for the comments related to "One of my favorites" response 98 | * Moved from a dictionary for responses and a list for comment ids to databases 99 | 100 | #### 2.4 101 | * Added random responses for the "Shitty wizard" line (needs testing) 102 | 103 | #### 2.3 104 | * Added tests 105 | * Code refactoring 106 | * Added Travis CI and CodeClimate checking 107 | 108 | #### 2.2 109 | * Code refactoring (renaming variables, deleting unnecessary methods, etc.) 110 | * Response in the reply is now an exact quote of the original comment 111 | * Added comments 112 | 113 | #### 2.1 114 | * Bot is now adding the source of the response (e.g. hero name) to the comment *(needs testing)* 115 | 116 | #### 2.0 117 | * Added Arc Warden responses 118 | 119 | #### 1.9 120 | * File paths are now relative to the script file location (using os) 121 | * Added dates to logging 122 | * Logs are saved in respective files on the server 123 | * Fixed a bug with adding same comment a few times in sticky threads that are on the subreddit for a long time (time-saving workaround) 124 | 125 | #### 1.8 126 | * Change in the main loop of the script - much better efficiency (time) 127 | 128 | #### 1.7 129 | * Changed reply comment formatting 130 | 131 | #### 1.6 132 | * Removed keyword triggering as /r/dota2 community did not like it 133 | 134 | #### 1.5 135 | * Added keywords that trigger the bot: "just do it", "beautiful", "from the ghastly eyrie", "sniper", "ppd", "leafeator", "ur/your brother" 136 | * Code refactoring 137 | 138 | #### 1.4 139 | * Fixed a bug created by 1.2 changes 140 | * Changed submissions number in hot to 25 141 | * Added more excluded responses 142 | 143 | #### 1.3 144 | * Removed responses such as "thank you", hero names and item names (anti-spam) 145 | 146 | #### 1.2 147 | * Accept comments with extra letters added for emphasis 148 | 149 | #### 1.1 150 | * One word responses are no longer in the dictionary 151 | * Replaced double spaces with single space 152 | * Bot is now working with hot submissions 153 | 154 | --- 155 | ### Treeware License 156 | Basically MIT License, but if you use the code (learning or project purposes), you have to plant at least one tree at some future time. 157 | -------------------------------------------------------------------------------- /bot/__init__.py: -------------------------------------------------------------------------------- 1 | from bot.account import * 2 | from bot.worker import * 3 | 4 | __all__ = ['account', 'worker'] 5 | -------------------------------------------------------------------------------- /bot/account.py: -------------------------------------------------------------------------------- 1 | """Module used to configure the connection to the Reddit API. 2 | Removed old Code Flow auth in favor of Password Flow auth. 3 | Reason: https://www.reddit.com/r/redditdev/comments/5fxlk8/praw_refresh_tokens/dantjyk/ 4 | """ 5 | 6 | import praw 7 | 8 | import config 9 | 10 | __author__ = 'Jonarzz' 11 | __maintainer__ = 'MePsyDuck' 12 | 13 | 14 | def get_account(): 15 | """Method that provides the connection to Reddit API using OAuth. 16 | :return: Reddit instance. 17 | """ 18 | return praw.Reddit(client_id=config.CLIENT_ID, 19 | client_secret=config.CLIENT_SECRET, 20 | user_agent=config.USER_AGENT, 21 | username=config.USERNAME, 22 | password=config.PASSWORD) 23 | -------------------------------------------------------------------------------- /bot/worker.py: -------------------------------------------------------------------------------- 1 | """Main module of the Dota 2 subreddit Responses Bot. 2 | 3 | The main body of the script is running in this file. The comments are loaded from the subreddit 4 | and the script checks if the comment or submission is a response from Dota 2. If it is, a proper reply for response is 5 | prepared. The response is posted as a reply to the original comment/submission on Reddit. 6 | """ 7 | import time 8 | 9 | from praw.exceptions import APIException 10 | from praw.models import Comment 11 | from prawcore import ServerError 12 | 13 | import config 14 | from bot import account 15 | from util.caching import get_cache_api 16 | from util.database.database import db_api 17 | from util.logger import logger 18 | from util.response_info import ResponseInfo 19 | from util.str_utils import preprocess_text 20 | 21 | __author__ = 'Jonarzz' 22 | __maintainer__ = 'MePsyDuck' 23 | 24 | cache_api = get_cache_api() 25 | 26 | 27 | def work(): 28 | """Main method executing the script. 29 | 30 | It connects to an account, loads dictionaries from proper files (declared in config file). 31 | Afterwards it executes process_comments method with proper arguments passed. 32 | """ 33 | 34 | reddit = account.get_account() 35 | logger.info('Connected to Reddit account : ' + config.USERNAME) 36 | 37 | comment_stream, submission_stream = get_reddit_stream(reddit) 38 | while True: 39 | try: 40 | for comment in comment_stream: 41 | if comment is None: 42 | break 43 | process_replyable(reddit, comment) 44 | for submission in submission_stream: 45 | if submission is None: 46 | break 47 | process_replyable(reddit, submission) 48 | except ServerError as e: 49 | comment_stream, submission_stream = get_reddit_stream(reddit) 50 | logger.critical("Reddit server is down : " + str(e)) 51 | time.sleep(120) 52 | except APIException as e: 53 | comment_stream, submission_stream = get_reddit_stream(reddit) 54 | logger.critical("API Exception occurred : " + str(e)) 55 | time.sleep(60) 56 | 57 | 58 | def get_reddit_stream(reddit): 59 | """Returns the comment and submission stream. 60 | Streams need to be restarted/re-obtained when they throw exception. 61 | 62 | :param reddit: The reddit account instance 63 | :return: The comment and subreddit stream 64 | """ 65 | comment_stream = reddit.subreddit(config.SUBREDDIT).stream.comments(pause_after=-1) 66 | submission_stream = reddit.subreddit(config.SUBREDDIT).stream.submissions(pause_after=-1) 67 | return comment_stream, submission_stream 68 | 69 | 70 | def process_replyable(reddit, replyable): 71 | """Method used to check all the comments in a submission and add replies if they are responses. 72 | 73 | PRAW generates past ~100 comments/submissions on the first iteration. Then the loop only runs if there is a new 74 | comment/submission added to the stream. This also means that once PRAW is up and running, after the initial comments 75 | list it won't generate any duplicate comments. 76 | 77 | However, just as a safeguard, Caching is used to store replyable ids as they are processed for the first time. 78 | Otherwise, when the bot is restarted it might reply twice to same comments. If replyable id is in the already present 79 | in the cache_api, then it is ignored, else processed and added to the cache_api. 80 | * Self comments are ignored. 81 | * It is prepared for comparison to the responses in dictionary. 82 | * If the replyable is not on the excluded responses list (loaded from config) and if it is in the responses db or 83 | specific responses list, a reply replyable is prepared and posted. 84 | 85 | :param reddit: The reddit account instance 86 | :param replyable: comment or submission 87 | :return: None 88 | """ 89 | 90 | if cache_api.exists(thing_id=replyable.fullname): 91 | return 92 | 93 | # Ignore thyself 94 | if replyable.author == reddit.user.me(): 95 | return 96 | 97 | logger.info("Found new replyable: " + replyable.fullname) 98 | 99 | processed_text = process_text(replyable.body if isinstance(replyable, Comment) else replyable.title) 100 | 101 | # TODO make use of assignment expression for all below 102 | if is_excluded_response(processed_text): 103 | pass 104 | elif is_custom_response(processed_text): 105 | add_custom_reply(replyable, processed_text) 106 | elif (response_info := is_hero_specific_response(processed_text)) is not None: 107 | add_hero_specific_reply(replyable, response_info) 108 | elif (response_info := is_flair_specific_response(replyable, processed_text)) is not None: 109 | add_flair_specific_reply(replyable, response_info) 110 | elif (response_info := is_update_request(reddit, replyable, processed_text)) is not None: 111 | update_reply(replyable, response_info) 112 | elif (response_info := is_hero_response(processed_text)) is not None: 113 | add_regular_reply(replyable, response_info) 114 | 115 | 116 | def process_text(text): 117 | """Method used to clean the replyable body/title text. 118 | If text contains a quote, the first quote text is considered as the text. 119 | 120 | :param text: The replyable body/title text 121 | :return: Processed text 122 | """ 123 | hero_name = None 124 | if '>' in text: 125 | text = get_quoted_text(text) 126 | if '::' in text: 127 | hero_name, text = text.split('::', 1) 128 | hero_name = hero_name.strip() + '::' 129 | 130 | return (hero_name or '') + preprocess_text(text) 131 | 132 | 133 | def get_quoted_text(text): 134 | """Method used to get quoted text. 135 | If body/title text contains a quote, the first quote is considered as the text. 136 | 137 | :param text: The replyable text 138 | :return: The first quote in the text. If no quotes are found, then the entire text is returned 139 | """ 140 | lines = text.split('\n\n') 141 | for line in lines: 142 | if line.startswith('>'): 143 | return line[1:] 144 | return text 145 | 146 | 147 | def is_excluded_response(text): 148 | """Method to check if the given body/title is in excluded responses set. 149 | Also return False for single word text (they're mostly common phrases). 150 | 151 | :param text: The processed body/title text 152 | :return: True if text is an excluded response, else False 153 | """ 154 | return ' ' not in text or text in config.EXCLUDED_RESPONSES 155 | 156 | 157 | def is_custom_response(text): 158 | """Method to check if given body/title text is in custom response set. 159 | 160 | :param text: The body/title text 161 | :return: True if text is a custom response, else False 162 | """ 163 | return text in config.CUSTOM_RESPONSES 164 | 165 | 166 | def add_custom_reply(replyable, body): 167 | """Method to create a custom reply for specific cases that match the custom responses set. 168 | 169 | :param replyable: The comment/submission on reddit 170 | :param body: The processed body/title text 171 | :return: None 172 | """ 173 | custom_response = config.CUSTOM_RESPONSES[body] 174 | original_text = replyable.body if isinstance(replyable, Comment) else replyable.title 175 | 176 | reply = custom_response.format(original_text, config.COMMENT_ENDING) 177 | replyable.reply(reply) 178 | logger.info("Replied to: " + replyable.fullname) 179 | 180 | 181 | def is_hero_specific_response(text): 182 | """Method that checks if response for specified hero name and text exists. 183 | 184 | :param text: The processed body/title text 185 | :return: ResponseInfo containing hero_id and link for response if the response for specified hero was found, otherwise None 186 | """ 187 | if '::' in text: 188 | hero_name, text = text.split('::', 1) 189 | 190 | if not hero_name or not text: 191 | return None 192 | 193 | hero_id = db_api.get_hero_id_by_name(hero_name=hero_name) 194 | if hero_id: 195 | link, _ = db_api.get_link_for_response(processed_text=text, hero_id=hero_id) 196 | if link: 197 | return ResponseInfo(hero_id=hero_id, link=link) 198 | return None 199 | 200 | 201 | def add_hero_specific_reply(replyable, response_info): 202 | """Method to add a hero specific reply to the comment/submission. 203 | 204 | :param replyable: The comment/submission on reddit 205 | :param response_info: ResponseInfo containing hero_id and link for response 206 | :return: None 207 | """ 208 | create_and_add_reply(replyable=replyable, response_url=response_info.link, hero_id=response_info.hero_id) 209 | 210 | 211 | def is_flair_specific_response(replyable, text): 212 | """Method that checks if response for hero in author's flair and text exists. 213 | 214 | :param replyable: The comment/submission on reddit 215 | :param text: The processed body/title text 216 | :return: ResponseInfo containing hero_id and link for response if the response for author's flair's hero was found, otherwise None 217 | """ 218 | hero_id = db_api.get_hero_id_by_flair_css(flair_css=replyable.author_flair_css_class) 219 | if hero_id: 220 | link, _ = db_api.get_link_for_response(processed_text=text, hero_id=hero_id) 221 | if link: 222 | return ResponseInfo(hero_id=hero_id, link=link) 223 | return None 224 | 225 | 226 | def add_flair_specific_reply(replyable, response_info): 227 | """Method to add a author's flair specific reply to the comment/submission. 228 | 229 | :param replyable: The comment/submission on reddit 230 | :param response_info: ResponseInfo containing hero_id and link for response 231 | :return: None 232 | """ 233 | create_and_add_reply(replyable=replyable, response_url=response_info.link, hero_id=response_info.hero_id) 234 | 235 | 236 | def is_update_request(reddit, replyable, text): 237 | """Method to check whether the comment is a request to update existing response. 238 | Only works if 239 | * Comment begins with "try" 240 | * Comment ends with valid hero name 241 | * Given hero has the original response 242 | * Root/Original comment/submission was not hero specific response. 243 | 244 | Examples: 245 | "Try legion commander" : Valid 246 | "Try leGiOn ComManDer" : Valid - case does not matter 247 | "legion commander" : Invalid - does not begin with `try` 248 | "Try legion" : Invalid - invalid hero name 249 | 250 | :param reddit: The reddit account instance 251 | :param replyable: The comment/submission on reddit 252 | :param text: The processed body/title text 253 | :return: ResponseInfo containing hero_id and link for response if this is a valid update request, otherwise None 254 | """ 255 | 256 | if not text.startswith(config.UPDATE_REQUEST_KEYWORD): 257 | return None 258 | 259 | if not validate_update_request_comment_tree(reddit, replyable): 260 | return None 261 | 262 | hero_name = text.replace(config.UPDATE_REQUEST_KEYWORD, '', 1) 263 | hero_id = db_api.get_hero_id_by_name(hero_name=hero_name) 264 | if hero_id is None: 265 | return None 266 | 267 | root_replyable = replyable.parent().parent() 268 | processed_text = process_text(root_replyable.body if isinstance(root_replyable, Comment) else root_replyable.title) 269 | 270 | if is_hero_specific_response(processed_text): 271 | return None 272 | 273 | link, _ = db_api.get_link_for_response(processed_text=processed_text, hero_id=hero_id) 274 | 275 | if link is None: 276 | return None 277 | 278 | return ResponseInfo(hero_id=hero_id, link=link) 279 | 280 | 281 | def validate_update_request_comment_tree(reddit, replyable): 282 | """Method to check whether the comment in the request to update existing response is valid. 283 | A valid comment tree is when: 284 | * Comment was made as a reply to bot's comment 285 | * Comment was added by OP, who made the original request(Response/Comment) for the response. 286 | 287 | The comment tree should look something like below, where root(original) replyable can be Comment or Submission. 288 | Only valid case is c3. 289 | c1/s1 user: Foo 290 | c2 bot: "Foo" response by Bar hero 291 | c3 user: Try Bar2 292 | c4 other_user: Try Bar2 293 | 294 | :param reddit: The reddit account instance 295 | :param replyable: The comment/submission on reddit 296 | :return: True if this is a valid comment tree, else False 297 | """ 298 | if not isinstance(replyable, Comment): 299 | return False 300 | 301 | op = replyable.author 302 | parent_comment = replyable.parent() 303 | 304 | if not isinstance(parent_comment, Comment): 305 | return False 306 | 307 | if not parent_comment.author == reddit.user.me(): 308 | return False 309 | 310 | root_replyable = parent_comment.parent() 311 | 312 | if not root_replyable.author == op: 313 | return False 314 | 315 | return True 316 | 317 | 318 | def update_reply(replyable, response_info): 319 | """Method to edit and update existing response comment by the bot with a new hero as requested. 320 | 321 | :param replyable: The comment/submission on reddit 322 | :param response_info: ResponseInfo containing hero_id and link for response 323 | :return: None 324 | """ 325 | bot_comment = replyable.parent() 326 | root_replyable = bot_comment.parent() 327 | 328 | # TODO maybe get original text from bot's command, rather than the original post, as it might be edited by the time this command is called 329 | original_text = root_replyable.body if isinstance(root_replyable, Comment) else root_replyable.title 330 | original_text = original_text.strip() 331 | 332 | if '>' in original_text: 333 | original_text = get_quoted_text(original_text).strip() 334 | 335 | # Getting name with Proper formatting 336 | hero_name = db_api.get_hero_name(response_info.hero_id) 337 | 338 | reply = "[{}]({}) (sound warning: {}){}".format(original_text, response_info.link, hero_name, config.COMMENT_ENDING) 339 | bot_comment.edit(reply) 340 | 341 | logger.info("Updated Reply: " + replyable.fullname) 342 | 343 | 344 | def is_hero_response(text): 345 | """Method to create response for given replyable. 346 | In case of multiple matches, it used to sort responses in descending order of heroes and get the first one, 347 | but now it's random. 348 | 349 | :param text: The processed body/title text 350 | :return: ResponseInfo containing hero_id and link for response if this is a valid update request, otherwise None 351 | """ 352 | 353 | link, hero_id = db_api.get_link_for_response(processed_text=text) 354 | 355 | if link and hero_id: 356 | return ResponseInfo(hero_id=hero_id, link=link) 357 | 358 | return None 359 | 360 | 361 | def add_regular_reply(replyable, response_info): 362 | """Method to create response for given replyable. 363 | In case of multiple matches, it used to sort responses in descending order of heroes and get the first one, 364 | but now it's random. 365 | 366 | :param replyable: The comment/submission on reddit 367 | :param response_info: ResponseInfo containing hero_id and link for response 368 | :return: None 369 | """ 370 | create_and_add_reply(replyable=replyable, response_url=response_info.link, hero_id=response_info.hero_id) 371 | 372 | 373 | def create_and_add_reply(replyable, response_url, hero_id): 374 | """Method that creates a reply in reddit format and adds the reply to comment/submission. 375 | The reply consists of a link to the response audio file, the response itself, a warning about the sound 376 | and an ending added from the config file (post footer). 377 | 378 | Image is currently ignored due to new reddit redesign not rendering flairs properly. 379 | 380 | :param replyable: The comment/submission on reddit 381 | :param response_url: The url to the response audio file 382 | :param hero_id: The hero_id to which the response belongs to. 383 | :return: The text for the comment reply. 384 | """ 385 | original_text = replyable.body if isinstance(replyable, Comment) else replyable.title 386 | original_text = original_text.strip() 387 | 388 | if '>' in original_text: 389 | original_text = get_quoted_text(original_text).strip() 390 | if '::' in original_text: 391 | original_text = original_text.split('::', 1)[1].strip() 392 | 393 | hero_name = db_api.get_hero_name(hero_id) 394 | 395 | reply = "[{}]({}) (sound warning: {}){}".format(original_text, response_url, hero_name, config.COMMENT_ENDING) 396 | replyable.reply(reply) 397 | 398 | logger.info("Replied to: " + replyable.fullname) 399 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """Module in which the constants that are used by Dota Responses Bot are declared.""" 2 | import os 3 | from util.response_request import request_cargo_set 4 | 5 | __author__ = 'Jonarzz' 6 | __maintainer__ = 'MePsyDuck' 7 | 8 | # App config 9 | CLIENT_ID = os.environ.get('CLIENT_ID') 10 | CLIENT_SECRET = os.environ.get('CLIENT_SECRET') 11 | 12 | # Account config 13 | USER_AGENT = 'Python:dota2_responses_bot:v3.0 by /u/Jonarz, maintained by /u/MePsyDuck' 14 | SUBREDDIT = os.environ.get('SUBREDDIT', 'dota2') 15 | USERNAME = os.environ.get('REDDIT_USERNAME') 16 | PASSWORD = os.environ.get('REDDIT_PASSWORD') 17 | 18 | # Parser config 19 | URL_DOMAIN = 'http://dota2.gamepedia.com' 20 | API_PATH = URL_DOMAIN + '/api.php' 21 | RESPONSES_CATEGORY = 'Responses' 22 | CATEGORY_API_PARAMS = {'action': 'query', 'list': 'categorymembers', 'cmlimit': 'max', 'cmprop': 'title', 23 | 'format': 'json', 'cmtitle': ''} 24 | FILE_API_PARAMS = {'action': 'query', 'titles': '', 'prop': 'imageinfo', 'iiprop': 'url', 'format': 'json'} 25 | 26 | STYLESHEET_URL = r'https://www.reddit.com/r/dota2/about/stylesheet.json' 27 | FLAIR_REGEX = r'(?P.flair-\w+),a\[href="(?P/hero-\w+)"\]' 28 | RESPONSE_REGEX = r'\*(?P( .*?)+)(?P(.*))' 29 | CHAT_WHEEL_SECTION_REGEX = r'(=== (?PThe International \d+) ===)(?P.+?)(?=\n=== [a-z0-9 ]+ ===\n)' 30 | FILE_REGEX = r'( (?P[a-zA-Z0-9_. ]+))' 31 | 32 | # Caching config 33 | CACHE_PROVIDER = os.environ.get('CACHE_PROVIDER', 'memory') # valid choices : redis, memory, db 34 | CACHE_URL = os.environ.get('CACHE_URL', 35 | os.path.join(os.getcwd(), 'cache.json')) # file path in case of memory/file based caching 36 | 37 | # DB config 38 | DB_PROVIDER = os.environ.get('DATABASE_PROVIDER', 'sqlite') # valid choices : sqlite, mysql, postgres 39 | DB_URL = os.environ.get('DATABASE_URL', os.path.join(os.getcwd(), 'bot.db')) # file path in case of sqlite 40 | 41 | # Logging config 42 | BOT_LOGGER = 'bot' 43 | PRAW_LOGGER = 'prawcore' 44 | LOG_LEVEL = os.environ.get('LOGGING_LEVEL', 'INFO').upper() 45 | LOG_FORMAT = '%(asctime)s %(funcName)-20s %(levelname)-8s %(message)s' 46 | LOG_DIR = 'logs' 47 | INFO_FILENAME = 'info.log' 48 | ERROR_FILENAME = 'error.log' 49 | PRAW_FILENAME = 'praw.log' 50 | 51 | CACHE_TTL = 5 52 | 53 | # Responses config 54 | # TODO confirm this keyword 55 | UPDATE_REQUEST_KEYWORD = 'try ' 56 | COMMENT_ENDING = ''' 57 | 58 | --- 59 | Bleep bloop, I am a robot. *OP can reply with "Try hero_name" to update this with new hero* 60 | 61 | [*^(Source)*](https://github.com/Jonarzz/DotaResponsesRedditBot) *^(|)* 62 | [*^(Suggestions/Issues)*](https://github.com/Jonarzz/DotaResponsesRedditBot/issues/new/choose) *^(|)* 63 | [*^(Maintainer)*](https://www.reddit.com/user/MePsyDuck/) *^(|)* 64 | [*^(Author)*](https://www.reddit.com/user/Jonarz/) 65 | ''' 66 | 67 | # Key should be lowercase without special characters. Needs to be updated if links break (as links can be 68 | # non-gamepedia links too) 69 | # Value should have a placeholder for original text and replyable ending 70 | CUSTOM_RESPONSES = { 71 | 'ho ho ha ha': '[{}](https://gamepedia.cursecdn.com/dota2_gamepedia/1/17/Snip_ability_shrapnel_03.mp3)' 72 | ' (trigger warning: Sniper){}', 73 | 'turn up this guy is crazy as fuck he s gotta be on molly or some powder or something': 74 | '[{}](https://www.youtube.com/watch?v=CO3j9lUYFfo) (Donation warning: Arteezy){}' 75 | } 76 | 77 | # Only include responses for items, runes, heroes, > 100 count and common phrases. 78 | # Hardcoded because then they can tweaked according to the needs. 79 | # Drawback for this : need to update each time hero/item is added 80 | FREQUENT_RESPONSES = {'denied', 'yes', 'not yet', 'no mana', 'not enough mana', 'i m not ready', 'out of mana', 81 | 'it s not time yet', 'ah', 'no', 'uh', 'ha ha', 'attack', 'haste', 'double damage', 'immortality', 82 | 'invisibility', 'illusion', 'regeneration', 'uh uh', 'ha', } 83 | 84 | # Hero and item responses not hardcoded here 85 | HERO_NAME_RESPONSES = request_cargo_set('https://dota2.gamepedia.com/api.php?'+ 86 | 'action=cargoquery&tables=heroes&fields=title&where=game'+ 87 | '+IS+NULL&limit=500&format=json') 88 | 89 | ITEM_RESPONSES = request_cargo_set('https://dota2.gamepedia.com/api.php?'+ 90 | 'action=cargoquery&tables=items&fields='+ 91 | 'title&where=game+IS+NULL&limit=500&format=json') 92 | 93 | # Add responses here as people report them. Taken from the old excluded responses list. 94 | COMMON_PHRASE_RESPONSES = {'earth shaker', 'shut up', 'skeleton king', 'it begins', 'i am', 'exactly so', 'very nice', 95 | 'why not', 'much appreciated', 'well done', 'pit lord', 'outworld destroyer', 'I know right', 96 | 'aphotic shield', 'go outside', 'vladimir s offering', 'sheep stick', 'my bad', 97 | "you're welcome", 'holy shit', 'are you okay', 'i agree', 'thank god', 'i like it', 'no way', 98 | 'fair enough', 'it worked', 'well deserved', 'he he he', 'how so', 'oh boy', 'very good', 99 | 'about time', 'are you kidding me', 'abyssal underlord', 'so beautiful', 'nice try', 100 | 'thank you so much', 'ah, nice', 'nice one', 'eul s scepter', 'thank you', 101 | 'scepter of divinity', 'at last', 'too soon', 'try again', 'i don t think so', 'try harder', 102 | 'well said', 'of course', 'got it', 'what happened', 'hey now', 'seems fair', 'that s right', 103 | 'all pick'} 104 | 105 | EXCLUDED_RESPONSES = FREQUENT_RESPONSES | ITEM_RESPONSES | HERO_NAME_RESPONSES | COMMON_PHRASE_RESPONSES 106 | -------------------------------------------------------------------------------- /parsers/__init__.py: -------------------------------------------------------------------------------- 1 | # Named as `parsers` because `parser` will produce ImportError due to conflict with internal `parser.py` file 2 | 3 | from parsers.css_parser import * 4 | from parsers.wiki_parser import * 5 | 6 | __all__ = ['css_parser', 'wiki_parser'] 7 | -------------------------------------------------------------------------------- /parsers/css_parser.py: -------------------------------------------------------------------------------- 1 | """Module to populate hero details from DotA 2 subreddit css. 2 | """ 3 | 4 | import json 5 | import re 6 | 7 | import requests 8 | from rapidfuzz import process 9 | 10 | from config import STYLESHEET_URL, FLAIR_REGEX, USER_AGENT 11 | from util.database.database import db_api 12 | 13 | __author__ = 'MePsyDuck' 14 | 15 | 16 | def populate_heroes(): 17 | """Method to update heroes in the Heroes table with hero names and proper css classes names as 18 | taken from the DotA2 subreddit and hero flair images from the reddit directory. 19 | 20 | Uses rapidfuzz for fuzzy matching of hero names to name found in `.flair-name` property in css. 21 | """ 22 | hero_names = db_api.get_all_hero_names() 23 | 24 | response = requests.get(STYLESHEET_URL, headers={'User-Agent': USER_AGENT}) 25 | r = json.loads(response.text) 26 | stylesheet = r['data']['stylesheet'] 27 | 28 | r = re.compile(FLAIR_REGEX) 29 | for flair in r.finditer(stylesheet): 30 | flair_css = flair['css_class'] 31 | img_path = flair['img_path'] 32 | flair_hero = img_path[6:] 33 | 34 | match, confidence, index = process.extractOne(flair_hero, hero_names) 35 | if confidence >= 90: 36 | db_api.update_hero(hero_name=match, img_path=img_path, flair_css=flair_css) 37 | -------------------------------------------------------------------------------- /parsers/wiki_parser.py: -------------------------------------------------------------------------------- 1 | """Module used to populate responses into the Responses table in database. 2 | 3 | Responses and urls to responses as mp3s are parsed from Dota 2 Wiki: http://dota2.gamepedia.com/ 4 | """ 5 | 6 | import json 7 | import re 8 | from concurrent.futures import as_completed 9 | 10 | import requests 11 | from requests.adapters import HTTPAdapter 12 | from requests_futures.sessions import FuturesSession 13 | from urllib3 import Retry 14 | 15 | from config import API_PATH, RESPONSES_CATEGORY, RESPONSE_REGEX, CATEGORY_API_PARAMS, URL_DOMAIN, FILE_API_PARAMS, \ 16 | FILE_REGEX, CHAT_WHEEL_SECTION_REGEX 17 | from util.database.database import db_api 18 | from util.logger import logger 19 | from util.str_utils import preprocess_text 20 | 21 | __author__ = 'Jonarzz' 22 | __maintainer__ = 'MePsyDuck' 23 | 24 | 25 | def populate_responses(): 26 | """Method that adds all the responses to database. Assumes responses and hero database are already built. 27 | """ 28 | populate_hero_responses() 29 | populate_chat_wheel() 30 | 31 | 32 | def populate_hero_responses(): 33 | """Method that populates hero responses (as well as Arcana voice packs and Announcer packs) from Gamepedia. 34 | First fetches all Pages in Responses category, then source for each page. 35 | Populates Responses table and Hero table from processed response, original response, link and hero name. 36 | """ 37 | pages = pages_for_category(RESPONSES_CATEGORY) 38 | for page in pages: 39 | if is_hero_type(page): 40 | # page points to hero responses 41 | hero_name = get_hero_name(page) 42 | else: 43 | # page points to voice pack, announcer or shopkeeper responses 44 | hero_name = page 45 | 46 | responses_source = requests.get(url=URL_DOMAIN + '/' + page, params={'action': 'raw'}).text 47 | 48 | response_link_list = create_responses_text_and_link_list(responses_source=responses_source) 49 | # Note: Save all responses to the db. Apply single word and common words filter on comments and submission text 50 | # not while saving responses 51 | db_api.add_hero_and_responses(hero_name=hero_name, response_link_list=response_link_list) 52 | 53 | 54 | def pages_for_category(category_name): 55 | """Method that returns a list of pages for a given Wiki category. 56 | 57 | :param category_name: returns all category members in json response from mediawiki API. 58 | :return: list of all `pages` in the given category. 59 | """ 60 | params = get_params_for_category_api(category_name) 61 | json_response = requests.get(url=API_PATH, params=params).text 62 | 63 | pages = [] 64 | 65 | parsed_json = json.loads(json_response) 66 | for category_members in parsed_json['query']['categorymembers']: 67 | title = category_members['title'] 68 | pages.append(title) 69 | 70 | return pages 71 | 72 | 73 | def get_params_for_category_api(category): 74 | """Method to get `GET` parameters for querying MediaWiki for category details. 75 | 76 | :param category: category name to be passed in params. 77 | :return: GET parameters `params` 78 | """ 79 | params = CATEGORY_API_PARAMS.copy() 80 | params['cmtitle'] = 'Category:' + category 81 | return params 82 | 83 | 84 | def get_params_for_files_api(files): 85 | """Method to get `GET` parameters for querying MediaWiki for details for multiple files. 86 | Uses pipe character `|` to include multiple files. Currently MediaWiki limits number of files to 50. 87 | If files list is empty, leave `File` parameter empty. 88 | 89 | :param files: list of file names to be passed in params. 90 | :return: GET parameters `params`. 91 | """ 92 | params = FILE_API_PARAMS.copy() 93 | if files: 94 | titles = 'File:' + '|File:'.join(files) 95 | else: 96 | titles = '' 97 | params['titles'] = titles 98 | return params 99 | 100 | 101 | def is_hero_type(page): 102 | """Method to check if page belongs to a hero or creep-hero(Warlock's Golem). 103 | 104 | :param page: Page name as string. 105 | :return: True if page belongs to hero else False 106 | """ 107 | return '/Responses' in page 108 | 109 | 110 | def get_hero_name(hero_page): 111 | """Method that parses hero name from its responses page. 112 | Pages for heroes are in the form of `Hero name/Responses` and `Bundle/Responses/Hero. 113 | We need only the `Hero name` part for heroes. 114 | 115 | :param hero_page: hero's responses page as string. 116 | :return: Hero name as parsed 117 | """ 118 | return hero_page.replace('/Responses', '') 119 | 120 | 121 | def create_responses_text_and_link_list(responses_source): 122 | """Method that for a given source of a hero's response page creates a list of tuple: (original_text, processed_text, 123 | link). 124 | Steps involved: 125 | * Use regex to find all lines containing mp3 files and responses. 126 | * Process it to get original response text and file name. 127 | * Create a list of files and get all the links for them by calling `links_for_files`. 128 | * Process original text to get processed response. 129 | * Add original response text, processed response text and file link to a list as a tuple. 130 | 131 | :param responses_source: Mediawiki source 132 | :return: list with tuples of (original_text, processed_text, link). 133 | """ 134 | responses_list = [] 135 | file_and_text_list = [] 136 | 137 | response_regex = re.compile(RESPONSE_REGEX) 138 | file_regex = re.compile(FILE_REGEX) 139 | 140 | for response in response_regex.finditer(responses_source): 141 | original_text = parse_response(response['text']) 142 | if original_text is not None: 143 | files_source = response['files'] 144 | for file in file_regex.finditer(files_source): 145 | file_name = file['file'].replace('_', ' ').capitalize() 146 | file_and_text_list.append([original_text, file_name]) 147 | 148 | files_list = [file for text, file in file_and_text_list] 149 | file_and_link_dict = links_for_files(files_list) 150 | 151 | for original_text, file in file_and_text_list: 152 | processed_text = preprocess_text(original_text) 153 | if processed_text != '': 154 | try: 155 | link = file_and_link_dict[file] 156 | responses_list.append((original_text, processed_text, link)) 157 | except KeyError: 158 | # Ignore files with no links to mp3 files. Happens to broken files and files undergoing migration. 159 | pass 160 | 161 | return responses_list 162 | 163 | 164 | def parse_response(text): 165 | # Special cases 166 | if any(excluded_case in text for excluded_case in ['(broken file)', 'versus (TI ', 'Ceeeb']): 167 | return None 168 | 169 | text = re.sub(r'…', '...', text) # Replace ellipsis with three dots 170 | 171 | regexps_empty_sub = [r'', # Remove comments 172 | r'{{resp\|(r|u|\d+|d\|\d+|rem)}}', # Remove response rarity 173 | r'{{hero icon\|[a-z- \']+\|\d+px}}', # Remove hero icon 174 | r'{{item( icon)?\|[a-z0-9() \']+\|\d+px}}', # Remove item icon 175 | r'\[\[File:[a-z.,!\'() ]+\|\d+px(\|link=[a-z,!\'() ]+)?(\|class=[a-z]+)?]]', # Remove Files 176 | r'\[\[#[a-z0-9_\-\' ]+\|\'\'followup\'\']]', 177 | # Remove followup links in tags 178 | r'\'\'[a-z0-9 /]+\'\'', # Remove text in tags 179 | r'.*?', # Remove text in tags 180 | r'.*?', # Remove text in tags 181 | ] 182 | for regex in regexps_empty_sub: 183 | text = re.sub(regex, '', text, flags=re.IGNORECASE) 184 | 185 | regexps_sub_text = [r'\[\[([a-zé().:\',\- ]+)]]', # Replace links such as [[Shitty Wizard]] 186 | r'\[\[[a-zé0-9().:\'/#-_ ]+\|([a-zé0-9().:\'/#-_ ]+)]]', 187 | # Replace links such as [[Ancient (Building)|Ancients]], [[:File:Axe|Axe]] and [[Terrorblade#Sunder|sundering]] 188 | r'{{tooltip\|(.*?)\|.*?}}', # Replace tooltips 189 | r'{{note\|([a-z.!\'\-?, ]+)\|[a-z.!\'\-?,()/ ]+}}', # Replace notes 190 | r'{{H\|([a-z.!\'\-?,()/ ]+)}}', # Replace heroes 191 | ] 192 | for regex in regexps_sub_text: 193 | text = re.sub(regex, '\\1', text, flags=re.IGNORECASE) 194 | 195 | if any(escape in text for escape in ['[[', ']]', '{{', '}}', '|', 'sm2']): 196 | logger.warn('Response could not be processed : ' + text) 197 | return None 198 | 199 | return text.strip() 200 | 201 | 202 | def links_for_files(files_list): 203 | """Method that queries MediaWiki API used by Gamepedia to return links to the files list passed. 204 | Does batch processing to avoid max number of files limit and header size limit. 205 | Used asynchronous requests for faster processing. 206 | Removes files version as we only need the latest one. 207 | 208 | MediaWiki allows max 50 files(titles) at once : https://www.mediawiki.org/wiki/API:Query. 209 | 210 | :param files_list: list of files 211 | :return files_link_mapping: dict with file names and their links. dict['file'] = link 212 | """ 213 | 214 | # Method level constants 215 | max_title_list_length = 50 216 | file_title_prefix_length = len('%7CFile%3A') # url encoded file title prefix '|File:' 217 | max_header_length = 1960 # max header length as found by trial and error 218 | 219 | files_link_mapping = {} 220 | futures = [] 221 | empty_api_length = len(requests.Request('get', url=API_PATH, params=get_params_for_files_api([])).prepare().url) 222 | 223 | # To add retry in case of Status 429 : Too many requests 224 | with FuturesSession() as session: 225 | retries = 5 226 | status_forcelist = [429] 227 | retry = Retry( 228 | total=retries, 229 | read=retries, 230 | connect=retries, 231 | respect_retry_after_header=True, 232 | status_forcelist=status_forcelist, 233 | ) 234 | 235 | adapter = HTTPAdapter(max_retries=retry) 236 | session.mount('http://', adapter) 237 | session.mount('https://', adapter) 238 | 239 | files_batch_list = [] 240 | current_title_length = 0 241 | 242 | for file in files_list: 243 | file_name_len = file_title_prefix_length + len(file) 244 | # If header size overflows or the number of files reaches the limit specified by MediaWiki 245 | if file_name_len + current_title_length >= max_header_length - empty_api_length or \ 246 | len(files_batch_list) >= max_title_list_length: 247 | # Issue a request for current batch of files 248 | futures.append(session.get(url=API_PATH, params=get_params_for_files_api(files_batch_list))) 249 | 250 | # Reset files tracking variables 251 | files_batch_list = [] 252 | current_title_length = 0 253 | 254 | files_batch_list.append(file) 255 | current_title_length += file_name_len 256 | 257 | if files_batch_list: 258 | futures.append(session.get(url=API_PATH, params=get_params_for_files_api(files_batch_list))) 259 | 260 | for future in as_completed(futures): 261 | json_response = future.result().json() 262 | query = json_response['query'] 263 | pages = query['pages'] 264 | 265 | for _, page in pages.items(): 266 | title = page['title'] 267 | try: 268 | imageinfo = page['imageinfo'][0] 269 | file_url = imageinfo['url'][:imageinfo['url'].index('.mp3') + len('.mp3')] # Remove file version and trailing path 270 | files_link_mapping[title[5:]] = file_url 271 | except KeyError: 272 | logger.critical('File does not have a link : ' + title) 273 | 274 | return files_link_mapping 275 | 276 | 277 | def populate_chat_wheel(): 278 | """Method that populates chat wheel responses featured in The International yearly Battle Pass. 279 | Other chat wheel responses from events and Dota plus are not processed currently. 280 | """ 281 | chat_wheel_source = requests.get(url=URL_DOMAIN + '/' + 'Chat_Wheel', params={'action': 'raw'}).text 282 | 283 | chat_wheel_regex = re.compile(CHAT_WHEEL_SECTION_REGEX, re.DOTALL | re.IGNORECASE) 284 | 285 | for match in chat_wheel_regex.finditer(chat_wheel_source): 286 | event = match['event'] 287 | responses_source = match['source'] 288 | response_link_list = create_responses_text_and_link_list(responses_source=responses_source) 289 | 290 | db_api.add_hero_and_responses(hero_name=event, response_link_list=response_link_list) 291 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | urllib3 2 | beautifulsoup4 3 | praw 4 | psycopg2 5 | redis 6 | pony 7 | rapidfuzz 8 | requests 9 | cacheout 10 | requests-futures -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | """Module to run the bot. Executes the work() method of bot that executes the endless loop of reading comments and 2 | submissions and replying to them if the match any response. 3 | """ 4 | from bot.worker import work, logger 5 | from util.logger import setup_logger 6 | 7 | __author__ = 'MePsyDuck' 8 | 9 | if __name__ == '__main__': 10 | setup_logger() 11 | try: 12 | work() 13 | except (KeyboardInterrupt, SystemExit): 14 | logger.exception("Script stopped") 15 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Module to be run first time to set up the database 2 | * Drops all tables if the exist and creates them again. 3 | * Populates responses from Gamepedia 4 | * Populates heroes from Gamepedia and Dota 2 subreddit CSS. 5 | """ 6 | from parsers import css_parser, wiki_parser 7 | from util.database.database import db_api 8 | 9 | __author__ = 'MePsyDuck' 10 | 11 | from util.logger import setup_logger 12 | 13 | 14 | def first_run(): 15 | db_api.drop_all_tables() 16 | db_api.create_all_tables() 17 | wiki_parser.populate_responses() 18 | css_parser.populate_heroes() 19 | 20 | 21 | if __name__ == '__main__': 22 | setup_logger() 23 | first_run() 24 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | """Module to test the bot and parser. All test should be placed in `tests` folder and python filename should start with 2 | `test_`. 3 | """ 4 | import sys 5 | import unittest 6 | 7 | __author__ = 'MePsyDuck' 8 | 9 | suite = unittest.TestLoader().discover('tests', pattern='test_*.py') 10 | result = not unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful() 11 | sys.exit(result) 12 | -------------------------------------------------------------------------------- /tests/test_bot.py: -------------------------------------------------------------------------------- 1 | """Module used to test bot worker module methods. 2 | """ 3 | 4 | import unittest 5 | 6 | import config 7 | from bot import account 8 | from bot import worker 9 | 10 | __author__ = 'Jonarzz' 11 | __maintainer__ = 'MePsyDuck' 12 | 13 | 14 | class BotWorkerTest(unittest.TestCase): 15 | """Class used to test bot worker module. 16 | Inherits from TestCase class of unittest module. 17 | """ 18 | 19 | def test_parse_comment(self): 20 | """Method that tests the process_text method from worker module. 21 | """ 22 | self.assertEqual(worker.process_text( 23 | "That's a great idea!!!"), "that s a great idea") 24 | self.assertEqual(worker.process_text(" WoNdErFuL "), "wonderful") 25 | self.assertEqual(worker.process_text("How are you?"), "how are you") 26 | self.assertEqual(worker.process_text( 27 | "Isn't is good to have quotes? you can add any response in quote and bot would still \n\n> reply to them"), 28 | "reply to them") 29 | self.assertEqual(worker.process_text( 30 | "> multiple quotes \n\n > but reply to \n\n > only first one"), "multiple quotes") 31 | 32 | def test_account(self): 33 | """Method used to test the Reddit instance returned by get_account() 34 | """ 35 | reddit = account.get_account() 36 | self.assertEqual(reddit.user.me(), config.USERNAME) 37 | -------------------------------------------------------------------------------- /tests/test_wiki_parser.py: -------------------------------------------------------------------------------- 1 | """Module used to test dota_wiki_parser module methods. 2 | """ 3 | 4 | import unittest 5 | 6 | from config import RESPONSES_CATEGORY 7 | from parsers import wiki_parser 8 | 9 | __author__ = 'Jonarzz' 10 | __maintainer__ = 'MePsyDuck' 11 | 12 | 13 | class WikiParserTest(unittest.TestCase): 14 | """Class used to test wiki_parser module. 15 | Inherits from TestCase class of unittest module. 16 | """ 17 | 18 | def test_pages_to_parse(self): 19 | """Method testing pages_to_ppages_for_categoryarse method from wiki_parser module. 20 | The method checks if the requested response is consists of pages as expected. 21 | """ 22 | pages = wiki_parser.pages_for_category(RESPONSES_CATEGORY) 23 | 24 | self.assertTrue(len(pages) > 150) 25 | self.assertTrue('Abaddon/Responses' in pages) 26 | self.assertTrue('Zeus/Responses' in pages) 27 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jonarzz/DotaResponsesRedditBot/5db25560accdfcd85a8a9e3e1d7d0d429baee49e/util/__init__.py -------------------------------------------------------------------------------- /util/caching/__init__.py: -------------------------------------------------------------------------------- 1 | from config import CACHE_PROVIDER 2 | from util.caching.db_cache import DBCache 3 | from util.caching.memory_cache import MemoryCache 4 | from util.caching.redis_cache import RedisCache 5 | 6 | 7 | def get_cache_api(): 8 | if CACHE_PROVIDER == 'redis': 9 | return RedisCache() 10 | elif CACHE_PROVIDER == 'memory': 11 | return MemoryCache() 12 | elif CACHE_PROVIDER == 'db': 13 | return DBCache() 14 | -------------------------------------------------------------------------------- /util/caching/caching.py: -------------------------------------------------------------------------------- 1 | """Module used to store replyable ids (comment or submission) in cache. 2 | Currently support only three implementations : DB based, in memory and redis. 3 | Support for more implementations can be added by extending CacheAPI class. 4 | """ 5 | 6 | from abc import ABC, abstractmethod 7 | 8 | __author__ = 'MePsyDuck' 9 | 10 | 11 | class CacheAPI(ABC): 12 | @abstractmethod 13 | def _exists(self, key): 14 | pass 15 | 16 | @abstractmethod 17 | def _set(self, key): 18 | pass 19 | 20 | def exists(self, thing_id): 21 | """Check if Reddit thing (currently comment/submission) is already processed/replied. 22 | If it is not in the cache, it adds the thing_id to cache. 23 | 24 | :param thing_id: They id of comment/submission to be cached. 25 | :returns: `True` if replyable exists, else `False`. 26 | """ 27 | if self._exists(thing_id): 28 | return True 29 | else: 30 | self._set(thing_id) 31 | return False 32 | -------------------------------------------------------------------------------- /util/caching/db_cache.py: -------------------------------------------------------------------------------- 1 | """Module to store thing_ids in DB. 2 | Not recommended as this needs manual clearing of old ids. 3 | """ 4 | 5 | from util.caching.caching import CacheAPI 6 | from util.database.database import db_api 7 | 8 | __author__ = 'MePsyDuck' 9 | 10 | 11 | class DBCache(CacheAPI): 12 | def _exists(self, key): 13 | """Method to check if key exists in DB cache. 14 | 15 | :param key: The `key` to to be checked in DB cache. 16 | :return: `True` if `key` exist in DB cache. 17 | """ 18 | return db_api.check_if_thing_exists(key) 19 | 20 | def _set(self, key): 21 | """Method to set `key` with `value` in DB cache. 22 | 23 | :param key: The `key` (thing_id) to be added to DB cache. 24 | """ 25 | db_api.add_thing_to_cache(key) 26 | -------------------------------------------------------------------------------- /util/caching/memory_cache.py: -------------------------------------------------------------------------------- 1 | """Module used to save cache in the memory. 2 | Uses FIFO eviction policy with maximum size of 10,000 and no ttl. 3 | JSON File used to dump data on shutdown and load it back up on startup. 4 | """ 5 | 6 | import atexit 7 | import json 8 | import os 9 | import signal 10 | from collections import OrderedDict 11 | 12 | from cacheout import FIFOCache 13 | 14 | from config import CACHE_URL 15 | from util.caching.caching import CacheAPI 16 | 17 | __author__ = 'MePsyDuck' 18 | 19 | 20 | class MemoryCache(CacheAPI): 21 | def __init__(self): 22 | """Method that loads dumped cache from previous shutdown stored in json file. 23 | """ 24 | self.cache = FIFOCache(maxsize=10_000, ttl=0, default='') 25 | if os.path.exists(CACHE_URL): 26 | with open(CACHE_URL) as cache_json: 27 | old_cache = json.load(cache_json, object_pairs_hook=OrderedDict) 28 | self.cache.set_many(old_cache) 29 | atexit.register(self._cleanup) 30 | signal.signal(signal.SIGTERM, self._cleanup) 31 | signal.signal(signal.SIGINT, self._cleanup) 32 | 33 | def _cleanup(self): 34 | """Method to dump cache data to json file on script interrupt/shutdown. 35 | """ 36 | with open(CACHE_URL, 'w+') as cache_json: 37 | json.dump(self.cache.copy(), cache_json) 38 | 39 | def _exists(self, key): 40 | """Method to check if key exists in cache. 41 | 42 | :param key: The `key` to to be checked in cache. 43 | :return: `True` if `key` exist in cache. 44 | """ 45 | return key in self.cache 46 | 47 | def _set(self, key): 48 | """Method to add thing_id to the cache. 49 | 50 | :param key: The `key` to be added to the cache. 51 | """ 52 | self.cache.set(key, '') 53 | -------------------------------------------------------------------------------- /util/caching/redis_cache.py: -------------------------------------------------------------------------------- 1 | """Module that allows Redis to be used as cache. Useful when running on Heroku or such platforms without persistent 2 | file storage. 3 | """ 4 | 5 | from redis import Redis 6 | 7 | from config import CACHE_URL, CACHE_TTL 8 | from util.caching.caching import CacheAPI 9 | from util.logger import logger 10 | 11 | __author__ = 'MePsyDuck' 12 | 13 | 14 | class RedisCache(CacheAPI): 15 | def __init__(self): 16 | """Create a new Redis instance when a new object for this class is created. 17 | """ 18 | self.redis = Redis.from_url(CACHE_URL) 19 | logger.info('Connected to Redis at ' + CACHE_URL) 20 | 21 | def _exists(self, key): 22 | """Method to check if `key` exists in redis cache. 23 | 24 | :param key: The `key` to to be checked in redis cache. 25 | :return: `True` if `key` exists in redis cache. 26 | """ 27 | if self.redis.exists(key): 28 | return True 29 | 30 | def _set(self, key): 31 | """Method to set `key` with `value` in redis. 32 | Key expires after CACHE_TTL days (`ex` in seconds). 33 | 34 | :param key: The `key` (thing_id) to be added to redis cache. 35 | """ 36 | self.redis.set(name=key, value='', ex=CACHE_TTL * 60 * 60) 37 | -------------------------------------------------------------------------------- /util/database/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jonarzz/DotaResponsesRedditBot/5db25560accdfcd85a8a9e3e1d7d0d429baee49e/util/database/__init__.py -------------------------------------------------------------------------------- /util/database/database.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import random 3 | import urllib.parse as up 4 | 5 | from pony.orm import db_session, commit 6 | 7 | from config import CACHE_TTL, DB_URL, DB_PROVIDER 8 | from util.database.models import Responses, Heroes, RedditCache, db 9 | from util.logger import logger 10 | 11 | __author__ = 'MePsyDuck' 12 | 13 | 14 | class DatabaseAPI: 15 | def __init__(self): 16 | """Method to initialize db connection. Binds PonyORM Database object `db` to configured database. 17 | Creates the mapping between db tables and models. 18 | """ 19 | self.db = db 20 | if DB_PROVIDER == 'sqlite': 21 | self.db.bind(provider='sqlite', filename=DB_URL, create_db=True) 22 | elif DB_PROVIDER == 'mysql': 23 | up.uses_netloc.append("mysql") 24 | url = up.urlparse(DB_URL) 25 | self.db.bind(provider='mysql', host=url.hostname, user=url.username, passwd=url.password, db=url.path[1:]) 26 | elif DB_PROVIDER == 'postgres': 27 | up.uses_netloc.append("postgres") 28 | url = up.urlparse(DB_URL) 29 | self.db.bind(provider='postgres', user=url.username, password=url.password, host=url.hostname, 30 | database=url.path[1:]) 31 | else: 32 | self.db.bind(provider='sqlite', filename='bot.db', create_db=True) 33 | 34 | self.db.generate_mapping(create_tables=True) 35 | 36 | # Responses table queries 37 | @db_session 38 | def get_link_for_response(self, processed_text, hero_id=None): 39 | """Method that returns the link for the processed response text and given optional hero_id. If multiple matching 40 | entries are found, returns a random result. 41 | 42 | :param processed_text: The plain processed response text. 43 | :param hero_id: The hero's id. 44 | :return The link to the response, hero_id, or else None, None if no matching response is found. 45 | """ 46 | if hero_id: 47 | responses = Responses.select(lambda r: r.processed_text == processed_text and r.hero_id.id == hero_id) 48 | else: 49 | responses = Responses.select(lambda r: r.processed_text == processed_text) 50 | 51 | if len(responses): 52 | response = random.choice(list(responses)) 53 | return response.response_link, response.hero_id.id 54 | else: 55 | return None, None 56 | 57 | # RedditCache table queries 58 | @db_session 59 | def add_thing_to_cache(self, thing_id): 60 | """Method that adds current time and Reddit replyable or submission to RedditCache table by their id(fullname). 61 | 62 | :param thing_id: The fullname of replyable/submission on Reddit 63 | """ 64 | RedditCache(thing_id=thing_id) 65 | 66 | @db_session 67 | def delete_old_thing_ids(self): 68 | """Method used to remove things in cache older than a period of time `CACHE_TTL` defined in the config file. 69 | """ 70 | furthest_date = datetime.datetime.utcnow() - datetime.timedelta(days=CACHE_TTL) 71 | 72 | RedditCache.select(lambda t: t.added_datetime < furthest_date).delete(bulk=True) 73 | 74 | @db_session 75 | def check_if_thing_exists(self, thing_id): 76 | """Method that checks if the replyable id given is already present in the RedditCache table 77 | 78 | :param thing_id: The id of the replyable/submission on Reddit 79 | :return: True if the `thing_id` is already present in table, else False 80 | """ 81 | thing = RedditCache.select(lambda t: t.thing_id == thing_id) 82 | return thing is not None 83 | 84 | # Heroes table queries 85 | @db_session 86 | def add_hero_to_table(self, hero_name, img_path=None, flair_css=None): 87 | """Method to add hero to the table. All parameters are strings. 88 | 89 | :param hero_name: Hero's name 90 | :param img_path: path to hero's image 91 | :param flair_css: CSS for the flair 92 | """ 93 | Heroes(hero_name=hero_name, img_path=img_path, flair_css=flair_css) 94 | 95 | @db_session 96 | def get_hero_id_by_name(self, hero_name): 97 | """Method to get hero's id from table. 98 | 99 | :param hero_name: Hero's name 100 | :return: Hero's id 101 | """ 102 | h = Heroes.get(lambda hero: hero.hero_name.lower() == hero_name) 103 | return h.id if h is not None else None 104 | 105 | @db_session 106 | def get_hero_name(self, hero_id): 107 | """Method to get hero's name from table. 108 | 109 | :param hero_id: Hero's id 110 | :return: Hero's name 111 | """ 112 | h = Heroes[hero_id] 113 | return h.hero_name if h is not None else None 114 | 115 | @db_session 116 | def get_hero_id_by_flair_css(self, flair_css): 117 | """Method to get hero_id from the table based on the flair css. 118 | 119 | :param flair_css: Hero's css class as in r/DotA2 subreddit 120 | :return: Hero's id 121 | """ 122 | if flair_css: 123 | h = Heroes.get(flair_css=flair_css) 124 | return h.id if h is not None else None 125 | 126 | @db_session 127 | def get_img_dir_by_id(self, hero_id): 128 | """Method to get image directory for hero's flair. 129 | 130 | :param hero_id: Hero's id. 131 | :return: The directory path to the image. 132 | """ 133 | h = Heroes[hero_id] 134 | return h.img_path if h is not None else None 135 | 136 | @db_session 137 | def get_all_hero_names(self): 138 | """Method to get all heroes' names. 139 | 140 | :return: All heroes' names as a list. 141 | """ 142 | heroes = Heroes.select()[:] 143 | return [hero.hero_name for hero in heroes] 144 | 145 | @db_session 146 | def update_hero(self, hero_name, img_path, flair_css): 147 | """Method to update hero's attributes in the Heroes table. 148 | 149 | :param hero_name: Hero's name 150 | :param img_path: Hero's img dir/path 151 | :param flair_css: Hero's css class 152 | """ 153 | hero = Heroes.get(hero_name=hero_name) 154 | hero.img_path = img_path 155 | hero.flair_css = flair_css 156 | 157 | def create_all_tables(self): 158 | """Method to create all tables defined in the models 159 | """ 160 | self.db.create_tables() 161 | 162 | def drop_all_tables(self): 163 | """Method to drop all tables defined in the models 164 | """ 165 | self.db.drop_all_tables(with_all_data=True) 166 | 167 | @db_session 168 | def add_hero_and_responses(self, hero_name, response_link_list): 169 | """Method to add hero and it's responses to the db. 170 | 171 | :param hero_name: Hero name who's responses will be inserted 172 | :param response_link_list: List with tuples in the form of (original_text, text, link) 173 | """ 174 | h = Heroes(hero_name=hero_name, img_path=None, flair_css=None) 175 | commit() 176 | 177 | for original_text, processed_text, link in response_link_list: 178 | existing_response = Responses.get(response_link=link) 179 | if not existing_response: 180 | Responses(processed_text=processed_text, original_text=original_text, response_link=link, hero_id=h.id) 181 | else: 182 | logger.debug('Link already exists : ' + link + ' for response ' + existing_response.original_text) 183 | 184 | 185 | db_api = DatabaseAPI() 186 | -------------------------------------------------------------------------------- /util/database/models.py: -------------------------------------------------------------------------------- 1 | """Module that defines all the models used by PonyORM for db queries. 2 | """ 3 | 4 | from datetime import datetime 5 | 6 | from pony.orm import Database, PrimaryKey, Required, Optional, Set 7 | 8 | __author__ = 'MePsyDuck' 9 | 10 | db = Database() 11 | 12 | 13 | class Responses(db.Entity): 14 | id = PrimaryKey(int, auto=True) # Default db id column for pk 15 | processed_text = Required(str, 1000, index='idx_parsed_text') # Stores the processed response text 16 | original_text = Required(str, 1000) # Stores the original response text/ Unused currently, but may help in future. 17 | response_link = Required(str, unique=True) # Link to the response text 18 | hero_id = Required('Heroes') # The hero_id for hero whose response text this is 19 | 20 | 21 | class RedditCache(db.Entity): 22 | id = PrimaryKey(int, auto=True) # Default db id column for pk 23 | thing_id = Required(str, unique=True) # Comment or submission id that is already processed 24 | added_datetime = Optional(datetime, default=datetime.utcnow()) # Datetime of processing the replyable 25 | 26 | 27 | class Heroes(db.Entity): 28 | id = PrimaryKey(int, auto=True) # Default db id column for pk 29 | hero_name = Required(str, unique=True) # Hero's / Announcer pack's name 30 | img_path = Optional(str, nullable=True) # Path to hero's flair image in reddit css 31 | flair_css = Optional(str, nullable=True) # Class for hero in reddit css 32 | responses = Set(Responses) # Relationship between Responses and Heroes table 33 | -------------------------------------------------------------------------------- /util/logger.py: -------------------------------------------------------------------------------- 1 | """Module to setup logging for bot and praw and provide logger for other modules. 2 | """ 3 | 4 | import logging 5 | import os 6 | 7 | from config import BOT_LOGGER, PRAW_LOGGER, LOG_DIR, LOG_FORMAT, LOG_LEVEL, INFO_FILENAME, ERROR_FILENAME, \ 8 | PRAW_FILENAME 9 | 10 | __author__ = 'MePsyDuck' 11 | 12 | logger = logging.getLogger(BOT_LOGGER) 13 | 14 | 15 | def setup_logger(): 16 | """Method to setup loggers. Current logs only bot application logs and PRAW logs. 17 | 18 | Disable file logging is running on Heroku since Heroku does not offer persistent disk storage. All logs should be 19 | read from Stream output instead. 20 | """ 21 | if not os.path.exists(LOG_DIR): 22 | os.mkdir(LOG_DIR) 23 | 24 | log_formatter = logging.Formatter(LOG_FORMAT) 25 | log_level = logging.getLevelName(LOG_LEVEL) 26 | 27 | # Handlers 28 | info_log_file = os.path.join(LOG_DIR, INFO_FILENAME) 29 | info_file_handler = logging.FileHandler(info_log_file, mode='a') 30 | info_file_handler.setFormatter(log_formatter) 31 | info_file_handler.setLevel(logging.INFO) 32 | 33 | error_log_file = os.path.join(LOG_DIR, ERROR_FILENAME) 34 | error_file_handler = logging.FileHandler(error_log_file, mode='a') 35 | error_file_handler.setFormatter(log_formatter) 36 | error_file_handler.setLevel(logging.ERROR) 37 | 38 | praw_log_file = os.path.join(LOG_DIR, PRAW_FILENAME) 39 | praw_handler = logging.FileHandler(praw_log_file, mode='a') 40 | praw_handler.setLevel(logging.WARNING) 41 | 42 | stream_handler = logging.StreamHandler() 43 | stream_handler.setFormatter(log_formatter) 44 | stream_handler.setLevel(logging.DEBUG) 45 | 46 | # PRAW logging 47 | praw_logger = logging.getLogger(PRAW_LOGGER) 48 | praw_logger.setLevel(log_level) 49 | praw_logger.addHandler(stream_handler) 50 | praw_logger.addHandler(praw_handler) 51 | 52 | # Internal logging 53 | bot_logger = logging.getLogger(BOT_LOGGER) 54 | bot_logger.setLevel(log_level) 55 | bot_logger.addHandler(info_file_handler) # This should be commented out if running on Heroku 56 | bot_logger.addHandler(error_file_handler) # This should be commented out if running on Heroku 57 | bot_logger.addHandler(stream_handler) 58 | -------------------------------------------------------------------------------- /util/response_info.py: -------------------------------------------------------------------------------- 1 | class ResponseInfo: 2 | """Custom Class to store response info for passing in between functions 3 | """ 4 | def __init__(self, hero_id, link): 5 | self.hero_id = hero_id 6 | self.link = link 7 | -------------------------------------------------------------------------------- /util/response_request.py: -------------------------------------------------------------------------------- 1 | from util.str_utils import preprocess_text 2 | import requests 3 | 4 | 5 | def request_cargo_set(url): 6 | web_request = requests.get(url) 7 | web_json = web_request.json() 8 | cargo_set = set() 9 | for objects in web_json['cargoquery']: 10 | cargo_set.add(preprocess_text(objects['title']['title'])) 11 | return cargo_set 12 | -------------------------------------------------------------------------------- /util/str_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import string 3 | 4 | PUNCTUATION_TRANS = str.maketrans(string.punctuation, ' ' * len(string.punctuation)) 5 | WHITESPACE_TRANS = str.maketrans(string.whitespace, ' ' * len(string.whitespace)) 6 | 7 | 8 | def preprocess_text(text): 9 | """Method for pre-processing the given response text. 10 | It: 11 | * replaces all punctuations with spaces 12 | * replaces all whitespace characters (tab, newline etc) with spaces 13 | * removes trailing and leading spaces 14 | * removes double spaces 15 | * changes to lowercase 16 | 17 | :param text: the text to be cleaned 18 | :return: cleaned text 19 | """ 20 | 21 | text = text.translate(PUNCTUATION_TRANS) 22 | text = text.translate(WHITESPACE_TRANS) 23 | text = text.strip().lower() 24 | text = re.sub(' +', ' ', text) 25 | return text 26 | --------------------------------------------------------------------------------