├── .github
    └── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── custom-response-request.md
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── bot
    ├── __init__.py
    ├── account.py
    └── worker.py
├── config.py
├── parsers
    ├── __init__.py
    ├── css_parser.py
    └── wiki_parser.py
├── requirements.txt
├── run.py
├── setup.py
├── test.py
├── tests
    ├── test_bot.py
    └── test_wiki_parser.py
└── util
    ├── __init__.py
    ├── caching
        ├── __init__.py
        ├── caching.py
        ├── db_cache.py
        ├── memory_cache.py
        └── redis_cache.py
    ├── database
        ├── __init__.py
        ├── database.py
        └── models.py
    ├── logger.py
    ├── response_info.py
    ├── response_request.py
    └── str_utils.py


/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: bug
 6 | assignees: MePsyDuck
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **Link to the comment**
14 | If the bug happened in a comment, provide a link to it.
15 | 
16 | **Expected behavior**
17 | A clear and concise description of what you expected to happen.
18 | 
19 | **Additional context**
20 | Add any other context about the problem here.
21 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/custom-response-request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Custom response request
 3 | about: Suggest a custom response for this bot
 4 | title: ''
 5 | labels: enhancement
 6 | assignees: MePsyDuck
 7 | 
 8 | ---
 9 | 
10 | **Describe the custom response you'd like**
11 | A clear and concise description of what you want to happen.
12 | 
13 | **Link to the response**
14 | Link to the response/wiki page.
15 | 
16 | **Custom format**
17 | Format for the response, if any.
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | ### JetBrains template
  3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
  4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
  5 | 
  6 | # User-specific stuff
  7 | .idea/**/tasks.xml
  8 | .idea/**/usage.statistics.xml
  9 | .idea/**/dictionaries
 10 | .idea/**/shelf
 11 | 
 12 | # Sensitive or high-churn files
 13 | .idea/**/dataSources/
 14 | .idea/**/dataSources.ids
 15 | .idea/**/dataSources.local.xml
 16 | .idea/**/sqlDataSources.xml
 17 | .idea/**/dynamic.xml
 18 | .idea/**/uiDesigner.xml
 19 | .idea/**/dbnavigator.xml
 20 | 
 21 | # Gradle
 22 | .idea/**/gradle.xml
 23 | .idea/**/libraries
 24 | 
 25 | # Gradle and Maven with auto-import
 26 | # When using Gradle or Maven with auto-import, you should exclude module files,
 27 | # since they will be recreated, and may cause churn.  Uncomment if using
 28 | # auto-import.
 29 | # .idea/modules.xml
 30 | # .idea/*.iml
 31 | # .idea/modules
 32 | 
 33 | # CMake
 34 | cmake-build-*/
 35 | 
 36 | # Mongo Explorer plugin
 37 | .idea/**/mongoSettings.xml
 38 | 
 39 | # File-based project format
 40 | *.iws
 41 | 
 42 | # IntelliJ
 43 | out/
 44 | 
 45 | # mpeltonen/sbt-idea plugin
 46 | .idea_modules/
 47 | 
 48 | # JIRA plugin
 49 | atlassian-ide-plugin.xml
 50 | 
 51 | # Cursive Clojure plugin
 52 | .idea/replstate.xml
 53 | 
 54 | # Crashlytics plugin (for Android Studio and IntelliJ)
 55 | com_crashlytics_export_strings.xml
 56 | crashlytics.properties
 57 | crashlytics-build.properties
 58 | fabric.properties
 59 | 
 60 | # Editor-based Rest Client
 61 | .idea/httpRequests
 62 | ### Example user template
 63 | 
 64 | # IntelliJ project files
 65 | .idea
 66 | out
 67 | gen
 68 | ### Python template
 69 | # Byte-compiled / optimized / DLL files
 70 | __pycache__/
 71 | *$py.class
 72 | 
 73 | # C extensions
 74 | *.so
 75 | 
 76 | # Distribution / packaging
 77 | .Python
 78 | build/
 79 | develop-eggs/
 80 | dist/
 81 | downloads/
 82 | eggs/
 83 | .eggs/
 84 | lib/
 85 | lib64/
 86 | parts/
 87 | sdist/
 88 | var/
 89 | wheels/
 90 | *.egg-info/
 91 | .installed.cfg
 92 | MANIFEST
 93 | 
 94 | # PyInstaller
 95 | #  Usually these files are written by a python script from a template
 96 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 97 | *.manifest
 98 | *.spec
 99 | 
100 | # Installer logs
101 | pip-log.txt
102 | pip-delete-this-directory.txt
103 | 
104 | # Unit test / coverage reports
105 | htmlcov/
106 | .tox/
107 | .coverage
108 | .coverage.*
109 | .cache
110 | nosetests.xml
111 | coverage.xml
112 | *.cover
113 | .hypothesis/
114 | .pytest_cache/
115 | 
116 | # Translations
117 | *.mo
118 | *.pot
119 | 
120 | # Django stuff:
121 | *.log
122 | local_settings.py
123 | db.sqlite3
124 | 
125 | # Flask stuff:
126 | instance/
127 | .webassets-cache
128 | 
129 | # Scrapy stuff:
130 | .scrapy
131 | 
132 | # Sphinx documentation
133 | docs/_build/
134 | 
135 | # PyBuilder
136 | target/
137 | 
138 | # Jupyter Notebook
139 | .ipynb_checkpoints
140 | 
141 | # pyenv
142 | .python-version
143 | 
144 | # celery beat schedule file
145 | celerybeat-schedule
146 | 
147 | # SageMath parsed files
148 | *.sage.py
149 | 
150 | # Environments
151 | .env
152 | .venv
153 | env/
154 | venv/
155 | ENV/
156 | env.bak/
157 | venv.bak/
158 | 
159 | # Spyder project settings
160 | .spyderproject
161 | .spyproject
162 | 
163 | # Rope project settings
164 | .ropeproject
165 | 
166 | # mkdocs documentation
167 | /site
168 | 
169 | # mypy
170 | .mypy_cache/
171 | 
172 | # bot sqlite local db
173 | bot.db
174 | 
175 | # bot local file cache
176 | cache.json
177 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | python:
 4 |   - "3.6.4"
 5 | 
 6 | before_install:
 7 |   - sudo apt-get update
 8 | 
 9 | install:
10 |   - pip install codecov
11 |   - pip install -r requirements.txt
12 | 
13 | virtualenv:
14 |   system_site_package: true
15 | 
16 | script:
17 |   - coverage run test.py
18 | 
19 | after_success:
20 |   - codecov
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Jonarzz
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## Dota Responses Reddit Bot
  2 | [![Python 3.6.4](https://img.shields.io/badge/python-3.6.4-blue.svg)](https://www.python.org/downloads/release/python-364/)
  3 | [![Build Status](https://api.travis-ci.org/Jonarzz/DotaResponsesRedditBot.svg?branch=master)](https://travis-ci.org/Jonarzz/DotaResponsesRedditBot)
  4 | [![Maintainability](https://api.codeclimate.com/v1/badges/de2c724018076b34064f/maintainability)](https://codeclimate.com/github/Jonarzz/DotaResponsesRedditBot/maintainability)
  5 | [![codecov](https://codecov.io/gh/Jonarzz/DotaResponsesRedditBot/branch/master/graph/badge.svg)](https://codecov.io/gh/Jonarzz/DotaResponsesRedditBot)
  6 | 
  7 | > Bot adding reply comments with links to appropriate responses found on [/r/dota2](https://www.reddit.com/r/DotA2).
  8 | 
  9 | ![Example](https://i.imgur.com/PAcg57z.png)
 10 | 
 11 | The bot replies only to comments that **are** responses.
 12 | 
 13 | For example: `"Selemene commands"` will return a Luna response *(like on the screenshot above)*.
 14 | 
 15 | **The original bot account is no longer active, but over 4 years of running [it gained over 100,000 karma points on Reddit](https://www.reddit.com/user/dota_responses_bot/comments/?t=all&sort=top)**.
 16 | **Currently the bot's account is [dota2_responses_bot](https://www.reddit.com/user/dota2_responses_bot)**.
 17 | 
 18 | ### Active maintainer
 19 | [MePsyDuck](https://github.com/MePsyDuck)
 20 | 
 21 | ### Comment/Submission text processing
 22 | *   All the body text is transformed into lowercase
 23 | *   Any punctuation is replaced with spaces
 24 | *   Multiple spaces are trimmed to single space.
 25 | *   If comment has blockquote, first blockquote is considered for matching.
 26 | 
 27 | <!-- Old behavior
 28 | All the responses are in lowercase in the dictionary, before comparison the comments are parsed to lowercase as well. Dot or exclamation mark ending the replyable is ignored.
 29 | -->
 30 | <!-- Old behavior
 31 | The bot will try to match a response of the hero that is in the comment/submission's author flair. If it does not find an appropriate one, it takes the one of the first hero that has such a response (alphabetically).
 32 | -->
 33 | 
 34 | ---
 35 | ### TODO
 36 | *   (If possible) Add hero flair in responses (Waiting on reddit to support this in reddit redesign).
 37 | 
 38 | ---
 39 | ### Some stats and general info
 40 | [/r/dota2](https://www.reddit.com/r/DotA2) subreddit generates around 3.5k comments/day, 
 41 | peaking around 12.5k during December (stats via [subredditstats](https://subredditstats.com/r/dota2)).
 42 | Bot should be able to handle more than 15k comments/day (10 comments/minute) easily (just an estimate, actual performance not yet tested).
 43 | 
 44 | ---
 45 | ### Environment variables 
 46 | Config variables that need to be set in the environment to run the bot:
 47 | 
 48 | |     Variable      | Required? |   Default    | Description                                                                                            |
 49 | |-------------------|-----------|--------------|--------------------------------------------------------------------------------------------------------|
 50 | | CLIENT_ID         | Required  | None.        | `client_id` generated by Reddit.                                                                       |
 51 | | CLIENT_SECRET     | Required  | None.        | `secret` generated by Reddit.                                                                          |
 52 | | SUBREDDIT         | Optional  | `dota2`      | Subreddit the bot is going to work on.                                                                 |
 53 | | REDDIT_USERNAME   | Required  | None.        | Username for the Reddit account being used.                                                            |
 54 | | REDDIT_PASSWORD   | Required  | None.        | Password for the Reddit account being used.                                                            |
 55 | | CACHE_PROVIDER    | Optional  | `memory`     | Caching module to be used. Valid choices : `redis`, `memory`, `db`.                                    |
 56 | | CACHE_URL         | Optional  | `cache.json` | URL path to redis instance/database/file in memory. Based on `CACHE_PROVIDER`.                         |
 57 | | DATABASE_PROVIDER | Optional  | `sqlite`     | DBMS to be used. Valid choices : `sqlite`, `mysql`, `postgres`                                         |
 58 | | DATABASE_URL      | Optional  | `bot.db`     | URL to the database.                                                                                   |
 59 | | LOGGING_LEVEL     | Optional  | `INFO`       | Logging level. Valid choices : [Logging levels](https://docs.python.org/3/library/logging.html#levels) |
 60 | 
 61 | ---
 62 | ### Changelog
 63 | 
 64 | #### 3.1
 65 | *   User(OP) can now request to update the response using another comment under bot's comment.
 66 |     The comment should be in the format ```Try <hero_name>``` 
 67 | *   Users can now request for a hero specific response by adding ```<hero_name> ::``` prefix to the response.
 68 |     Has more priority than user's flair.
 69 | 
 70 | #### 3.0
 71 | Major revamp for the bot.
 72 | Things that are new:
 73 | *   Bot can reply to responses that are in blockquotes and ignore rest of comment.
 74 | *   Added support for TI chat wheel sounds.
 75 | *   Comment on post submission if title is a response.
 76 | 
 77 | Things updated:
 78 | *   Support sqlite, MySQL and PostgreSQL dbs via Pony-ORM.
 79 | *   Added caching for comment ids (redis, db and in memory/file based).
 80 | *   Revamped parsing of responses from wiki (now directly from the sources).
 81 | *   Revamped parsing flair css and image directories from subreddit css.
 82 | *   Better parsing for comments.
 83 | *   Added better support for custom responses.
 84 | *   Updated excluded responses.
 85 | *   Updated docs.
 86 | *   Updated tests. 
 87 | 
 88 | #### 2.7
 89 | *   Now hero portraits (flairs) are added before the response
 90 | *   The bot tries to match the hero response with the hero in the comment's author flair first
 91 | 
 92 | #### 2.6
 93 | *   Added a few Io and Phoenix responses
 94 | 
 95 | #### 2.5
 96 | *   Fixed random responses for the "Shitty wizard" line - now it's working properly
 97 | *   Added special treatment for the comments related to "One of my favorites" response
 98 | *   Moved from a dictionary for responses and a list for comment ids to databases
 99 | 
100 | #### 2.4
101 | *   Added random responses for the "Shitty wizard" line (needs testing)
102 | 
103 | #### 2.3
104 | *   Added tests
105 | *   Code refactoring
106 | *   Added Travis CI and CodeClimate checking
107 | 
108 | #### 2.2
109 | *   Code refactoring (renaming variables, deleting unnecessary methods, etc.)
110 | *   Response in the reply is now an exact quote of the original comment
111 | *   Added comments
112 | 
113 | #### 2.1
114 | *   Bot is now adding the source of the response (e.g. hero name) to the comment *(needs testing)*
115 | 
116 | #### 2.0
117 | *   Added Arc Warden responses
118 | 
119 | #### 1.9
120 | *   File paths are now relative to the script file location (using os)
121 | *   Added dates to logging
122 | *   Logs are saved in respective files on the server
123 | *   Fixed a bug with adding same comment a few times in sticky threads that are on the subreddit for a long time (time-saving workaround)
124 | 
125 | #### 1.8
126 | *   Change in the main loop of the script - much better efficiency (time)
127 | 
128 | #### 1.7
129 | *   Changed reply comment formatting
130 | 
131 | #### 1.6
132 | *   Removed keyword triggering as /r/dota2 community did not like it
133 | 
134 | #### 1.5
135 | *   Added keywords that trigger the bot: "just do it", "beautiful", "from the ghastly eyrie", "sniper", "ppd", "leafeator", "ur/your brother"
136 | *   Code refactoring
137 | 
138 | #### 1.4
139 | *   Fixed a bug created by 1.2 changes
140 | *   Changed submissions number in hot to 25
141 | *   Added more excluded responses
142 | 
143 | #### 1.3
144 | *   Removed responses such as "thank you", hero names and item names (anti-spam)
145 | 
146 | #### 1.2
147 | *   Accept comments with extra letters added for emphasis
148 | 
149 | #### 1.1
150 | *   One word responses are no longer in the dictionary
151 | *   Replaced double spaces with single space
152 | *   Bot is now working with hot submissions
153 | 
154 | ---
155 | ### Treeware License
156 | Basically MIT License, but if you use the code (learning or project purposes), you have to plant at least one tree at some future time.
157 | 


--------------------------------------------------------------------------------
/bot/__init__.py:
--------------------------------------------------------------------------------
1 | from bot.account import *
2 | from bot.worker import *
3 | 
4 | __all__ = ['account', 'worker']  # names exported by `from bot import *`
5 | 


--------------------------------------------------------------------------------
/bot/account.py:
--------------------------------------------------------------------------------
 1 | """Module used to configure the connection to the Reddit API.
 2 | Removed old Code Flow auth in favor of Password Flow auth.
 3 | Reason: https://www.reddit.com/r/redditdev/comments/5fxlk8/praw_refresh_tokens/dantjyk/
 4 | """
 5 | 
 6 | import praw
 7 | 
 8 | import config
 9 | 
10 | __author__ = 'Jonarzz'
11 | __maintainer__ = 'MePsyDuck'
12 | 
13 | 
14 | def get_account():
15 |     """Method that provides the connection to Reddit API using the username/password credentials from config.
16 |         :return: praw.Reddit instance built from config's client id/secret, user agent, username and password.
17 |     """
18 |     return praw.Reddit(client_id=config.CLIENT_ID,
19 |                        client_secret=config.CLIENT_SECRET,
20 |                        user_agent=config.USER_AGENT,
21 |                        username=config.USERNAME,
22 |                        password=config.PASSWORD)
23 | 


--------------------------------------------------------------------------------
/bot/worker.py:
--------------------------------------------------------------------------------
  1 | """Main module of the Dota 2 subreddit Responses Bot.
  2 | 
  3 | The main body of the script is running in this file. The comments are loaded from the subreddit
  4 | and the script checks if the comment or submission is a response from Dota 2. If it is, a proper reply for response is
  5 | prepared. The response is posted as a reply to the original comment/submission on Reddit.
  6 | """
  7 | import time
  8 | 
  9 | from praw.exceptions import APIException
 10 | from praw.models import Comment
 11 | from prawcore import ServerError
 12 | 
 13 | import config
 14 | from bot import account
 15 | from util.caching import get_cache_api
 16 | from util.database.database import db_api
 17 | from util.logger import logger
 18 | from util.response_info import ResponseInfo
 19 | from util.str_utils import preprocess_text
 20 | 
 21 | __author__ = 'Jonarzz'
 22 | __maintainer__ = 'MePsyDuck'
 23 | 
 24 | cache_api = get_cache_api()  # module-wide cache handle, queried with replyable fullnames in process_replyable
 25 | 
 26 | 
 27 | def work():
 28 |     """Main method executing the script.
 29 | 
 30 |     It connects to an account, obtains the comment and submission streams and passes each item to process_replyable.
 31 |     On ServerError or APIException the streams are re-obtained and the loop resumes after sleeping 120 or 60 seconds.
 32 |     """
 33 | 
 34 |     reddit = account.get_account()
 35 |     logger.info('Connected to Reddit account : ' + config.USERNAME)
 36 | 
 37 |     comment_stream, submission_stream = get_reddit_stream(reddit)
 38 |     while True:
 39 |         try:
 40 |             for comment in comment_stream:
 41 |                 if comment is None:  # a None item ends this pass so that submissions get their turn
 42 |                     break
 43 |                 process_replyable(reddit, comment)
 44 |             for submission in submission_stream:
 45 |                 if submission is None:  # a None item ends this pass; the while loop goes back to comments
 46 |                     break
 47 |                 process_replyable(reddit, submission)
 48 |         except ServerError as e:
 49 |             comment_stream, submission_stream = get_reddit_stream(reddit)
 50 |             logger.critical("Reddit server is down : " + str(e))
 51 |             time.sleep(120)
 52 |         except APIException as e:
 53 |             comment_stream, submission_stream = get_reddit_stream(reddit)
 54 |             logger.critical("API Exception occurred : " + str(e))
 55 |             time.sleep(60)
 56 | 
 57 | 
 58 | def get_reddit_stream(reddit):
 59 |     """Returns the comment and submission streams of the config.SUBREDDIT subreddit.
 60 |     Streams need to be restarted/re-obtained when they throw an exception. Both are created with pause_after=-1.
 61 | 
 62 |     :param reddit: The reddit account instance
 63 |     :return: Tuple of (comment stream, submission stream)
 64 |     """
 65 |     comment_stream = reddit.subreddit(config.SUBREDDIT).stream.comments(pause_after=-1)
 66 |     submission_stream = reddit.subreddit(config.SUBREDDIT).stream.submissions(pause_after=-1)
 67 |     return comment_stream, submission_stream
 68 | 
 69 | 
 70 | def process_replyable(reddit, replyable):
 71 |     """Method used to check a single comment/submission and add a reply (or update one) if it is a response.
 72 | 
 73 |     PRAW generates past ~100 comments/submissions on the first iteration. Then the loop only runs if there is a new
 74 |     comment/submission added to the stream. This also means that once PRAW is up and running, after the initial comments
 75 |     list it won't generate any duplicate comments.
 76 | 
 77 |     However, just as a safeguard, Caching is used to store replyable ids as they are processed for the first time.
 78 |     Otherwise, when the bot is restarted it might reply twice to same comments. If the replyable id is already present
 79 |     in the cache_api, then it is ignored, else processed and added to the cache_api.
 80 |     * Self comments are ignored.
 81 |     * It is prepared for comparison to the responses in dictionary.
 82 |     * If the replyable is not on the excluded responses list (loaded from config) and if it is in the responses db or
 83 |     specific responses list, a reply replyable is prepared and posted.
 84 | 
 85 |     :param reddit: The reddit account instance
 86 |     :param replyable: comment or submission
 87 |     :return: None
 88 |     """
 89 | 
 90 |     if cache_api.exists(thing_id=replyable.fullname):
 91 |         return
 92 | 
 93 |     # Ignore thyself
 94 |     if replyable.author == reddit.user.me():
 95 |         return
 96 | 
 97 |     logger.info("Found new replyable: " + replyable.fullname)
 98 | 
 99 |     processed_text = process_text(replyable.body if isinstance(replyable, Comment) else replyable.title)
100 | 
101 |     # First matching check wins; the assignment expressions (:=) below require Python 3.8+
102 |     if is_excluded_response(processed_text):
103 |         pass
104 |     elif is_custom_response(processed_text):
105 |         add_custom_reply(replyable, processed_text)
106 |     elif (response_info := is_hero_specific_response(processed_text)) is not None:
107 |         add_hero_specific_reply(replyable, response_info)
108 |     elif (response_info := is_flair_specific_response(replyable, processed_text)) is not None:
109 |         add_flair_specific_reply(replyable, response_info)
110 |     elif (response_info := is_update_request(reddit, replyable, processed_text)) is not None:
111 |         update_reply(replyable, response_info)
112 |     elif (response_info := is_hero_response(processed_text)) is not None:
113 |         add_regular_reply(replyable, response_info)
114 | 
115 | 
116 | def process_text(text):
117 |     """Method used to clean the replyable body/title text.
118 |     If text contains a quote, the first quote text is considered as the text.
119 | 
120 |     :param text: The replyable body/title text
121 |     :return: Processed text. If text contained '::', the stripped part before the first '::' is kept as a '<name>::' prefix
122 |     """
123 |     hero_name = None
124 |     if '>' in text:
125 |         text = get_quoted_text(text)
126 |     if '::' in text:
127 |         hero_name, text = text.split('::', 1)
128 |         hero_name = hero_name.strip() + '::'  # the prefix is only stripped, it does not go through preprocess_text
129 | 
130 |     return (hero_name or '') + preprocess_text(text)
131 | 
132 | 
133 | def get_quoted_text(text):
134 |     """Method used to get quoted text.
135 |     The text is split on blank lines ('\n\n') and the first part that starts with '>' is considered as the quote.
136 | 
137 |     :param text: The replyable text
138 |     :return: The first quote in the text without its leading '>'. If no quotes are found, then the entire text is returned
139 |     """
140 |     lines = text.split('\n\n')
141 |     for line in lines:
142 |         if line.startswith('>'):
143 |             return line[1:]
144 |     return text
145 | 
146 | 
147 | def is_excluded_response(text):
148 |     """Method to check if the given body/title is in excluded responses set.
149 |     Also returns True for text without any space, i.e. single word text (they're mostly common phrases).
150 | 
151 |     :param text: The processed body/title text
152 |     :return: True if text is an excluded response, else False
153 |     """
154 |     return ' ' not in text or text in config.EXCLUDED_RESPONSES
155 | 
156 | 
157 | def is_custom_response(text):
158 |     """Method to check if given body/title text is present in config.CUSTOM_RESPONSES.
159 | 
160 |     :param text: The processed body/title text
161 |     :return: True if text is a custom response, else False
162 |     """
163 |     return text in config.CUSTOM_RESPONSES
164 | 
165 | 
166 | def add_custom_reply(replyable, body):
167 |     """Method to post a custom reply: config.CUSTOM_RESPONSES[body] formatted with the original text and comment ending.
168 | 
169 |     :param replyable: The comment/submission on reddit
170 |     :param body: The processed body/title text, used as the lookup key in config.CUSTOM_RESPONSES
171 |     :return: None
172 |     """
173 |     custom_response = config.CUSTOM_RESPONSES[body]
174 |     original_text = replyable.body if isinstance(replyable, Comment) else replyable.title
175 | 
176 |     reply = custom_response.format(original_text, config.COMMENT_ENDING)
177 |     replyable.reply(reply)
178 |     logger.info("Replied to: " + replyable.fullname)
179 | 
180 | 
181 | def is_hero_specific_response(text):
182 |     """Method that checks if response for specified hero name and text exists.
183 | 
184 |     :param text: The processed body/title text, a hero specific request has the form '<hero_name>::<response text>'
185 |     :return: ResponseInfo containing hero_id and link for response if the response for specified hero was found, otherwise None
186 |     """
187 |     if '::' in text:
188 |         hero_name, text = text.split('::', 1)
189 | 
190 |         if not hero_name or not text:  # nothing before or nothing after the '::' separator
191 |             return None
192 | 
193 |         hero_id = db_api.get_hero_id_by_name(hero_name=hero_name)
194 |         if hero_id:
195 |             link, _ = db_api.get_link_for_response(processed_text=text, hero_id=hero_id)
196 |             if link:
197 |                 return ResponseInfo(hero_id=hero_id, link=link)
198 |     return None
199 | 
200 | 
201 | def add_hero_specific_reply(replyable, response_info):
202 |     """Method to add a hero specific reply to the comment/submission by delegating to create_and_add_reply.
203 | 
204 |     :param replyable: The comment/submission on reddit
205 |     :param response_info: ResponseInfo containing hero_id and link for response
206 |     :return: None
207 |     """
208 |     create_and_add_reply(replyable=replyable, response_url=response_info.link, hero_id=response_info.hero_id)
209 | 
210 | 
211 | def is_flair_specific_response(replyable, text):
212 |     """Method that checks if response for hero in author's flair and text exists.
213 | 
214 |     :param replyable: The comment/submission on reddit, its author_flair_css_class is used to look up the hero
215 |     :param text: The processed body/title text
216 |     :return: ResponseInfo containing hero_id and link for response if the response for author's flair's hero was found, otherwise None
217 |     """
218 |     hero_id = db_api.get_hero_id_by_flair_css(flair_css=replyable.author_flair_css_class)
219 |     if hero_id:
220 |         link, _ = db_api.get_link_for_response(processed_text=text, hero_id=hero_id)
221 |         if link:
222 |             return ResponseInfo(hero_id=hero_id, link=link)
223 |     return None
224 | 
225 | 
226 | def add_flair_specific_reply(replyable, response_info):
227 |     """Method to add an author's flair specific reply to the comment/submission by delegating to create_and_add_reply.
228 | 
229 |     :param replyable: The comment/submission on reddit
230 |     :param response_info: ResponseInfo containing hero_id and link for response
231 |     :return: None
232 |     """
233 |     create_and_add_reply(replyable=replyable, response_url=response_info.link, hero_id=response_info.hero_id)
234 | 
235 | 
236 | def is_update_request(reddit, replyable, text):
237 |     """Method to check whether the comment is a request to update existing response.
238 |     Only works if
239 |     * Comment begins with "try"
240 |     * Comment ends with valid hero name
241 |     * Given hero has the original response
242 |     * Root/Original comment/submission was not hero specific response.
243 | 
244 |     Examples:
245 |     "Try legion commander" : Valid
246 |     "Try leGiOn ComManDer" : Valid - case does not matter
247 |     "legion commander" : Invalid - does not begin with `try`
248 |     "Try legion" : Invalid - invalid hero name
249 | 
250 |     :param reddit: The reddit account instance
251 |     :param replyable: The comment/submission on reddit
252 |     :param text: The processed body/title text
253 |     :return: ResponseInfo containing hero_id and link for response if this is a valid update request, otherwise None
254 |     """
255 | 
256 |     if not text.startswith(config.UPDATE_REQUEST_KEYWORD):
257 |         return None
258 | 
259 |     if not validate_update_request_comment_tree(reddit, replyable):
260 |         return None
261 | 
262 |     hero_name = text.replace(config.UPDATE_REQUEST_KEYWORD, '', 1)  # drop only the first (leading) keyword
263 |     hero_id = db_api.get_hero_id_by_name(hero_name=hero_name)
264 |     if hero_id is None:
265 |         return None
266 | 
267 |     root_replyable = replyable.parent().parent()  # request comment -> bot's comment -> original replyable
268 |     processed_text = process_text(root_replyable.body if isinstance(root_replyable, Comment) else root_replyable.title)
269 | 
270 |     if is_hero_specific_response(processed_text):  # original replyable was already a hero specific response
271 |         return None
272 | 
273 |     link, _ = db_api.get_link_for_response(processed_text=processed_text, hero_id=hero_id)
274 | 
275 |     if link is None:
276 |         return None
277 | 
278 |     return ResponseInfo(hero_id=hero_id, link=link)
279 | 
280 | 
281 | def validate_update_request_comment_tree(reddit, replyable):
282 |     """Method to check whether the comment in the request to update existing response is valid.
283 |     A valid comment tree is when:
284 |     * Comment was made as a reply to bot's comment
285 |     * Comment was added by OP, who made the original request(Response/Comment) for the response.
286 | 
287 |     The comment tree should look something like below, where root(original) replyable can be Comment or Submission.
288 |     Only valid case is c3.
289 |     c1/s1 user: Foo
290 |         c2 bot: "Foo" response by Bar hero
291 |            c3 user: Try Bar2
292 |            c4 other_user: Try Bar2
293 | 
294 |     :param reddit: The reddit account instance
295 |     :param replyable: The update request on reddit (anything that is not a Comment is rejected)
296 |     :return: True if this is a valid comment tree, else False
297 |     """
298 |     if not isinstance(replyable, Comment):
299 |         return False
300 | 
301 |     op = replyable.author
302 |     parent_comment = replyable.parent()
303 | 
304 |     if not isinstance(parent_comment, Comment):
305 |         return False
306 | 
307 |     if not parent_comment.author == reddit.user.me():  # parent must be the bot's own comment
308 |         return False
309 | 
310 |     root_replyable = parent_comment.parent()
311 | 
312 |     if not root_replyable.author == op:  # requester must also be the author of the root replyable
313 |         return False
314 | 
315 |     return True
316 | 
317 | 
318 | def update_reply(replyable, response_info):
319 |     """Method to edit and update existing response comment by the bot with a new hero as requested.
320 | 
321 |     :param replyable: The update request comment on reddit, its parent is the bot's comment that gets edited
322 |     :param response_info: ResponseInfo containing hero_id and link for response
323 |     :return: None
324 |     """
325 |     bot_comment = replyable.parent()
326 |     root_replyable = bot_comment.parent()
327 | 
328 |     # TODO maybe get original text from bot's command, rather than the original post, as it might be edited by the time this command is called
329 |     original_text = root_replyable.body if isinstance(root_replyable, Comment) else root_replyable.title
330 |     original_text = original_text.strip()
331 | 
332 |     if '>' in original_text:
333 |         original_text = get_quoted_text(original_text).strip()
334 | 
335 |     # Getting name with Proper formatting
336 |     hero_name = db_api.get_hero_name(response_info.hero_id)
337 | 
338 |     reply = "[{}]({}) (sound warning: {}){}".format(original_text, response_info.link, hero_name, config.COMMENT_ENDING)
339 |     bot_comment.edit(reply)
340 | 
341 |     logger.info("Updated Reply: " + replyable.fullname)
342 | 
343 | 
def is_hero_response(text):
    """Method to look up a response link for the given processed text, without restricting the hero.

    :param text: The processed body/title text
    :return: ResponseInfo containing hero_id and link if the database returned both, otherwise None
    """
    link, hero_id = db_api.get_link_for_response(processed_text=text)

    # Both parts are needed to build a reply
    if not (link and hero_id):
        return None

    return ResponseInfo(hero_id=hero_id, link=link)
359 | 
360 | 
def add_regular_reply(replyable, response_info):
    """Method to reply to the given replyable with an already resolved response.
    Unpacks the ResponseInfo and delegates the actual reply creation to `create_and_add_reply`.

    :param replyable: The comment/submission on reddit
    :param response_info: ResponseInfo containing hero_id and link for response
    :return: None
    """
    response_url = response_info.link
    hero_id = response_info.hero_id
    create_and_add_reply(replyable=replyable, response_url=response_url, hero_id=hero_id)
371 | 
372 | 
def create_and_add_reply(replyable, response_url, hero_id):
    """Method that creates a reply in reddit format and adds the reply to comment/submission.
    The reply consists of a link to the response audio file, the response itself, a warning about the sound
    and an ending added from the config file (post footer).

    Image is currently ignored due to new reddit redesign not rendering flairs properly.

    :param replyable: The comment/submission on reddit
    :param response_url: The url to the response audio file
    :param hero_id: The hero_id to which the response belongs to.
    :return: None, the reply is posted directly on the replyable.
    """
    if isinstance(replyable, Comment):
        link_text = replyable.body
    else:
        link_text = replyable.title
    link_text = link_text.strip()

    # Keep only the quoted part when the text contains a reddit quote marker
    if '>' in link_text:
        link_text = get_quoted_text(link_text).strip()
    # Drop everything up to and including the first `::` separator
    if '::' in link_text:
        _, _, remainder = link_text.partition('::')
        link_text = remainder.strip()

    hero_name = db_api.get_hero_name(hero_id)

    reply = "[{}]({}) (sound warning: {}){}".format(link_text, response_url, hero_name, config.COMMENT_ENDING)
    replyable.reply(reply)

    logger.info("Replied to: " + replyable.fullname)
399 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
  1 | """Module in which the constants that are used by Dota Responses Bot are declared."""
  2 | import os
  3 | from util.response_request import request_cargo_set
  4 | 
  5 | __author__ = 'Jonarzz'
  6 | __maintainer__ = 'MePsyDuck'
  7 | 
# App config
# Credentials are read from the environment and are None when the variable is not set.
CLIENT_ID = os.environ.get('CLIENT_ID')
CLIENT_SECRET = os.environ.get('CLIENT_SECRET')

# Account config
USER_AGENT = 'Python:dota2_responses_bot:v3.0 by /u/Jonarz, maintained by /u/MePsyDuck'
SUBREDDIT = os.environ.get('SUBREDDIT', 'dota2')
USERNAME = os.environ.get('REDDIT_USERNAME')
PASSWORD = os.environ.get('REDDIT_PASSWORD')

# Parser config
URL_DOMAIN = 'http://dota2.gamepedia.com'
API_PATH = URL_DOMAIN + '/api.php'
RESPONSES_CATEGORY = 'Responses'
# Template GET parameters: the wiki parser copies these dicts and fills in the empty `cmtitle`/`titles` value.
CATEGORY_API_PARAMS = {'action': 'query', 'list': 'categorymembers', 'cmlimit': 'max', 'cmprop': 'title',
                       'format': 'json', 'cmtitle': ''}
FILE_API_PARAMS = {'action': 'query', 'titles': '', 'prop': 'imageinfo', 'iiprop': 'url', 'format': 'json'}

STYLESHEET_URL = r'https://www.reddit.com/r/dota2/about/stylesheet.json'
# Named groups `css_class` and `img_path` are read by the css parser.
FLAIR_REGEX = r'(?P<css_class>.flair-\w+),a\[href="(?P<img_path>/hero-\w+)"\]'
# Named groups `files` and `text` are read by the wiki parser for every response line.
RESPONSE_REGEX = r'\*(?P<files>( <sm2>.*?</sm2>)+)(?P<text>(.*))'
# Named groups `event` and `source` are read when populating chat wheel responses.
CHAT_WHEEL_SECTION_REGEX = r'(=== (?P<event>The International \d+) ===)(?P<source>.+?)(?=\n=== [a-z0-9 ]+ ===\n)'
# Named group `file` is the file name inside a single <sm2> tag.
FILE_REGEX = r'( <sm2>(?P<file>[a-zA-Z0-9_. ]+)</sm2>)'

# Caching config
CACHE_PROVIDER = os.environ.get('CACHE_PROVIDER', 'memory')  # valid choices : redis, memory, db
CACHE_URL = os.environ.get('CACHE_URL',
                           os.path.join(os.getcwd(), 'cache.json'))  # file path in case of memory/file based caching

# DB config
DB_PROVIDER = os.environ.get('DATABASE_PROVIDER', 'sqlite')  # valid choices : sqlite, mysql, postgres
DB_URL = os.environ.get('DATABASE_URL', os.path.join(os.getcwd(), 'bot.db'))  # file path in case of sqlite

# Logging config
BOT_LOGGER = 'bot'
PRAW_LOGGER = 'prawcore'
LOG_LEVEL = os.environ.get('LOGGING_LEVEL', 'INFO').upper()
LOG_FORMAT = '%(asctime)s %(funcName)-20s %(levelname)-8s %(message)s'
LOG_DIR = 'logs'
INFO_FILENAME = 'info.log'
ERROR_FILENAME = 'error.log'
PRAW_FILENAME = 'praw.log'

# NOTE(review): the unit of this TTL is not visible in this module - confirm against the cache implementations.
CACHE_TTL = 5

# Responses config
# TODO confirm this keyword
UPDATE_REQUEST_KEYWORD = 'try '
# Footer appended to every reply the bot posts or edits.
COMMENT_ENDING = '''

---
Bleep bloop, I am a robot. *OP can reply with "Try hero_name" to update this with new hero*

[*^(Source)*](https://github.com/Jonarzz/DotaResponsesRedditBot) *^(|)* 
[*^(Suggestions/Issues)*](https://github.com/Jonarzz/DotaResponsesRedditBot/issues/new/choose) *^(|)* 
[*^(Maintainer)*](https://www.reddit.com/user/MePsyDuck/) *^(|)* 
[*^(Author)*](https://www.reddit.com/user/Jonarz/)
'''

# Key should be lowercase without special characters. Needs to be updated if links break (as links can be
# non-gamepedia links too)
# Value should have a placeholder for original text and replyable ending
CUSTOM_RESPONSES = {
    'ho ho ha ha': '[{}](https://gamepedia.cursecdn.com/dota2_gamepedia/1/17/Snip_ability_shrapnel_03.mp3)'
                   ' (trigger warning: Sniper){}',
    'turn up this guy is crazy as fuck he s gotta be on molly or some powder or something':
        '[{}](https://www.youtube.com/watch?v=CO3j9lUYFfo) (Donation warning: Arteezy){}'
}

# Only include responses for items, runes, heroes, > 100 count and common phrases.
# Hardcoded so that they can be tweaked according to the needs.
# Drawback of this: needs to be updated each time a hero/item is added
FREQUENT_RESPONSES = {'denied', 'yes', 'not yet', 'no mana', 'not enough mana', 'i m not ready', 'out of mana',
                      'it s not time yet', 'ah', 'no', 'uh', 'ha ha', 'attack', 'haste', 'double damage', 'immortality',
                      'invisibility', 'illusion', 'regeneration', 'uh uh', 'ha', }

# Hero and item responses not hardcoded here
# Both sets are requested from the wiki's cargo query API while this module is being imported.
HERO_NAME_RESPONSES = request_cargo_set('https://dota2.gamepedia.com/api.php?'+
                                        'action=cargoquery&tables=heroes&fields=title&where=game'+
                                        '+IS+NULL&limit=500&format=json')

ITEM_RESPONSES = request_cargo_set('https://dota2.gamepedia.com/api.php?'+
                                   'action=cargoquery&tables=items&fields='+
                                   'title&where=game+IS+NULL&limit=500&format=json')
 92 | 
 93 | # Add responses here as people report them. Taken from the old excluded responses list.
 94 | COMMON_PHRASE_RESPONSES = {'earth shaker', 'shut up', 'skeleton king', 'it begins', 'i am', 'exactly so', 'very nice',
 95 |                            'why not', 'much appreciated', 'well done', 'pit lord', 'outworld destroyer', 'I know right',
 96 |                            'aphotic shield', 'go outside', 'vladimir s offering', 'sheep stick', 'my bad',
 97 |                            "you're welcome", 'holy shit', 'are you okay', 'i agree', 'thank god', 'i like it', 'no way',
 98 |                            'fair enough', 'it worked', 'well deserved', 'he he he', 'how so', 'oh boy', 'very good',
 99 |                            'about time', 'are you kidding me', 'abyssal underlord', 'so beautiful', 'nice try',
100 |                            'thank you so much', 'ah, nice', 'nice one', 'eul s scepter', 'thank you',
101 |                            'scepter of divinity', 'at last', 'too soon', 'try again', 'i don t think so', 'try harder',
102 |                            'well said', 'of course', 'got it', 'what happened', 'hey now', 'seems fair', 'that s right',
103 |                            'all pick'}
104 | 
# Union of every response set declared in this module.
EXCLUDED_RESPONSES = FREQUENT_RESPONSES | ITEM_RESPONSES | HERO_NAME_RESPONSES | COMMON_PHRASE_RESPONSES
106 | 


--------------------------------------------------------------------------------
/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | # Named as `parsers` because `parser` will produce ImportError due to conflict with internal `parser.py` file
2 | 
3 | from parsers.css_parser import *
4 | from parsers.wiki_parser import *
5 | 
6 | __all__ = ['css_parser', 'wiki_parser']
7 | 


--------------------------------------------------------------------------------
/parsers/css_parser.py:
--------------------------------------------------------------------------------
 1 | """Module to populate hero details from DotA 2 subreddit css.
 2 | """
 3 | 
 4 | import json
 5 | import re
 6 | 
 7 | import requests
 8 | from rapidfuzz import process
 9 | 
10 | from config import STYLESHEET_URL, FLAIR_REGEX, USER_AGENT
11 | from util.database.database import db_api
12 | 
13 | __author__ = 'MePsyDuck'
14 | 
15 | 
def populate_heroes():
    """Method to update heroes in the Heroes table with hero names and proper css classes names as
    taken from the DotA2 subreddit and hero flair images from the reddit directory.

    Uses rapidfuzz for fuzzy matching of hero names to name found in `.flair-name` property in css.
    Flairs without a hero name candidate, or whose best candidate scores below 90, are skipped.
    """
    hero_names = db_api.get_all_hero_names()

    response = requests.get(STYLESHEET_URL, headers={'User-Agent': USER_AGENT})
    stylesheet = json.loads(response.text)['data']['stylesheet']

    flair_regex = re.compile(FLAIR_REGEX)
    for flair in flair_regex.finditer(stylesheet):
        flair_css = flair['css_class']
        img_path = flair['img_path']
        flair_hero = img_path[len('/hero-'):]  # img_path always starts with '/hero-' as per FLAIR_REGEX

        best_match = process.extractOne(flair_hero, hero_names)
        # extractOne returns None when there is nothing to match against (e.g. no heroes stored yet);
        # unpacking it directly would raise TypeError.
        if best_match is None:
            continue

        match, confidence, _ = best_match
        if confidence >= 90:
            db_api.update_hero(hero_name=match, img_path=img_path, flair_css=flair_css)
37 | 


--------------------------------------------------------------------------------
/parsers/wiki_parser.py:
--------------------------------------------------------------------------------
  1 | """Module used to populate responses into the Responses table in database.
  2 | 
  3 | Responses and urls to responses as mp3s are parsed from Dota 2 Wiki: http://dota2.gamepedia.com/
  4 | """
  5 | 
  6 | import json
  7 | import re
  8 | from concurrent.futures import as_completed
  9 | 
 10 | import requests
 11 | from requests.adapters import HTTPAdapter
 12 | from requests_futures.sessions import FuturesSession
 13 | from urllib3 import Retry
 14 | 
 15 | from config import API_PATH, RESPONSES_CATEGORY, RESPONSE_REGEX, CATEGORY_API_PARAMS, URL_DOMAIN, FILE_API_PARAMS, \
 16 |     FILE_REGEX, CHAT_WHEEL_SECTION_REGEX
 17 | from util.database.database import db_api
 18 | from util.logger import logger
 19 | from util.str_utils import preprocess_text
 20 | 
 21 | __author__ = 'Jonarzz'
 22 | __maintainer__ = 'MePsyDuck'
 23 | 
 24 | 
 25 | def populate_responses():
 26 |     """Method that adds all the responses to database. Assumes responses and hero database are already built.
 27 |     """
 28 |     populate_hero_responses()
 29 |     populate_chat_wheel()
 30 | 
 31 | 
 32 | def populate_hero_responses():
 33 |     """Method that populates hero responses (as well as Arcana voice packs and Announcer packs) from Gamepedia.
 34 |     First fetches all Pages in Responses category, then source for each page.
 35 |     Populates Responses table and Hero table from processed response, original response, link and hero name.
 36 |     """
 37 |     pages = pages_for_category(RESPONSES_CATEGORY)
 38 |     for page in pages:
 39 |         if is_hero_type(page):
 40 |             # page points to hero responses
 41 |             hero_name = get_hero_name(page)
 42 |         else:
 43 |             # page points to voice pack, announcer or shopkeeper responses
 44 |             hero_name = page
 45 | 
 46 |         responses_source = requests.get(url=URL_DOMAIN + '/' + page, params={'action': 'raw'}).text
 47 | 
 48 |         response_link_list = create_responses_text_and_link_list(responses_source=responses_source)
 49 |         # Note: Save all responses to the db. Apply single word and common words filter on comments and submission text
 50 |         # not while saving responses
 51 |         db_api.add_hero_and_responses(hero_name=hero_name, response_link_list=response_link_list)
 52 | 
 53 | 
 54 | def pages_for_category(category_name):
 55 |     """Method that returns a list of pages for a given Wiki category.
 56 | 
 57 |     :param category_name: returns all category members in json response from mediawiki API.
 58 |     :return: list of all `pages` in the given category.
 59 |     """
 60 |     params = get_params_for_category_api(category_name)
 61 |     json_response = requests.get(url=API_PATH, params=params).text
 62 | 
 63 |     pages = []
 64 | 
 65 |     parsed_json = json.loads(json_response)
 66 |     for category_members in parsed_json['query']['categorymembers']:
 67 |         title = category_members['title']
 68 |         pages.append(title)
 69 | 
 70 |     return pages
 71 | 
 72 | 
 73 | def get_params_for_category_api(category):
 74 |     """Method to get `GET` parameters for querying MediaWiki for category details.
 75 | 
 76 |     :param category: category name to be passed in params.
 77 |     :return: GET parameters `params`
 78 |     """
 79 |     params = CATEGORY_API_PARAMS.copy()
 80 |     params['cmtitle'] = 'Category:' + category
 81 |     return params
 82 | 
 83 | 
 84 | def get_params_for_files_api(files):
 85 |     """Method to get `GET` parameters for querying MediaWiki for details for multiple files.
 86 |     Uses pipe character `|` to include multiple files. Currently MediaWiki limits number of files to 50.
 87 |     If files list is empty, leave `File` parameter empty.
 88 | 
 89 |     :param files: list of file names to be passed in params.
 90 |     :return: GET parameters `params`.
 91 |     """
 92 |     params = FILE_API_PARAMS.copy()
 93 |     if files:
 94 |         titles = 'File:' + '|File:'.join(files)
 95 |     else:
 96 |         titles = ''
 97 |     params['titles'] = titles
 98 |     return params
 99 | 
100 | 
def is_hero_type(page):
    """Method to check if page belongs to a hero or creep-hero(Warlock's Golem).
    Such pages are recognised by the `/Responses` part in their title.

    :param page: Page name as string.
    :return: True if page belongs to hero else False
    """
    responses_marker = '/Responses'
    return responses_marker in page
108 | 
109 | 
def get_hero_name(hero_page):
    """Method that parses hero name from its responses page.
    Pages for heroes are in the form of `Hero name/Responses` and `Bundle/Responses/Hero`.
    Every `/Responses` part is removed from the title, the rest is returned unchanged.

    :param hero_page: hero's responses page as string.
    :return: Hero name as parsed
    """
    title_parts = hero_page.split('/Responses')
    return ''.join(title_parts)
119 | 
120 | 
def create_responses_text_and_link_list(responses_source):
    """Method that for a given source of a hero's response page creates a list of tuple: (original_text, processed_text,
     link).
    Steps involved:
    * Find all lines containing mp3 files and responses using `RESPONSE_REGEX`.
    * Parse the response text, lines that cannot be parsed are dropped.
    * Pair the parsed text with every file name found on the line.
    * Resolve links for all the file names at once by calling `links_for_files`.
    * Preprocess the text and keep the pairs having non-empty processed text and a known link.

    :param responses_source: Mediawiki source
    :return: list with tuples of (original_text, processed_text, link).
    """
    response_pattern = re.compile(RESPONSE_REGEX)
    file_pattern = re.compile(FILE_REGEX)

    text_file_pairs = []
    for response_match in response_pattern.finditer(responses_source):
        original_text = parse_response(response_match['text'])
        if original_text is None:
            continue
        # File names are normalised to how they are looked up later: spaces for underscores, capitalized
        text_file_pairs.extend(
            [original_text, file_match['file'].replace('_', ' ').capitalize()]
            for file_match in file_pattern.finditer(response_match['files'])
        )

    link_by_file = links_for_files([file_name for _, file_name in text_file_pairs])

    responses = []
    for original_text, file_name in text_file_pairs:
        processed_text = preprocess_text(original_text)
        # Ignore files with no links to mp3 files. Happens to broken files and files undergoing migration.
        if processed_text == '' or file_name not in link_by_file:
            continue
        responses.append((original_text, processed_text, link_by_file[file_name]))

    return responses
162 | 
163 | 
def parse_response(text):
    """Method that cleans the wiki markup from the text part of a single response line.

    Known markup is either removed (comments, rarity/icon templates, files, <small>/<ref>/<nowiki> tags)
    or replaced by its display text (links, tooltips, notes, hero templates).

    :param text: response text as found in the Mediawiki source, after the <sm2> file tags.
    :return: stripped plain response text, or None if the response is excluded or still contains markup.
    """
    # Special cases
    if any(excluded_case in text for excluded_case in ['(broken file)', 'versus (TI ', 'Ceeeb']):
        return None

    text = text.replace('…', '...')  # Replace ellipsis with three dots

    regexps_empty_sub = [r'<!--.*?-->',  # Remove comments
                         r'{{resp\|(r|u|\d+|d\|\d+|rem)}}',  # Remove response rarity
                         r'{{hero icon\|[a-z- \']+\|\d+px}}',  # Remove hero icon
                         r'{{item( icon)?\|[a-z0-9() \']+\|\d+px}}',  # Remove item icon
                         r'\[\[File:[a-z.,!\'() ]+\|\d+px(\|link=[a-z,!\'() ]+)?(\|class=[a-z]+)?]]',  # Remove Files
                         r'<small>\[\[#[a-z0-9_\-\' ]+\|\'\'followup\'\']]</small>',
                         # Remove followup links in <small> tags
                         r'<small>\'\'[a-z0-9 /]+\'\'</small>',  # Remove text in <small> tags
                         r'<ref>.*?</ref>',  # Remove text in <ref> tags
                         r'<nowiki>.*?</nowiki>',  # Remove text in <nowiki> tags
                         ]
    for regex in regexps_empty_sub:
        text = re.sub(regex, '', text, flags=re.IGNORECASE)

    # NOTE(review): `#-_` inside the character classes of the second pattern is a range ('#' up to '_'),
    # presumably unintended - confirm before tightening, as it widens what is accepted inside links.
    regexps_sub_text = [r'\[\[([a-zé().:\',\- ]+)]]',  # Replace links such as [[Shitty Wizard]]
                        r'\[\[[a-zé0-9().:\'/#-_ ]+\|([a-zé0-9().:\'/#-_ ]+)]]',
                        # Replace links such as [[Ancient (Building)|Ancients]], [[:File:Axe|Axe]] and [[Terrorblade#Sunder|sundering]]
                        r'{{tooltip\|(.*?)\|.*?}}',  # Replace tooltips
                        r'{{note\|([a-z.!\'\-?, ]+)\|[a-z.!\'\-?,()/ ]+}}',  # Replace notes
                        r'{{H\|([a-z.!\'\-?,()/ ]+)}}',  # Replace heroes
                        ]
    for regex in regexps_sub_text:
        text = re.sub(regex, '\\1', text, flags=re.IGNORECASE)

    # Any markup left at this point was not recognised by the patterns above
    if any(escape in text for escape in ['[[', ']]', '{{', '}}', '|', 'sm2']):
        # `Logger.warn` is a deprecated alias, `warning` is the supported method
        logger.warning('Response could not be processed : ' + text)
        return None

    return text.strip()
200 | 
201 | 
def links_for_files(files_list):
    """Method that queries MediaWiki API used by Gamepedia to return links to the files list passed.
    Does batch processing to avoid max number of files limit and header size limit.
    Used asynchronous requests for faster processing.
    Removes files version as we only need the latest one.

    MediaWiki allows max 50 files(titles) at once : https://www.mediawiki.org/wiki/API:Query.

    Files for which no usable mp3 url is returned are logged and left out of the mapping.

    :param files_list: list of files
    :return files_link_mapping: dict with file names and their links. dict['file'] = link
    """

    # Method level constants
    max_title_list_length = 50
    file_title_prefix_length = len('%7CFile%3A')  # url encoded file title prefix '|File:'
    max_header_length = 1960  # max header length as found by trial and error

    files_link_mapping = {}
    futures = []
    # Length of the request url without any file in it, used to work out how much room is left for titles
    empty_api_length = len(requests.Request('get', url=API_PATH, params=get_params_for_files_api([])).prepare().url)

    # To add retry in case of Status 429 : Too many requests
    with FuturesSession() as session:
        retries = 5
        status_forcelist = [429]
        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            respect_retry_after_header=True,
            status_forcelist=status_forcelist,
        )

        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        files_batch_list = []
        current_title_length = 0

        for file in files_list:
            file_name_len = file_title_prefix_length + len(file)
            # If header size overflows or the number of files reaches the limit specified by MediaWiki
            if file_name_len + current_title_length >= max_header_length - empty_api_length or \
                    len(files_batch_list) >= max_title_list_length:
                # Issue a request for current batch of files
                futures.append(session.get(url=API_PATH, params=get_params_for_files_api(files_batch_list)))

                # Reset files tracking variables
                files_batch_list = []
                current_title_length = 0

            files_batch_list.append(file)
            current_title_length += file_name_len

        # Request the last, partially filled batch
        if files_batch_list:
            futures.append(session.get(url=API_PATH, params=get_params_for_files_api(files_batch_list)))

        for future in as_completed(futures):
            json_response = future.result().json()
            pages = json_response['query']['pages']

            for page in pages.values():
                title = page['title']
                try:
                    url = page['imageinfo'][0]['url']
                    # Remove file version and trailing path
                    file_url = url[:url.index('.mp3') + len('.mp3')]
                except (KeyError, IndexError, ValueError):
                    # KeyError: no imageinfo/url for the page, IndexError: empty imageinfo list,
                    # ValueError: url is not an mp3. A single bad file must not abort the whole run.
                    logger.critical('File does not have a link : ' + title)
                else:
                    files_link_mapping[title[5:]] = file_url  # title[5:] drops the `File:` prefix

    return files_link_mapping
275 | 
276 | 
def populate_chat_wheel():
    """Method that populates chat wheel responses featured in The International yearly Battle Pass.
    Every matched section of the Chat Wheel page is stored with the event name in place of the hero name.
    Other chat wheel responses from events and Dota plus are not processed currently.
    """
    page_url = URL_DOMAIN + '/' + 'Chat_Wheel'
    chat_wheel_source = requests.get(url=page_url, params={'action': 'raw'}).text

    section_pattern = re.compile(CHAT_WHEEL_SECTION_REGEX, re.DOTALL | re.IGNORECASE)

    for section in section_pattern.finditer(chat_wheel_source):
        event_name = section['event']
        section_responses = create_responses_text_and_link_list(responses_source=section['source'])

        db_api.add_hero_and_responses(hero_name=event_name, response_link_list=section_responses)
291 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | urllib3
 2 | beautifulsoup4
 3 | praw
 4 | psycopg2
 5 | redis
 6 | pony
 7 | rapidfuzz
 8 | requests
 9 | cacheout
10 | requests-futures


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
 1 | """Module to run the bot. Executes the work() method of bot that executes the endless loop of reading comments and
 2 | submissions and replying to them if the match any response.
 3 | """
 4 | from bot.worker import work, logger
 5 | from util.logger import setup_logger
 6 | 
 7 | __author__ = 'MePsyDuck'
 8 | 
if __name__ == '__main__':
    # Logging is configured before the bot starts working.
    setup_logger()
    try:
        work()
    except (KeyboardInterrupt, SystemExit):
        # Record the interruption (with traceback) instead of letting it propagate out of the script.
        logger.exception("Script stopped")
15 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """Module to be run first time to set up the database
 2 | * Drops all tables if the exist and creates them again.
 3 | * Populates responses from Gamepedia
 4 | * Populates heroes from Gamepedia and Dota 2 subreddit CSS.
 5 | """
 6 | from parsers import css_parser, wiki_parser
 7 | from util.database.database import db_api
 8 | 
 9 | __author__ = 'MePsyDuck'
10 | 
11 | from util.logger import setup_logger
12 | 
13 | 
def first_run():
    """Method to rebuild the database from scratch.
    Drops and recreates all tables, then populates responses from the wiki and finally the hero details
    from the subreddit css. The steps are executed in exactly this order.
    """
    setup_steps = (
        db_api.drop_all_tables,
        db_api.create_all_tables,
        wiki_parser.populate_responses,
        css_parser.populate_heroes,
    )
    for step in setup_steps:
        step()
19 | 
20 | 
if __name__ == '__main__':
    # Logging is configured before the database is rebuilt.
    setup_logger()
    first_run()
24 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | """Module to test the bot and parser. All test should be placed in `tests` folder and python filename should start with
 2 | `test_`.
 3 | """
 4 | import sys
 5 | import unittest
 6 | 
 7 | __author__ = 'MePsyDuck'
 8 | 
 9 | suite = unittest.TestLoader().discover('tests', pattern='test_*.py')
10 | result = not unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful()
11 | sys.exit(result)
12 | 


--------------------------------------------------------------------------------
/tests/test_bot.py:
--------------------------------------------------------------------------------
 1 | """Module used to test bot worker module methods.
 2 | """
 3 | 
 4 | import unittest
 5 | 
 6 | import config
 7 | from bot import account
 8 | from bot import worker
 9 | 
10 | __author__ = 'Jonarzz'
11 | __maintainer__ = 'MePsyDuck'
12 | 
13 | 
class BotWorkerTest(unittest.TestCase):
    """Class used to test bot worker module.
    Inherits from TestCase class of unittest module.
    """

    def test_parse_comment(self):
        """Method that tests the process_text method from worker module.
        Each case is a pair of raw text and the processed text expected for it.
        """
        cases = (
            ("That's a great idea!!!", "that s a great idea"),
            ("  WoNdErFuL  ", "wonderful"),
            ("How are you?", "how are you"),
            ("Isn't is good to have quotes?  you can add any response in quote and bot would still \n\n> reply to them",
             "reply to them"),
            ("> multiple quotes \n\n > but reply to \n\n > only first one", "multiple quotes"),
        )
        for raw_text, expected in cases:
            self.assertEqual(worker.process_text(raw_text), expected)

    def test_account(self):
        """Method used to test the Reddit instance returned by get_account().
        The logged in user has to be the one set in config.
        """
        logged_in_user = account.get_account().user.me()
        self.assertEqual(logged_in_user, config.USERNAME)
37 | 


--------------------------------------------------------------------------------
/tests/test_wiki_parser.py:
--------------------------------------------------------------------------------
 1 | """Module used to test dota_wiki_parser module methods.
 2 | """
 3 | 
 4 | import unittest
 5 | 
 6 | from config import RESPONSES_CATEGORY
 7 | from parsers import wiki_parser
 8 | 
 9 | __author__ = 'Jonarzz'
10 | __maintainer__ = 'MePsyDuck'
11 | 
12 | 
class WikiParserTest(unittest.TestCase):
    """Class used to test wiki_parser module.
    Inherits from TestCase class of unittest module.
    """

    def test_pages_to_parse(self):
        """Method testing pages_for_category method from wiki_parser module.
        The method checks if the returned list consists of pages as expected.
        """
        pages = wiki_parser.pages_for_category(RESPONSES_CATEGORY)

        self.assertGreater(len(pages), 150)
        for expected_page in ('Abaddon/Responses', 'Zeus/Responses'):
            self.assertIn(expected_page, pages)
27 | 


--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jonarzz/DotaResponsesRedditBot/5db25560accdfcd85a8a9e3e1d7d0d429baee49e/util/__init__.py


--------------------------------------------------------------------------------
/util/caching/__init__.py:
--------------------------------------------------------------------------------
 1 | from config import CACHE_PROVIDER
 2 | from util.caching.db_cache import DBCache
 3 | from util.caching.memory_cache import MemoryCache
 4 | from util.caching.redis_cache import RedisCache
 5 | 
 6 | 
 7 | def get_cache_api():
 8 |     if CACHE_PROVIDER == 'redis':
 9 |         return RedisCache()
10 |     elif CACHE_PROVIDER == 'memory':
11 |         return MemoryCache()
12 |     elif CACHE_PROVIDER == 'db':
13 |         return DBCache()
14 | 


--------------------------------------------------------------------------------
/util/caching/caching.py:
--------------------------------------------------------------------------------
 1 | """Module used to store replyable ids (comment or submission) in cache.
 2 | Currently support only three implementations : DB based, in memory and redis.
 3 | Support for more implementations can be added by extending CacheAPI class.
 4 | """
 5 | 
 6 | from abc import ABC, abstractmethod
 7 | 
 8 | __author__ = 'MePsyDuck'
 9 | 
10 | 
class CacheAPI(ABC):
    """Base class for caches that remember which Reddit things were already handled.

    Subclasses provide the storage by implementing `_exists` and `_set`.
    """

    @abstractmethod
    def _exists(self, key):
        """Tell whether `key` is stored in the cache."""

    @abstractmethod
    def _set(self, key):
        """Store `key` in the cache."""

    def exists(self, thing_id):
        """Report whether a Reddit thing (currently comment/submission) was already processed/replied.
        A thing that is not cached yet is added to the cache as a side effect.

        :param thing_id: The id of comment/submission to be cached.
        :returns: `True` if the thing was already cached, else `False`.
        """
        if not self._exists(thing_id):
            # First sighting: remember it so the next call reports it as processed.
            self._set(thing_id)
            return False
        return True
32 | 


--------------------------------------------------------------------------------
/util/caching/db_cache.py:
--------------------------------------------------------------------------------
 1 | """Module to store thing_ids in DB.
 2 | Not recommended as this needs manual clearing of old ids.
 3 | """
 4 | 
 5 | from util.caching.caching import CacheAPI
 6 | from util.database.database import db_api
 7 | 
 8 | __author__ = 'MePsyDuck'
 9 | 
10 | 
class DBCache(CacheAPI):
    """Cache backend that keeps processed thing ids in the database through `db_api`."""

    def _exists(self, key):
        """Look up `key` in the DB cache.

        :param key: The `key` (thing_id) to look for in DB cache.
        :return: The result of `db_api.check_if_thing_exists` for `key`.
        """
        found = db_api.check_if_thing_exists(key)
        return found

    def _set(self, key):
        """Record `key` in the DB cache.

        :param key: The `key` (thing_id) to be added to DB cache.
        """
        db_api.add_thing_to_cache(key)
26 | 


--------------------------------------------------------------------------------
/util/caching/memory_cache.py:
--------------------------------------------------------------------------------
 1 | """Module used to save cache in the memory.
 2 | Uses FIFO eviction policy with maximum size of 10,000 and no ttl.
 3 | JSON File used to dump data on shutdown and load it back up on startup.
 4 | """
 5 | 
 6 | import atexit
 7 | import json
 8 | import os
 9 | import signal
10 | from collections import OrderedDict
11 | 
12 | from cacheout import FIFOCache
13 | 
14 | from config import CACHE_URL
15 | from util.caching.caching import CacheAPI
16 | 
17 | __author__ = 'MePsyDuck'
18 | 
19 | 
class MemoryCache(CacheAPI):
    """In-memory FIFO cache of processed thing ids, dumped to a JSON file (`CACHE_URL`) between runs."""

    def __init__(self):
        """Method that loads dumped cache from previous shutdown stored in json file.

        Also registers the dump to run at interpreter exit and turns SIGTERM/SIGINT into a regular
        interpreter exit so that the dump runs for those signals as well.
        """
        self.cache = FIFOCache(maxsize=10_000, ttl=0, default='')
        if os.path.exists(CACHE_URL):
            with open(CACHE_URL) as cache_json:
                old_cache = json.load(cache_json, object_pairs_hook=OrderedDict)
            self.cache.set_many(old_cache)
        atexit.register(self._cleanup)
        # Signal handlers are invoked with (signum, frame); `_cleanup` takes no arguments, so a
        # dedicated handler is registered instead of `_cleanup` itself.
        signal.signal(signal.SIGTERM, self._handle_signal)
        signal.signal(signal.SIGINT, self._handle_signal)

    def _handle_signal(self, signum, frame):
        """Signal handler that ends the process through the normal exit path.

        Raising `SystemExit` lets the `atexit` hook registered in `__init__` dump the cache.

        :param signum: Number of the received signal.
        :param frame: Stack frame that was interrupted by the signal (unused).
        """
        raise SystemExit(128 + signum)

    def _cleanup(self):
        """Method to dump cache data to json file on script interrupt/shutdown.
        """
        with open(CACHE_URL, 'w+') as cache_json:
            json.dump(self.cache.copy(), cache_json)

    def _exists(self, key):
        """Method to check if key exists in cache.

        :param key: The `key` to be checked in cache.
        :return: `True` if `key` exist in cache.
        """
        return key in self.cache

    def _set(self, key):
        """Method to add thing_id to the cache.

        :param key: The `key` to be added to the cache.
        """
        self.cache.set(key, '')
53 | 


--------------------------------------------------------------------------------
/util/caching/redis_cache.py:
--------------------------------------------------------------------------------
 1 | """Module that allows Redis to be used as cache. Useful when running on Heroku or such platforms without persistent
 2 | file storage.
 3 | """
 4 | 
 5 | from redis import Redis
 6 | 
 7 | from config import CACHE_URL, CACHE_TTL
 8 | from util.caching.caching import CacheAPI
 9 | from util.logger import logger
10 | 
11 | __author__ = 'MePsyDuck'
12 | 
13 | 
class RedisCache(CacheAPI):
    """Cache backend that stores processed thing ids as expiring keys in Redis."""

    def __init__(self):
        """Create a new Redis instance when a new object for this class is created.
        """
        self.redis = Redis.from_url(CACHE_URL)
        # NOTE(review): CACHE_URL is logged verbatim; if it can embed credentials this leaks them - confirm.
        logger.info('Connected to Redis at ' + CACHE_URL)

    def _exists(self, key):
        """Method to check if `key` exists in redis cache.

        :param key: The `key` to be checked in redis cache.
        :return: `True` if `key` exists in redis cache, else `False`.
        """
        return bool(self.redis.exists(key))

    def _set(self, key):
        """Method to set `key` with an empty value in redis.
        Key expires after CACHE_TTL days (`ex` in seconds).

        :param key: The `key` (thing_id) to be added to redis cache.
        """
        # CACHE_TTL is expressed in days, so convert days -> seconds for `ex`.
        self.redis.set(name=key, value='', ex=CACHE_TTL * 24 * 60 * 60)
37 | 


--------------------------------------------------------------------------------
/util/database/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jonarzz/DotaResponsesRedditBot/5db25560accdfcd85a8a9e3e1d7d0d429baee49e/util/database/__init__.py


--------------------------------------------------------------------------------
/util/database/database.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import random
  3 | import urllib.parse as up
  4 | 
  5 | from pony.orm import db_session, commit
  6 | 
  7 | from config import CACHE_TTL, DB_URL, DB_PROVIDER
  8 | from util.database.models import Responses, Heroes, RedditCache, db
  9 | from util.logger import logger
 10 | 
 11 | __author__ = 'MePsyDuck'
 12 | 
 13 | 
 14 | class DatabaseAPI:
 15 |     def __init__(self):
 16 |         """Method to initialize db connection. Binds PonyORM Database object `db` to configured database.
 17 |         Creates the mapping between db tables and models.
 18 |         """
 19 |         self.db = db
 20 |         if DB_PROVIDER == 'sqlite':
 21 |             self.db.bind(provider='sqlite', filename=DB_URL, create_db=True)
 22 |         elif DB_PROVIDER == 'mysql':
 23 |             up.uses_netloc.append("mysql")
 24 |             url = up.urlparse(DB_URL)
 25 |             self.db.bind(provider='mysql', host=url.hostname, user=url.username, passwd=url.password, db=url.path[1:])
 26 |         elif DB_PROVIDER == 'postgres':
 27 |             up.uses_netloc.append("postgres")
 28 |             url = up.urlparse(DB_URL)
 29 |             self.db.bind(provider='postgres', user=url.username, password=url.password, host=url.hostname,
 30 |                          database=url.path[1:])
 31 |         else:
 32 |             self.db.bind(provider='sqlite', filename='bot.db', create_db=True)
 33 | 
 34 |         self.db.generate_mapping(create_tables=True)
 35 | 
 36 |     # Responses table queries
 37 |     @db_session
 38 |     def get_link_for_response(self, processed_text, hero_id=None):
 39 |         """Method that returns the link for the processed response text and given optional hero_id. If multiple matching
 40 |         entries are found, returns a random result.
 41 | 
 42 |         :param processed_text: The plain processed response text.
 43 |         :param hero_id: The hero's id.
 44 |        :return The link to the response, hero_id, or else None, None if no matching response is found.
 45 |         """
 46 |         if hero_id:
 47 |             responses = Responses.select(lambda r: r.processed_text == processed_text and r.hero_id.id == hero_id)
 48 |         else:
 49 |             responses = Responses.select(lambda r: r.processed_text == processed_text)
 50 | 
 51 |         if len(responses):
 52 |             response = random.choice(list(responses))
 53 |             return response.response_link, response.hero_id.id
 54 |         else:
 55 |             return None, None
 56 | 
 57 |     # RedditCache table queries
 58 |     @db_session
 59 |     def add_thing_to_cache(self, thing_id):
 60 |         """Method that adds current time and Reddit replyable or submission to RedditCache table by their id(fullname).
 61 | 
 62 |         :param thing_id: The fullname of replyable/submission on Reddit
 63 |         """
 64 |         RedditCache(thing_id=thing_id)
 65 | 
 66 |     @db_session
 67 |     def delete_old_thing_ids(self):
 68 |         """Method used to remove things in cache older than a period of time `CACHE_TTL` defined in the config file.
 69 |         """
 70 |         furthest_date = datetime.datetime.utcnow() - datetime.timedelta(days=CACHE_TTL)
 71 | 
 72 |         RedditCache.select(lambda t: t.added_datetime < furthest_date).delete(bulk=True)
 73 | 
 74 |     @db_session
 75 |     def check_if_thing_exists(self, thing_id):
 76 |         """Method that checks if the replyable id given is already present in the RedditCache table
 77 | 
 78 |         :param thing_id: The id of the replyable/submission on Reddit
 79 |         :return: True if the `thing_id` is already present in table, else False
 80 |         """
 81 |         thing = RedditCache.select(lambda t: t.thing_id == thing_id)
 82 |         return thing is not None
 83 | 
 84 |     # Heroes table queries
 85 |     @db_session
 86 |     def add_hero_to_table(self, hero_name, img_path=None, flair_css=None):
 87 |         """Method to add hero to the table. All parameters are strings.
 88 | 
 89 |         :param hero_name: Hero's name
 90 |         :param img_path: path to hero's image
 91 |         :param flair_css: CSS for the flair
 92 |         """
 93 |         Heroes(hero_name=hero_name, img_path=img_path, flair_css=flair_css)
 94 | 
 95 |     @db_session
 96 |     def get_hero_id_by_name(self, hero_name):
 97 |         """Method to get hero's id from table.
 98 | 
 99 |         :param hero_name: Hero's name
100 |         :return: Hero's id
101 |         """
102 |         h = Heroes.get(lambda hero: hero.hero_name.lower() == hero_name)
103 |         return h.id if h is not None else None
104 | 
105 |     @db_session
106 |     def get_hero_name(self, hero_id):
107 |         """Method to get hero's name from table.
108 | 
109 |         :param hero_id: Hero's id
110 |         :return: Hero's name
111 |         """
112 |         h = Heroes[hero_id]
113 |         return h.hero_name if h is not None else None
114 | 
115 |     @db_session
116 |     def get_hero_id_by_flair_css(self, flair_css):
117 |         """Method to get hero_id from the table based on the flair css.
118 | 
119 |         :param flair_css: Hero's css class as in r/DotA2 subreddit
120 |         :return: Hero's id
121 |         """
122 |         if flair_css:
123 |             h = Heroes.get(flair_css=flair_css)
124 |             return h.id if h is not None else None
125 | 
126 |     @db_session
127 |     def get_img_dir_by_id(self, hero_id):
128 |         """Method to get image directory for hero's flair.
129 | 
130 |          :param hero_id: Hero's id.
131 |          :return: The directory path to the image.
132 |          """
133 |         h = Heroes[hero_id]
134 |         return h.img_path if h is not None else None
135 | 
136 |     @db_session
137 |     def get_all_hero_names(self):
138 |         """Method to get all heroes' names.
139 | 
140 |         :return: All heroes' names as a list.
141 |         """
142 |         heroes = Heroes.select()[:]
143 |         return [hero.hero_name for hero in heroes]
144 | 
145 |     @db_session
146 |     def update_hero(self, hero_name, img_path, flair_css):
147 |         """Method to update hero's attributes in the Heroes table.
148 | 
149 |         :param hero_name: Hero's name
150 |         :param img_path: Hero's img dir/path
151 |         :param flair_css: Hero's css class
152 |         """
153 |         hero = Heroes.get(hero_name=hero_name)
154 |         hero.img_path = img_path
155 |         hero.flair_css = flair_css
156 | 
157 |     def create_all_tables(self):
158 |         """Method to create all tables defined in the models
159 |         """
160 |         self.db.create_tables()
161 | 
162 |     def drop_all_tables(self):
163 |         """Method to drop all tables defined in the models
164 |         """
165 |         self.db.drop_all_tables(with_all_data=True)
166 | 
167 |     @db_session
168 |     def add_hero_and_responses(self, hero_name, response_link_list):
169 |         """Method to add hero and it's responses to the db.
170 | 
171 |         :param hero_name: Hero name who's responses will be inserted
172 |         :param response_link_list: List with tuples in the form of (original_text, text, link)
173 |         """
174 |         h = Heroes(hero_name=hero_name, img_path=None, flair_css=None)
175 |         commit()
176 | 
177 |         for original_text, processed_text, link in response_link_list:
178 |             existing_response = Responses.get(response_link=link)
179 |             if not existing_response:
180 |                 Responses(processed_text=processed_text, original_text=original_text, response_link=link, hero_id=h.id)
181 |             else:
182 |                 logger.debug('Link already exists : ' + link + ' for response ' + existing_response.original_text)
183 | 
184 | 
# Module-level DatabaseAPI instance shared by importers. Constructing it binds the database and
# generates the entity mapping as soon as this module is imported.
db_api = DatabaseAPI()
186 | 


--------------------------------------------------------------------------------
/util/database/models.py:
--------------------------------------------------------------------------------
 1 | """Module that defines all the models used by PonyORM for db queries.
 2 | """
 3 | 
 4 | from datetime import datetime
 5 | 
 6 | from pony.orm import Database, PrimaryKey, Required, Optional, Set
 7 | 
 8 | __author__ = 'MePsyDuck'
 9 | 
# PonyORM Database object that the entity classes in this module are declared on.
db = Database()
11 | 
12 | 
class Responses(db.Entity):
    """Entity for a single response line, linked to the hero it belongs to."""
    id = PrimaryKey(int, auto=True)  # Default db id column for pk
    processed_text = Required(str, 1000, index='idx_parsed_text')  # Stores the processed response text
    original_text = Required(str, 1000)  # Stores the original response text/ Unused currently, but may help in future.
    response_link = Required(str, unique=True)  # Link to the response text
    hero_id = Required('Heroes')  # The hero_id for hero whose response text this is
19 | 
20 | 
class RedditCache(db.Entity):
    """Entity for a Reddit comment/submission id that has already been processed."""
    id = PrimaryKey(int, auto=True)  # Default db id column for pk
    thing_id = Required(str, unique=True)  # Comment or submission id that is already processed
    # Pass the callable (not its result) so the default is evaluated per row instead of once at import time.
    added_datetime = Optional(datetime, default=datetime.utcnow)  # Datetime of processing the replyable
25 | 
26 | 
class Heroes(db.Entity):
    """Entity for a hero / announcer pack together with its flair data and its responses."""
    id = PrimaryKey(int, auto=True)  # Default db id column for pk
    hero_name = Required(str, unique=True)  # Hero's / Announcer pack's name
    img_path = Optional(str, nullable=True)  # Path to hero's flair image in reddit css
    flair_css = Optional(str, nullable=True)  # Class for hero in reddit css
    responses = Set(Responses)  # Relationship between Responses and Heroes table
33 | 


--------------------------------------------------------------------------------
/util/logger.py:
--------------------------------------------------------------------------------
 1 | """Module to setup logging for bot and praw and provide logger for other modules.
 2 | """
 3 | 
 4 | import logging
 5 | import os
 6 | 
 7 | from config import BOT_LOGGER, PRAW_LOGGER, LOG_DIR, LOG_FORMAT, LOG_LEVEL, INFO_FILENAME, ERROR_FILENAME, \
 8 |     PRAW_FILENAME
 9 | 
10 | __author__ = 'MePsyDuck'
11 | 
# Bot application logger, looked up by the configured BOT_LOGGER name.
logger = logging.getLogger(BOT_LOGGER)
13 | 
14 | 
def setup_logger():
    """Method to setup loggers. Currently logs only bot application logs and PRAW logs.

    Disable file logging if running on Heroku since Heroku does not offer persistent disk storage. All logs should be
    read from Stream output instead.
    """
    # Creates missing parent directories too and does not fail when the directory already exists.
    os.makedirs(LOG_DIR, exist_ok=True)

    log_formatter = logging.Formatter(LOG_FORMAT)
    log_level = logging.getLevelName(LOG_LEVEL)

    # Handlers
    info_log_file = os.path.join(LOG_DIR, INFO_FILENAME)
    info_file_handler = logging.FileHandler(info_log_file, mode='a')
    info_file_handler.setFormatter(log_formatter)
    info_file_handler.setLevel(logging.INFO)

    error_log_file = os.path.join(LOG_DIR, ERROR_FILENAME)
    error_file_handler = logging.FileHandler(error_log_file, mode='a')
    error_file_handler.setFormatter(log_formatter)
    error_file_handler.setLevel(logging.ERROR)

    praw_log_file = os.path.join(LOG_DIR, PRAW_FILENAME)
    praw_handler = logging.FileHandler(praw_log_file, mode='a')
    # Use the same format as every other handler so the PRAW file carries timestamps/levels as well.
    praw_handler.setFormatter(log_formatter)
    praw_handler.setLevel(logging.WARNING)

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(log_formatter)
    stream_handler.setLevel(logging.DEBUG)

    # PRAW logging
    praw_logger = logging.getLogger(PRAW_LOGGER)
    praw_logger.setLevel(log_level)
    praw_logger.addHandler(stream_handler)
    praw_logger.addHandler(praw_handler)

    # Internal logging
    bot_logger = logging.getLogger(BOT_LOGGER)
    bot_logger.setLevel(log_level)
    bot_logger.addHandler(info_file_handler)  # This should be commented out if running on Heroku
    bot_logger.addHandler(error_file_handler)  # This should be commented out if running on Heroku
    bot_logger.addHandler(stream_handler)
58 | 


--------------------------------------------------------------------------------
/util/response_info.py:
--------------------------------------------------------------------------------
class ResponseInfo:
    """Plain holder for a response's hero id and link, used to hand both around between functions.
    """
    def __init__(self, hero_id, link):
        # Store both values unchanged as instance attributes.
        self.hero_id, self.link = hero_id, link
7 | 


--------------------------------------------------------------------------------
/util/response_request.py:
--------------------------------------------------------------------------------
 1 | from util.str_utils import preprocess_text
 2 | import requests
 3 | 
 4 | 
 5 | def request_cargo_set(url):
 6 |     web_request = requests.get(url)
 7 |     web_json = web_request.json()
 8 |     cargo_set = set()
 9 |     for objects in web_json['cargoquery']:
10 |         cargo_set.add(preprocess_text(objects['title']['title']))
11 |     return cargo_set
12 | 


--------------------------------------------------------------------------------
/util/str_utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import string
 3 | 
 4 | PUNCTUATION_TRANS = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
 5 | WHITESPACE_TRANS = str.maketrans(string.whitespace, ' ' * len(string.whitespace))
 6 | 
 7 | 
 8 | def preprocess_text(text):
 9 |     """Method for pre-processing the given response text.
10 |     It:
11 |     * replaces all punctuations with spaces
12 |     * replaces all whitespace characters (tab, newline etc) with spaces
13 |     * removes trailing and leading spaces
14 |     * removes double spaces
15 |     * changes to lowercase
16 | 
17 |     :param text: the text to be cleaned
18 |     :return: cleaned text
19 |     """
20 | 
21 |     text = text.translate(PUNCTUATION_TRANS)
22 |     text = text.translate(WHITESPACE_TRANS)
23 |     text = text.strip().lower()
24 |     text = re.sub(' +', ' ', text)
25 |     return text
26 | 


--------------------------------------------------------------------------------