├── .gitignore ├── README.md ├── base ├── install │ └── requirements.txt ├── redditbot │ ├── __init__.py │ └── base │ │ ├── __init__.py │ │ ├── handlers.py │ │ └── utils.py └── setup.py └── bots ├── install └── requirements.txt ├── redditbot ├── __init__.py └── bots │ ├── __init__.py │ ├── emote_counter │ ├── __init__.py │ ├── bot.py │ └── runbot.py │ ├── settings │ ├── __init__.py │ ├── base.py │ ├── dev_mac.py │ ├── dev_win.py │ └── prod.py │ └── xkcdref │ ├── __init__.py │ ├── bot.py │ ├── datastore.py │ ├── runbot.py │ └── xkcdfetcher.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # My stuff 2 | .DS_Store 3 | .idea/ 4 | local_settings.py 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # Installer logs 31 | pip-log.txt 32 | pip-delete-this-directory.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | redditbot 2 | ========= 3 | 4 | This is a reddit bot framework (v2!) 5 | 6 | Built-in Templates: 7 | - Message-triggered bots (PMs, comment replies, post replies, username mentions) 8 | - Comment-triggered bots 9 | - Submission-triggered bots 10 | - Vote-triggered bots 11 | 12 | Included working bots: 13 | - Emote counter (triggered through PMs) 14 | - XKCD transcriber (triggered via submissions and comments, monitors messages to add users to the ignore list, monitors votes) 15 | 16 | Requires: 17 | Python 2.7, praw, simplejson, snudown, and a few other packages (see requirements.txt) 18 | 19 | 20 | --- 21 | 22 | How-to: 23 | 24 | 1. Subclass UserCommentsVoteTriggeredBot, MailTriggeredBot, SubredditCommentTriggeredBot, SubredditSubmissionTriggeredBot, or BotHandler if you need a new type. 25 | 26 | 2. Implement the \_check() and \_do() functions at a minimum. If you are subclassing a template, be sure to call the super() function for each. If not, implement \_get_content() as well. 27 | 28 | 3. In v1, the built-in bot templates automatically checked that a bot does not reply to the same thing twice. This check has been moved into the `utils` module and must now be called explicitly (see the example at the end of this README). 29 | 30 | 4. `MultiBotHandler` can be used to run multiple bots at the same time in a single Python process. 31 | 32 | 5. It is important that 33 | 34 | ``` 35 | # Must be first for monkey_patch() 36 | from redditbot.base import patch_all 37 | patch_all() 38 | ``` 39 | 40 | are the first lines that execute when the Python process is started. These lines ensure that gevent's monkey patches are applied correctly. 41 | 42 | --- 43 | 44 | Notes: 45 | 46 | 1. The example bots can be run with `python runbot.py` 47 | 48 | 2. Accounts and settings are configured in the settings package (settings/base.py plus an environment-specific module) 49 | 50 | 3. local_settings.py can be used to configure dev settings; git ignores it. 51 | 52 | 4. OAuth2 support is included (script type apps only).
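
---

Example: a minimal sketch (not one of the included bots) showing the steps above. The class name, trigger keyword, account values, and subreddit are placeholders; the duplicate-reply check from step 3 uses `utils.has_replied`.

```
# Must be first for monkey_patch()
from redditbot.base import patch_all
patch_all()

import logging

from redditbot.base import utils
from redditbot.base.handlers import SubredditCommentTriggeredBot

logging.basicConfig()


class HelloBot(SubredditCommentTriggeredBot):
    def _check(self, comment):
        # Trigger only on comments that contain the keyword and that this
        # account has not already replied to (v1 did this check automatically).
        if 'hello bot' not in comment.body.lower():
            return False
        return not utils.has_replied(comment, self.auth['username'])

    def _do(self, comment):
        # Return True on success so the handler logs the object as processed.
        return utils.send_reply(comment, 'Hello!') is not None


if __name__ == '__main__':
    HelloBot(user_agent='hello bot by /u/example',   # placeholder user agent
             auth={'username': '', 'password': ''},  # fill in a real account
             delay=30,
             fetch_limit=100,
             cache_size=500,
             subreddit='test').run()
```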
53 | -------------------------------------------------------------------------------- /base/install/requirements.txt: -------------------------------------------------------------------------------- 1 | gevent>=1.0.2 2 | praw>=3.4.0 3 | pylru>=1.0.6 4 | requests>=2.5.1 -------------------------------------------------------------------------------- /base/redditbot/__init__.py: -------------------------------------------------------------------------------- 1 | __import__("pkg_resources").declare_namespace(__name__) -------------------------------------------------------------------------------- /base/redditbot/base/__init__.py: -------------------------------------------------------------------------------- 1 | def patch_all(): 2 | from gevent import monkey 3 | monkey.patch_all() 4 | 5 | import logging 6 | logger = logging.getLogger(__name__) 7 | logger.setLevel(logging.INFO) 8 | logger.info('Monkey patching...') 9 | -------------------------------------------------------------------------------- /base/redditbot/base/handlers.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | 4 | import gevent 5 | import praw 6 | import pylru 7 | import requests 8 | import requests.auth 9 | 10 | logger = logging.getLogger(__name__) 11 | logger.setLevel(logging.INFO) 12 | 13 | OAUTH_ACCESS_TOKEN_URL = 'https://www.reddit.com/api/v1/access_token' 14 | 15 | 16 | class MultiBotHandler(object): 17 | def __init__(self, handlers): 18 | self.handlers = handlers 19 | 20 | def run(self): 21 | greenlets = [] 22 | for handler in self.handlers: 23 | greenlets.append(gevent.spawn(handler.run)) 24 | gevent.joinall(greenlets) 25 | 26 | 27 | class BotHandler(object): 28 | def __init__(self, user_agent, auth, delay, fetch_limit, cache_size=0, dry_run=False): 29 | self.user_agent = user_agent 30 | self.auth = auth 31 | self.delay = delay 32 | self.fetch_limit = fetch_limit 33 | self.cache_size = cache_size 34 | self.dry_run = dry_run 35 | self.cache = pylru.lrucache(self.cache_size) if self.cache_size > 0 else None 36 | self.api_request_delay = 1.0 if self.__is_oauth() else 2.0 37 | self.r = praw.Reddit(self.user_agent, cache_timeout=0, api_request_delay=self.api_request_delay) 38 | self.expires = -1 39 | self.__auth() 40 | 41 | def _get_content(self): 42 | raise NotImplementedError() 43 | 44 | def _check(self, obj): 45 | raise NotImplementedError() 46 | 47 | def _do(self, obj): 48 | raise NotImplementedError() 49 | 50 | def __is_oauth(self): 51 | return 'client_id' in self.auth and 'secret' in self.auth 52 | 53 | def __update_access_credentials(self): 54 | # Fetch access token 55 | client_auth = requests.auth.HTTPBasicAuth(self.auth['client_id'], self.auth['secret']) 56 | response = requests.post(OAUTH_ACCESS_TOKEN_URL, auth=client_auth, data={ 57 | 'grant_type': 'password', 58 | 'username': self.auth['username'], 59 | 'password': self.auth['password'] 60 | }, headers={ 61 | 'User-Agent': self.user_agent 62 | }) 63 | 64 | # Check response 65 | if response.ok: 66 | response = response.json() 67 | else: 68 | logger.error('Could not retrieve access creds: Status {status}'.format(status=response.status_code)) 69 | return 70 | 71 | # Update 72 | if 'error' in response: 73 | logger.error('Could not retrieve access creds: Json error: {status}'.format(status=response['error'])) 74 | else: 75 | logger.info('Setting access creds for oauth') 76 | self.r.set_access_credentials(scope='*', access_token=response['access_token']) 77 | self.expires = time.time() +
int(response['expires_in']) * 0.9 78 | 79 | def __auth(self): 80 | if 'username' not in self.auth or 'password' not in self.auth: 81 | raise Exception("Must provide username and password in auth") 82 | 83 | if self.__is_oauth(): 84 | self.r.set_oauth_app_info(client_id='a', client_secret='a', redirect_uri='a') 85 | self.__update_access_credentials() 86 | else: 87 | self.r.login(self.auth['username'], self.auth['password']) 88 | 89 | def __main(self): 90 | # Check if we need to update access token 91 | if time.time() > self.expires > 0: 92 | self.__update_access_credentials() 93 | 94 | # Get the content 95 | content = self._get_content() 96 | if not content: 97 | logger.warn('Bad content object: skipping...') 98 | return 99 | 100 | hits = 0 101 | misses = 0 102 | 103 | # Process all content 104 | for obj in content: 105 | # Check if it's in the cache 106 | if self.cache is not None: 107 | if obj.id in self.cache: 108 | hits += 1 109 | continue 110 | misses += 1 111 | self.cache[obj.id] = 0 112 | 113 | # Process the object, sandbox exceptions 114 | try: 115 | if not self._check(obj): 116 | continue 117 | logger.info('Found valid object: {id} by {name}.'.format(id=obj.id, 118 | name=obj.author.name if obj.author else '[deleted]')) 119 | if not self._do(obj): 120 | logger.info('Failed to process object {id}.'.format(id=obj.id)) 121 | except Exception as e: 122 | logger.exception('Exception while processing object {id}'.format(id=obj.id)) 123 | 124 | if self.cache is not None: 125 | logger.info('Cache hits/misses/total: {hits} / {misses} / {total}'.format(hits=hits, misses=misses, 126 | total=hits + misses)) 127 | 128 | def run(self): 129 | logger.info('Bot started!') 130 | 131 | while True: 132 | start_time = time.time() 133 | 134 | try: 135 | self.__main() 136 | except Exception as e: 137 | logger.exception('Exception while processing content generator') 138 | 139 | # Sleep at least self.delay per cycle 140 | time_delta = time.time() - start_time 141 | sleep_time = self.delay - time_delta 142 | logger.info('Processing/Sleeping for: {p:.2f}s / {s:.2f}s'.format(p=time_delta, s=max(0, sleep_time))) 143 | logger.info('Finished processing round for {name}'.format(name=self.user_agent)) 144 | if sleep_time > 0: 145 | time.sleep(sleep_time) 146 | 147 | logger.info('Bot finished! 
Exiting gracefully.') 148 | 149 | 150 | class UserCommentsVoteTriggeredBot(BotHandler): 151 | def __init__(self, *args, **kwargs): 152 | self.monitored_user = kwargs.pop('monitored_user') 153 | self.score_threshold_max = kwargs.pop('score_threshold_max', None) 154 | self.score_threshold_min = kwargs.pop('score_threshold_min', None) 155 | if self.score_threshold_max is None and self.score_threshold_min is None: 156 | raise Exception("score_threshold_max or score_threshold_min should be set") 157 | 158 | super(UserCommentsVoteTriggeredBot, self).__init__(*args, **kwargs) 159 | 160 | def _get_content(self): 161 | return self.r.get_redditor(self.monitored_user).get_comments(limit=self.fetch_limit) 162 | 163 | def _check(self, comment): 164 | # Check vote score min 165 | if self.score_threshold_min is not None and comment.score < self.score_threshold_min: 166 | return True 167 | 168 | # Check vote score max 169 | if self.score_threshold_max is not None and comment.score > self.score_threshold_max: 170 | return True 171 | 172 | return False 173 | 174 | 175 | class MailTriggeredBot(BotHandler): 176 | def __init__(self, *args, **kwargs): 177 | super(MailTriggeredBot, self).__init__(*args, **kwargs) 178 | 179 | def _get_content(self): 180 | return self.r.get_unread(limit=self.fetch_limit) 181 | 182 | def is_private_message(self, message): 183 | return not message.was_comment 184 | 185 | def is_comment_reply(self, message): 186 | return message.was_comment and message.subject == 'comment reply' 187 | 188 | def is_post_reply(self, message): 189 | return message.was_comment and message.subject == 'post reply' 190 | 191 | def is_username_mention(self, message): 192 | return message.was_comment and message.subject == 'username mention' 193 | 194 | 195 | class SubredditCommentTriggeredBot(BotHandler): 196 | def __init__(self, *args, **kwargs): 197 | self.subreddit = kwargs.pop('subreddit') 198 | super(SubredditCommentTriggeredBot, self).__init__(*args, **kwargs) 199 | 200 | def _get_content(self): 201 | return self.r.get_comments(self.subreddit, limit=self.fetch_limit) 202 | 203 | 204 | class SubredditSubmissionTriggeredBot(BotHandler): 205 | def __init__(self, *args, **kwargs): 206 | self.subreddit = kwargs.pop('subreddit') 207 | super(SubredditSubmissionTriggeredBot, self).__init__(*args, **kwargs) 208 | 209 | def _get_content(self): 210 | return self.r.get_subreddit(self.subreddit).get_new(limit=self.fetch_limit) 211 | -------------------------------------------------------------------------------- /base/redditbot/base/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import praw 4 | import requests 5 | 6 | logger = logging.getLogger(__name__) 7 | logger.setLevel(logging.INFO) 8 | 9 | 10 | def has_replied(praw_object, username): 11 | """ 12 | Returns True if the specified user has a comment in the top level replies of the given submission/comment/message, 13 | and False otherwise. 14 | For comments, submissions, messages ONLY. 15 | """ 16 | if type(praw_object) == praw.objects.Message: 17 | # TODO: Fix this to actually check properly 18 | # If it's not the first message in the PM thread, we replied previously. 
19 | # This is not the best method, and it is a bit flaky, 20 | # but good enough for most cases 21 | if praw_object.first_message is not None: 22 | return True 23 | return False 24 | elif type(praw_object) == praw.objects.Submission: 25 | praw_object.replace_more_comments(limit=None) 26 | replies = praw_object.comments 27 | elif type(praw_object) == praw.objects.Comment: 28 | replies = praw_object.replies 29 | else: 30 | raise Exception("Object must be an instance of praw.objects.Comment/Submission/Message") 31 | 32 | if not replies: 33 | return False 34 | 35 | # Check each reply if the username matches 36 | username = username.lower() 37 | for reply in replies: 38 | if reply.author and reply.author.name.lower() == username: 39 | return True 40 | 41 | return False 42 | 43 | 44 | def is_comment_owner(praw_comment, username): 45 | """ 46 | Returns True if the specified comment belongs to the user, 47 | otherwise False. 48 | """ 49 | return praw_comment.author and praw_comment.author.name.lower() == username.lower() 50 | 51 | 52 | def send_reply(praw_object, reply_msg): 53 | """ 54 | Returns the reply object if the message was sent successfully, otherwise None. 55 | For comments, submissions, messages ONLY. 56 | """ 57 | try: 58 | if type(praw_object) == praw.objects.Submission: 59 | reply_obj = praw_object.add_comment(reply_msg) 60 | else: 61 | reply_obj = praw_object.reply(reply_msg) 62 | except requests.HTTPError as e: 63 | if e.response.status_code == 403: 64 | logger.error('Could not post reply: Forbidden') 65 | return None 66 | else: 67 | raise 68 | except Exception as e: 69 | logger.exception('Exception while replying') 70 | return None 71 | 72 | logger.info(' => Reply Sent!') 73 | return reply_obj 74 | 75 | 76 | def edit_reply(praw_comment, reply_msg): 77 | """ 78 | Returns True if the comment was edited successfully, and False otherwise. 79 | For comments ONLY. 80 | """ 81 | try: 82 | praw_comment.edit(reply_msg) 83 | except Exception as e: 84 | logger.exception('Exception while editing') 85 | return False 86 | 87 | logger.info(' => Edit was made!') 88 | return True 89 | 90 | 91 | def has_chain(praw_r, praw_comment, username): 92 | """ 93 | Returns True if the parent was made by username. 94 | Returns False otherwise.
95 | """ 96 | if not hasattr(praw_comment, 'parent_id'): 97 | return False 98 | parent = praw_r.get_info(thing_id=praw_comment.parent_id) 99 | if not parent or type(parent) != praw.objects.Comment: 100 | return False 101 | return is_comment_owner(parent, username) 102 | -------------------------------------------------------------------------------- /base/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name='redditbot.base', 5 | version='1.0.0', 6 | author='Jeremy Simpson', 7 | description='Redditbot framework', 8 | license='MIT', 9 | classifiers=[ 10 | 'Development Status :: 5 - Production/Stable', 11 | 'Intended Audience :: Developers', 12 | 'License :: OSI Approved :: MIT License', 13 | 'Programming Language :: Python :: 2.7', 14 | ], 15 | packages=setuptools.find_packages(), 16 | namespace_packages=['redditbot'], 17 | ) 18 | -------------------------------------------------------------------------------- /bots/install/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4>=4.3.2 2 | simplejson>=3.6.5 3 | git+https://github.com/reddit/snudown#egg=snudown -------------------------------------------------------------------------------- /bots/redditbot/__init__.py: -------------------------------------------------------------------------------- 1 | __import__("pkg_resources").declare_namespace(__name__) -------------------------------------------------------------------------------- /bots/redditbot/bots/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luney112/redditbot/db878d557b74e062f46cc2193c899933f7a63d59/bots/redditbot/bots/__init__.py -------------------------------------------------------------------------------- /bots/redditbot/bots/emote_counter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luney112/redditbot/db878d557b74e062f46cc2193c899933f7a63d59/bots/redditbot/bots/emote_counter/__init__.py -------------------------------------------------------------------------------- /bots/redditbot/bots/emote_counter/bot.py: -------------------------------------------------------------------------------- 1 | from redditbot.base.handlers import MailTriggeredBot 2 | from redditbot.base import utils 3 | 4 | 5 | class TopEmotesBot(MailTriggeredBot): 6 | def __init__(self, *args, **kwargs): 7 | super(TopEmotesBot, self).__init__(*args, **kwargs) 8 | 9 | def _check(self, mail): 10 | return self.is_private_message(mail) 11 | 12 | def _do(self, mail): 13 | reply_msg = '[](/sbstalkthread)This bot has been decommissioned.\n\n' \ 14 | 'It has migrated over to [lunarmist.net](http://lunarmist.net/emotes/), ' \ 15 | 'and has been enhanced with graphs and better comment coverage. Check it out!' 
16 | 17 | # Reply to the user and mark it as read 18 | if utils.send_reply(mail, reply_msg): 19 | mail.mark_as_read() 20 | return True 21 | else: 22 | return False 23 | -------------------------------------------------------------------------------- /bots/redditbot/bots/emote_counter/runbot.py: -------------------------------------------------------------------------------- 1 | # Must be first for monkey_patch() 2 | from redditbot.base import patch_all 3 | patch_all() 4 | 5 | import logging 6 | 7 | from bot import TopEmotesBot 8 | from redditbot.bots import settings 9 | 10 | logging.basicConfig() 11 | 12 | 13 | def run(): 14 | TopEmotesBot(user_agent='Emote counter by %s' % settings.AUTHOR, 15 | auth=settings.REDDIT_ACCOUNTS['counts_your_emotes'], 16 | delay=60, 17 | fetch_limit=None, 18 | cache_size=100).run() 19 | 20 | 21 | if __name__ == '__main__': 22 | run() 23 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from base import * 5 | 6 | logger = logging.getLogger(__name__) 7 | logger.setLevel(logging.INFO) 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--settings', help='settings profile to load (dev-mac, dev-win, prod)', default='prod') 11 | 12 | args, _ = parser.parse_known_args() 13 | 14 | if args.settings == 'dev-mac': 15 | logger.info('Loading dev-mac settings') 16 | from dev_mac import * 17 | elif args.settings == 'dev-win': 18 | logger.info('Loading dev-win settings') 19 | from dev_win import * 20 | elif args.settings == 'prod': 21 | logger.info('Loading prod settings') 22 | from prod import * 23 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/base.py: -------------------------------------------------------------------------------- 1 | REDDIT_ACCOUNTS = { 2 | 'counts_your_emotes': { 3 | 'username': '', 4 | 'password': '', 5 | }, 6 | 'xkcd_transcriber': { 7 | 'username': '', 8 | 'password': '', 9 | 'client_id': '', 10 | 'secret': '' 11 | } 12 | } 13 | 14 | AUTHOR = '/u/name_here' 15 | 16 | XKCD_DB_LOCATION = '/path/to/db' 17 | 18 | DRY_RUN = False 19 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/dev_mac.py: -------------------------------------------------------------------------------- 1 | XKCD_DB_LOCATION = '' 2 | DRY_RUN = True 3 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/dev_win.py: -------------------------------------------------------------------------------- 1 | XKCD_DB_LOCATION = '' 2 | DRY_RUN = True 3 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/prod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luney112/redditbot/db878d557b74e062f46cc2193c899933f7a63d59/bots/redditbot/bots/settings/prod.py -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luney112/redditbot/db878d557b74e062f46cc2193c899933f7a63d59/bots/redditbot/bots/xkcdref/__init__.py -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/bot.py:
-------------------------------------------------------------------------------- 1 | import re 2 | import time 3 | import logging 4 | import random 5 | 6 | import praw 7 | import snudown 8 | from bs4 import BeautifulSoup 9 | 10 | from redditbot.base import utils 11 | from redditbot.base.handlers import MailTriggeredBot, UserCommentsVoteTriggeredBot, SubredditCommentTriggeredBot, \ 12 | SubredditSubmissionTriggeredBot 13 | 14 | logger = logging.getLogger(__name__) 15 | logger.setLevel(logging.INFO) 16 | 17 | # Emote stuff 18 | FULL_EMOTE_REGEX = re.compile( 19 | """\[[^\]]*\]\s*\(\s*/(?P[^\s/]+?)(?P-\S+)?\s*(?P["'].*?["'])?\s*\)""") 20 | PONY_SUBS = ["mylittlepony", "mlplounge", "ploungeafterdark", "mylittlefriends", "mylittleandysonic1"] 21 | PONY_SECRETS = [ 22 | u'[](/adorkable "%s")', 23 | u'[](/twibook "%s")', 24 | u'[](/twicookiejar "%s")', 25 | u'[](/twicookie "%s")', 26 | u'[](/pretzeltwi "%s")', 27 | u'[](/twitongue "%s")', 28 | ] 29 | SB_SECRETS = [ 30 | '[](/sbstalkthread)', 31 | '[](/2d)', 32 | '[](/sbload)', 33 | '[](/sbtarget)', 34 | '[](/sweetiecardbot)', 35 | ] 36 | WISDOM_MESSAGES = [ 37 | "Beep boop.", 38 | "0100100100100000011011000110100101101011011001010010000001110000011011110110111001111001", 39 | "meow meow meow meow meow", 40 | "Did you know that a group of cats is called a clowder?", 41 | "Did you know that cats have over 20 muscles that control their ears?", 42 | "Did you know that cats sleep 70% of their lives?", 43 | "Did you know that owning a cat can reduce the risk of stroke and heart attack by a third?", 44 | "Did you know that adult cats only meow to communicate with humans? meow.", 45 | "Did you know that cats make more than 100 different sounds whereas dogs make around 10?", 46 | "Did you know that cats have 1,000 times more data storage than an iPad?", 47 | "Did you know that a house cat is faster than Usain Bolt?", 48 | "Did you know that cats only sweat through their foot pads?", 49 | "Did you know that cats have scent glands along their tail, their forehead, lips, chin, and the underside of their front paws?", 50 | "Did you know that cat owners are 17% more likely to have a graduate degree?", 51 | "Did you know that when your cat brings home a dead mouse or bird, it may do so to show you that you suck at hunting?", 52 | ] 53 | 54 | 55 | REDDIT_PM_IGNORE = "https://reddit.com/message/compose/?to=xkcd_transcriber&subject=ignore%20me&message=ignore%20me" 56 | REDDIT_PM_DELETE = "https://reddit.com/message/compose/?to=xkcd_transcriber&subject=delete&message=delete%20{thing_id}" 57 | NO_BREAK_SPACE = u'\u00A0' 58 | MAX_MESSAGE_LENGTH = 10000 59 | 60 | XKCD_SIG_LINKS = [ 61 | u'[xkcd.com](https://www.xkcd.com)', 62 | u'[xkcd%ssub](https://www.reddit.com/r/xkcd/)' % NO_BREAK_SPACE, 63 | u'[Problems/Bugs?](https://www.reddit.com/r/xkcd_transcriber/)', 64 | u'[Statistics](http://xkcdref.info/statistics/)', 65 | u'[Stop%sReplying](%s)' % (NO_BREAK_SPACE, REDDIT_PM_IGNORE), 66 | u'[Delete](%s)' % REDDIT_PM_DELETE 67 | ] 68 | 69 | MARKDOWN_ESCAPE_CHARACTERS = "\\`*_{}[]()#+-.!:|&<>/^~" 70 | 71 | 72 | class MailXkcdBot(MailTriggeredBot): 73 | def __init__(self, *args, **kwargs): 74 | self.datastore = kwargs.pop('datastore') 75 | self.xkcd_fetcher = kwargs.pop('xkcd_fetcher') 76 | super(MailXkcdBot, self).__init__(*args, **kwargs) 77 | 78 | def _check(self, mail): 79 | if utils.has_replied(mail, self.auth['username']): 80 | return False 81 | if utils.is_comment_owner(mail, self.auth['username']): 82 | return False 83 | return True 84 | 85 | def _do(self, mail): 86 
| body_lower = mail.body.lower() 87 | subject_lower = mail.subject.lower() 88 | result = True 89 | 90 | if self.is_private_message(mail): 91 | if body_lower.find('ignore me') != -1 or subject_lower.find('ignore me') != -1: 92 | result = self.process_ignore(mail) 93 | elif body_lower.startswith('delete') or subject_lower.startswith('delete'): 94 | result = self.process_delete(mail) 95 | elif self.is_comment_reply(mail): 96 | result = self.process_comment_reply(mail) 97 | 98 | if result and not self.dry_run: 99 | mail.mark_as_read() 100 | return result 101 | 102 | def process_ignore(self, mail): 103 | # Add to ignore list 104 | if not self.dry_run: 105 | self.datastore.add_ignore(mail.author.name.lower()) 106 | 107 | # Reply to the user 108 | reply_msg = "You have been added to the ignore list. If this bot continues to respond, PM /u/LunarMist2." 109 | 110 | # Do not send if we are doing a dry run 111 | if self.dry_run: 112 | return True 113 | 114 | #if utils.send_reply(mail, reply_msg): 115 | # return True 116 | #return False 117 | return True 118 | 119 | def process_delete(self, mail): 120 | # Ensure the mail author is the same as the original referencer 121 | parts = mail.body.split(' ') 122 | if len(parts) == 2: 123 | thing_id = parts[1] 124 | obj = self.r.get_info(thing_id=thing_id) 125 | if obj: 126 | parent = self.r.get_info(thing_id=obj.parent_id) 127 | if parent and parent.author and parent.author.name == mail.author.name: 128 | if not self.dry_run: 129 | obj.delete() 130 | logger.info(' => Comment Deleted!') 131 | 132 | return True 133 | 134 | def process_comment_reply(self, mail): 135 | body_lower = mail.body.lower() 136 | 137 | # Check for joke replies 138 | if body_lower.find('thank you') != -1 or body_lower.find('thanks') != -1: 139 | reply_msg = random.choice(SB_SECRETS) + "My pleasure" 140 | elif body_lower.find('i love you') != -1: 141 | reply_msg = random.choice(SB_SECRETS) + "Love ya too~" 142 | elif body_lower == 'k': 143 | reply_msg = "[](/o_o)K" 144 | elif body_lower == ")": 145 | reply_msg = "(" 146 | else: 147 | return True 148 | 149 | # Do not reply if the user is ignored 150 | if mail.author and mail.author.name.lower() in self.datastore.get_ignores(): 151 | logger.info('Skipping mail {id}. Reason: Author on ignore list.'.format(id=mail.id)) 152 | return True 153 | 154 | # Check it was originally a reply to a transcript 155 | if not self._is_transcript_reply(self.r, mail, self.auth['username']): 156 | logger.info('Skipping to post joke reply to {id}. Reason: Not a reply to a transcript'.format(id=mail.id)) 157 | return True 158 | 159 | # Do not send if we are doing a dry run 160 | if self.dry_run: 161 | return True 162 | 163 | # Reply to the user 164 | if utils.send_reply(mail, reply_msg): 165 | return True 166 | return False 167 | 168 | def _is_transcript_reply(self, praw_r, praw_comment, username): 169 | if not hasattr(praw_comment, 'parent_id'): 170 | return False 171 | 172 | parent = praw_r.get_info(thing_id=praw_comment.parent_id) 173 | if not parent or type(parent) != praw.objects.Comment: 174 | return False 175 | return len(parent.body) > 50 and utils.is_comment_owner(parent, username) 176 | 177 | 178 | class VoteXkcdBot(UserCommentsVoteTriggeredBot): 179 | def _do(self, comment): 180 | logger.info('Comment {id} below score threshold: {score}. 
Removing'.format(id=comment.id, score=comment.score)) 181 | if not self.dry_run: 182 | comment.delete() 183 | return True 184 | 185 | 186 | class CommentXkcdBot(SubredditCommentTriggeredBot): 187 | def __init__(self, *args, **kwargs): 188 | self.datastore = kwargs.pop('datastore') 189 | self.xkcd_fetcher = kwargs.pop('xkcd_fetcher') 190 | super(CommentXkcdBot, self).__init__(*args, **kwargs) 191 | 192 | def _check(self, comment): 193 | if comment.body.lower().find('xkcd.com') == -1: 194 | return False 195 | if comment.subreddit.display_name.lower().find('xkcd') != -1: 196 | return False 197 | if comment.subreddit.display_name.lower() == 'jerktalkdiamond': 198 | return False 199 | if utils.is_comment_owner(comment, self.auth['username']): 200 | return False 201 | if utils.has_replied(comment, self.auth['username']): 202 | return False 203 | return not utils.has_chain(self.r, comment, self.auth['username']) 204 | 205 | def _do(self, comment): 206 | html = snudown.markdown(comment.body.encode('UTF-8')) 207 | soup = BeautifulSoup(html) 208 | refs = {} 209 | 210 | # Iterate through all links, get xkcd json 211 | for link in soup.find_all('a'): 212 | href = link.get('href') 213 | if not href: 214 | continue 215 | j = self.xkcd_fetcher.get_json(href) 216 | if not j: 217 | logger.warn('Data could not be fetched for {url}'.format(url=href)) 218 | continue 219 | refs[int(j.get('num', -1))] = { 220 | 'data': j, 221 | 'href': href 222 | } 223 | 224 | return self.process_references(comment, refs) 225 | 226 | def process_references(self, comment, refs): 227 | if not refs: 228 | return True 229 | 230 | # Record in db the references 231 | for comic_id, ref in refs.iteritems(): 232 | if comic_id > 0 and not self.dry_run: 233 | timestamp = int(time.time()) 234 | author = comment.author.name if comment.author else '[deleted]' 235 | sub = comment.subreddit.display_name 236 | link = comment.permalink 237 | self.datastore.insert_xkcd_event(comic_id, timestamp, sub, author, link, 238 | ref['data'].get('from_external', False)) 239 | 240 | # Do not reply if the user is ignored 241 | if comment.author and comment.author.name.lower() in self.datastore.get_ignores(): 242 | logger.info('Skipping comment {id}. 
Reason: Author on ignore list.'.format(id=comment.id)) 243 | return True 244 | 245 | return self.send_reply(comment, refs) 246 | 247 | def send_reply(self, comment, refs): 248 | builder = ReferenceBuilder() 249 | reply_msg = builder.build_all(comment, refs, self.xkcd_fetcher, self.datastore, None) 250 | 251 | # Do not send if there's no body 252 | if builder.get_body_length() == 0: 253 | return True 254 | 255 | # Do not send if we are doing a dry run 256 | if self.dry_run: 257 | return True 258 | 259 | # Reply to the user 260 | reply_obj = utils.send_reply(comment, reply_msg) 261 | if reply_obj is None: 262 | return False 263 | 264 | # Edit and fix [delete] signature link 265 | builder.build_signature(reply_obj) 266 | reply_msg = builder.cat() 267 | if not utils.edit_reply(reply_obj, reply_msg): 268 | return False 269 | 270 | return True 271 | 272 | 273 | class SubmissionXkcdBot(SubredditSubmissionTriggeredBot): 274 | def __init__(self, *args, **kwargs): 275 | self.datastore = kwargs.pop('datastore') 276 | self.xkcd_fetcher = kwargs.pop('xkcd_fetcher') 277 | super(SubmissionXkcdBot, self).__init__(*args, **kwargs) 278 | 279 | def _check(self, submission): 280 | if submission.is_self: 281 | if submission.selftext.lower().find('xkcd.com') == -1: 282 | return False 283 | else: 284 | if submission.url.lower().find('xkcd.com') == -1: 285 | return False 286 | if submission.subreddit.display_name.lower().find('xkcd') != -1: 287 | return False 288 | if submission.subreddit.display_name.lower() == 'jerktalkdiamond': 289 | return False 290 | if utils.is_comment_owner(submission, self.auth['username']): 291 | return False 292 | if utils.has_replied(submission, self.auth['username']): 293 | return False 294 | return not utils.has_chain(self.r, submission, self.auth['username']) 295 | 296 | def _do(self, submission): 297 | if submission.is_self: 298 | return self.process_self(submission) 299 | else: 300 | return self.process_link(submission) 301 | 302 | def process_self(self, submission): 303 | html = snudown.markdown(submission.selftext.encode('UTF-8')) 304 | soup = BeautifulSoup(html) 305 | refs = {} 306 | 307 | # Iterate through all links, get xkcd json 308 | for link in soup.find_all('a'): 309 | href = link.get('href') 310 | if not href: 311 | continue 312 | j = self.xkcd_fetcher.get_json(href) 313 | if not j: 314 | logger.warn('Data could not be fetched for {url}'.format(url=href)) 315 | continue 316 | refs[int(j.get('num', -1))] = { 317 | 'data': j, 318 | 'href': href 319 | } 320 | 321 | return self.process_references(submission, refs) 322 | 323 | def process_link(self, submission): 324 | # Only need to process a single url 325 | j = self.xkcd_fetcher.get_json(submission.url) 326 | if not j: 327 | logger.warn('Data could not be fetched for {url}'.format(url=submission.url)) 328 | return True 329 | refs = { 330 | int(j.get('num', -1)): { 331 | 'data': j, 332 | 'href': submission.url 333 | } 334 | } 335 | 336 | return self.process_references(submission, refs) 337 | 338 | def process_references(self, submission, refs): 339 | if not refs: 340 | return True 341 | 342 | # Record in db the references 343 | for comic_id, ref in refs.iteritems(): 344 | if comic_id > 0 and not self.dry_run: 345 | timestamp = int(time.time()) 346 | author = submission.author.name if submission.author else '[deleted]' 347 | sub = submission.subreddit.display_name 348 | link = submission.permalink 349 | self.datastore.insert_xkcd_event(comic_id, timestamp, sub, author, link, 350 | ref['data'].get('from_external', False)) 351 
| 352 | # Do not reply if the user is ignored 353 | if submission.author and submission.author.name.lower() in self.datastore.get_ignores(): 354 | logger.info('Skipping submission {id}. Reason: Author on ignore list.'.format(id=submission.id)) 355 | return True 356 | 357 | return self.send_reply(submission, refs) 358 | 359 | def send_reply(self, submission, refs): 360 | # TODO: Re-enable transcripts when the json has been fixed 361 | builder = ReferenceBuilder(include_transcript=False) 362 | reply_msg = builder.build_all(submission, refs, self.xkcd_fetcher, self.datastore, None) 363 | 364 | # Do not send if there's no body 365 | if builder.get_body_length() == 0: 366 | return True 367 | 368 | # Do not send if we are doing a dry run 369 | if self.dry_run: 370 | return True 371 | 372 | # Reply to the user 373 | reply_obj = utils.send_reply(submission, reply_msg) 374 | if reply_obj is None: 375 | return False 376 | 377 | # Edit and fix [delete] signature link 378 | builder.build_signature(reply_obj) 379 | reply_msg = builder.cat() 380 | if not utils.edit_reply(reply_obj, reply_msg): 381 | return False 382 | 383 | return True 384 | 385 | 386 | class ReferenceBuilder(object): 387 | def __init__(self, include_transcript=False): 388 | self.include_transcript = include_transcript 389 | self.reply_msg_head = '' 390 | self.reply_msg_sig = '' 391 | self.reply_msg_body = '' 392 | 393 | def build_head(self, comment): 394 | # Check for secret message 395 | secret_message = '' 396 | # Only check if there's a block of text to process 397 | text = getattr(comment, 'body', getattr(comment, 'selftext', '')) 398 | matches = re.finditer(FULL_EMOTE_REGEX, text) 399 | if matches: 400 | for match in matches: 401 | d = match.groupdict() 402 | if d['message'] and d['message'].find('xkcd_transcriber') != -1: 403 | secret_message = self._get_secret_reply(comment.author) 404 | break 405 | 406 | # Secret emote 407 | secret_emote = '' 408 | if comment.subreddit.display_name.lower() in PONY_SUBS or secret_message: 409 | secret_emote = random.choice(PONY_SECRETS) % secret_message + ' ' 410 | 411 | self.reply_msg_head = secret_emote 412 | 413 | def _get_secret_reply(self, author): 414 | greetings = "Hello, " + (author.name if author else "[deleted]") + ". "
415 | wisdom = random.choice(WISDOM_MESSAGES) 416 | return greetings + wisdom 417 | 418 | def build_signature(self, reply_obj): 419 | if reply_obj is None: 420 | self.reply_msg_sig = '---\n' + ' ^| '.join(['^' + a for a in XKCD_SIG_LINKS]) 421 | else: 422 | self.reply_msg_sig = '---\n' + ' ^| '.join(['^' + a for a in XKCD_SIG_LINKS]).format( 423 | thing_id=reply_obj.name) 424 | 425 | def build_body(self, refs, xkcd_fetcher, datastore): 426 | # Reset 427 | self.reply_msg_body = '' 428 | 429 | # Build body text 430 | for comic_id, ref in refs.iteritems(): 431 | data = ref['data'] 432 | if self.reply_msg_body != '': 433 | self.reply_msg_body += u'----\n' 434 | 435 | if ref['href'].find('imgs.xkcd.com') != -1 or data.get('from_external') is True: 436 | self.reply_msg_body += u'[Original Source](https://xkcd.com/{num}/)\n\n'.format(num=comic_id) 437 | elif data.get('img'): 438 | self.reply_msg_body += u'[Image]({image})\n\n'.format(image=self._format_url(data.get('img'))) 439 | if data.get('link'): 440 | self.reply_msg_body += u'[Link]({link})\n\n'.format(link=self._format_url(data.get('link'))) 441 | self.reply_msg_body += u'[Mobile](https://m.xkcd.com/{num}/)\n\n'.format(num=comic_id) 442 | if data.get('title'): 443 | self.reply_msg_body += u'**Title:** {title}\n\n'.format(title=self._format_text(data.get('title', ''))) 444 | if data.get('transcript') and self.include_transcript: 445 | self.reply_msg_body += u'**Transcript:** {transcript}\n\n'.format( 446 | transcript=self._format_text(re.sub('\n{{.+}}', '', data.get('transcript', '')))) 447 | if data.get('alt'): 448 | self.reply_msg_body += u'**Title-text:** {alt}\n\n'.format(alt=self._format_text(data.get('alt', ''))) 449 | if comic_id > 0: 450 | explained = xkcd_fetcher.get_explained_link(comic_id) 451 | self.reply_msg_body += u'[Comic Explanation]({link})\n\n'.format(link=explained) 452 | 453 | stats = datastore.get_stats(comic_id) 454 | if stats: 455 | plural = 's' if stats['count'] != 1 else '' 456 | self.reply_msg_body += u'**Stats:** This comic has been referenced {0} time{1}, representing {2:.4f}% of referenced xkcds.\n\n'.format( 457 | stats['count'], plural, stats['percentage']) 458 | 459 | def build_all(self, comment, refs, xkcd_fetcher, datastore, reply_obj): 460 | self.build_head(comment) 461 | self.build_body(refs, xkcd_fetcher, datastore) 462 | self.build_signature(reply_obj) 463 | return self.cat() 464 | 465 | def cat(self): 466 | return self.reply_msg_head + self.reply_msg_body + self.reply_msg_sig 467 | 468 | def get_body_length(self): 469 | return len(self.reply_msg_body) 470 | 471 | def _format_url(self, url): 472 | return url.replace('(', '\\(').replace(')', '\\)') 473 | 474 | def _format_text(self, text): 475 | if isinstance(text, unicode): 476 | text = text.encode('raw_unicode_escape').decode('utf-8') 477 | lines = text.replace('\n', '\n\n') 478 | lines = self._escape_markdown(lines) 479 | return lines 480 | 481 | def _escape_markdown(self, text): 482 | for c in MARKDOWN_ESCAPE_CHARACTERS: 483 | text = text.replace(str(c), '\\' + str(c)) 484 | return text 485 | -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/datastore.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sqlite3 3 | 4 | import simplejson 5 | 6 | 7 | class SimpleDataStore(object): 8 | def __init__(self, db_path): 9 | self.db_path = db_path 10 | self.conn = None 11 | 12 | def open(self): 13 | if not self.conn: 14 | self.conn = 
sqlite3.connect(self.db_path) 15 | 16 | def close(self): 17 | if self.conn: 18 | self.conn.close() 19 | self.conn = None 20 | 21 | def execute(self, *args, **kwargs): 22 | self.open() 23 | c = self.conn.cursor() 24 | c.execute(*args, **kwargs) 25 | return c 26 | 27 | def commit(self): 28 | if self.conn: 29 | self.conn.commit() 30 | 31 | 32 | class BotDataStore(object): 33 | def __init__(self, bot_name, database_path): 34 | # Create the path if it does not already exist 35 | if not os.path.exists(os.path.dirname(database_path)): 36 | os.makedirs(os.path.dirname(database_path)) 37 | 38 | self.bot_name = bot_name 39 | self.database_path = database_path 40 | self.datastore = SimpleDataStore(self.database_path) 41 | self.create() 42 | 43 | def create(self): 44 | self.datastore.execute(""" 45 | CREATE TABLE IF NOT EXISTS ignored_users ( 46 | bot_name TEXT, 47 | target_name TEXT, 48 | UNIQUE(bot_name, target_name) ON CONFLICT IGNORE 49 | ); 50 | """) 51 | 52 | self.datastore.execute(""" 53 | CREATE TABLE IF NOT EXISTS xkcd_comic_references ( 54 | comic_id INTEGER, 55 | time INTEGER NOT NULL, 56 | subreddit TEXT, 57 | user TEXT, 58 | link TEXT, 59 | UNIQUE(comic_id, subreddit, user, link) ON CONFLICT IGNORE 60 | ); 61 | """) 62 | 63 | self.datastore.execute(""" 64 | CREATE TABLE IF NOT EXISTS xkcd_comic_meta ( 65 | comic_id INTEGER PRIMARY KEY, 66 | json TEXT, 67 | title TEXT, 68 | hash_avg TEXT, 69 | hash_d TEXT, 70 | hash_p TEXT 71 | ); 72 | """) 73 | 74 | self.datastore.execute(""" 75 | CREATE VIEW IF NOT EXISTS references_counts AS 76 | SELECT 77 | comic_id, 78 | 0 AS comic_count, 79 | 0.0 AS comic_percentage 80 | FROM 81 | xkcd_comic_meta 82 | WHERE comic_id NOT IN ( 83 | SELECT DISTINCT(comic_id) FROM xkcd_comic_references 84 | ) 85 | UNION 86 | SELECT 87 | comic_id, 88 | COUNT(*) AS comic_count, 89 | (COUNT(*) * 100.0) / (SELECT COUNT(*) FROM xkcd_comic_references) AS comic_percentage 90 | FROM 91 | xkcd_comic_references 92 | GROUP BY 93 | comic_id 94 | ; 95 | """) 96 | 97 | self.datastore.commit() 98 | 99 | def add_ignore(self, target): 100 | self.datastore.execute( 101 | 'INSERT INTO ignored_users VALUES(?, ?)', 102 | (self.bot_name, target) 103 | ) 104 | 105 | self.datastore.commit() 106 | 107 | def get_ignores(self): 108 | cursor = self.datastore.execute( 109 | 'SELECT target_name FROM ignored_users WHERE bot_name = ?', 110 | (self.bot_name,) 111 | ) 112 | 113 | return [r[0] for r in cursor] 114 | 115 | def get_stats(self, comic_id): 116 | cursor = self.datastore.execute( 117 | 'SELECT comic_count, comic_percentage FROM references_counts WHERE comic_id = ?', 118 | (int(comic_id),) 119 | ) 120 | 121 | meta = cursor.fetchone() 122 | if not meta: 123 | return None 124 | return { 125 | 'count': meta[0], 126 | 'percentage': meta[1] 127 | } 128 | 129 | def insert_xkcd_event(self, comic_id, time, subreddit, user, link, from_external): 130 | self.datastore.execute( 131 | 'INSERT INTO xkcd_comic_references VALUES(?, ?, ?, ?, ?)', 132 | (int(comic_id), int(time), subreddit, user, link) 133 | ) 134 | 135 | self.datastore.commit() 136 | 137 | def get_xkcd_meta(self, comic_id): 138 | cursor = self.datastore.execute( 139 | 'SELECT comic_id, json, hash_avg, hash_d, hash_p FROM xkcd_comic_meta WHERE comic_id = ?', 140 | (int(comic_id),) 141 | ) 142 | 143 | meta = cursor.fetchone() 144 | if not meta: 145 | return None 146 | return { 147 | 'comic_id': meta[0], 148 | 'json_data': simplejson.loads(meta[1]), 149 | 'hash_avg': meta[2], 150 | 'hash_d': meta[3], 151 | 'hash_p': meta[4], 152 | } 153 | 154 | def
insert_xkcd_meta(self, comic_id, json, hash_avg, hash_d, hash_p): 155 | r = self.datastore.execute( 156 | 'SELECT 1 FROM xkcd_comic_meta WHERE comic_id = ?', 157 | (int(comic_id),) 158 | ) 159 | 160 | if r.fetchone() is None: 161 | self.datastore.execute( 162 | 'INSERT INTO xkcd_comic_meta VALUES(?, ?, ?, ?, ?, ?)', 163 | (int(comic_id), simplejson.dumps(json), json.get('title', ''), str(hash_avg), str(hash_d), str(hash_p)) 164 | ) 165 | 166 | self.datastore.commit() 167 | 168 | def close(self): 169 | try: 170 | self.datastore.close() 171 | except Exception as e: 172 | pass 173 | -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/runbot.py: -------------------------------------------------------------------------------- 1 | # Must be first for monkey_patch() 2 | from redditbot.base import patch_all 3 | patch_all() 4 | 5 | import logging 6 | 7 | from redditbot.bots import settings 8 | from redditbot.base.handlers import MultiBotHandler 9 | from bot import SubmissionXkcdBot, CommentXkcdBot, MailXkcdBot, VoteXkcdBot 10 | from datastore import BotDataStore 11 | from xkcdfetcher import XkcdFetcher 12 | 13 | logging.basicConfig() 14 | 15 | 16 | def run(): 17 | datastore = BotDataStore('xkcd_transcriber', settings.XKCD_DB_LOCATION) 18 | xkcd_fetcher = XkcdFetcher(datastore) 19 | 20 | # If fetch_limit is set to None, it will keep on going back for hugely old submissions 21 | submission_bot = SubmissionXkcdBot(user_agent='xkcdref bot (submission) by %s' % settings.AUTHOR, 22 | auth=settings.REDDIT_ACCOUNTS['xkcd_transcriber'], 23 | delay=20, 24 | fetch_limit=300, 25 | cache_size=600, 26 | dry_run=settings.DRY_RUN, 27 | subreddit='all', 28 | datastore=datastore, 29 | xkcd_fetcher=xkcd_fetcher) 30 | 31 | comment_bot = CommentXkcdBot(user_agent='xkcdref bot (comment) by %s' % settings.AUTHOR, 32 | auth=settings.REDDIT_ACCOUNTS['xkcd_transcriber'], 33 | delay=15, 34 | fetch_limit=None, 35 | cache_size=2000, 36 | dry_run=settings.DRY_RUN, 37 | subreddit='all', 38 | datastore=datastore, 39 | xkcd_fetcher=xkcd_fetcher) 40 | 41 | mail_bot = MailXkcdBot(user_agent='xkcdref bot (message) by %s' % settings.AUTHOR, 42 | auth=settings.REDDIT_ACCOUNTS['xkcd_transcriber'], 43 | delay=60, 44 | fetch_limit=None, 45 | cache_size=0, 46 | dry_run=settings.DRY_RUN, 47 | datastore=datastore, 48 | xkcd_fetcher=xkcd_fetcher) 49 | 50 | vote_bot = VoteXkcdBot(user_agent='xkcdref bot (vote) by %s' % settings.AUTHOR, 51 | auth=settings.REDDIT_ACCOUNTS['xkcd_transcriber'], 52 | delay=300, 53 | fetch_limit=None, 54 | cache_size=0, 55 | dry_run=settings.DRY_RUN, 56 | monitored_user='xkcd_transcriber', 57 | score_threshold_min=-1) 58 | 59 | # Run all bots 60 | MultiBotHandler([ 61 | submission_bot, 62 | comment_bot, 63 | mail_bot, 64 | vote_bot 65 | ]).run() 66 | 67 | 68 | if __name__ == '__main__': 69 | run() 70 | -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/xkcdfetcher.py: -------------------------------------------------------------------------------- 1 | import urllib2 2 | import re 3 | import logging 4 | import urlparse 5 | 6 | import simplejson 7 | 8 | logger = logging.getLogger(__name__) 9 | logger.setLevel(logging.INFO) 10 | 11 | XKCD_JSON_API_URL = 'http://xkcd.com/{comic_id}/info.0.json' 12 | XKCD_EXPLAINED_URL = 'https://www.explainxkcd.com/wiki/index.php/{comic_id}#Explanation' 13 | 14 | 15 | class XkcdFetcher(object): 16 | def __init__(self, datastore): 17 | self.datastore = datastore 18 | 
self.next_index = 1 19 | self.reverse_image_index = {} 20 | self.reverse_hash_index = {} 21 | self.json_index = {} 22 | 23 | def get_json(self, url): 24 | if not url.startswith('http'): 25 | url = '//' + url 26 | parsed = urlparse.urlparse(url) 27 | 28 | if re.match('^(www\.)?imgs\.xkcd\.com$', parsed.netloc.lower()): 29 | if parsed.path not in self.reverse_image_index: 30 | self._load_indexes() 31 | comic_id = self.reverse_image_index.get(parsed.path) 32 | return self.json_index.get(comic_id) if comic_id else None 33 | 34 | if re.match('^(www\.)?xkcd\.com$', parsed.netloc.lower()) and re.match('^/\d+/?$', parsed.path): 35 | m = re.search('^/(\d+)/?$', parsed.path) 36 | comic_id = int(m.group(1)) 37 | if comic_id not in self.json_index: 38 | self._load_indexes() 39 | return self.json_index.get(comic_id) if comic_id else None 40 | 41 | if re.match('^imgur\.com$', parsed.netloc): 42 | pass 43 | 44 | return None 45 | 46 | def get_explained_link(self, comic_id): 47 | return XKCD_EXPLAINED_URL.format(comic_id=comic_id) 48 | 49 | def _load_indexes(self): 50 | while True: 51 | # Get metadata 52 | meta = self._get_meta(self.next_index) 53 | if not meta: 54 | return 55 | 56 | # comic_id -> json 57 | self.json_index[self.next_index] = meta['json_data'] 58 | 59 | # image_url_path_part -> comic_id 60 | parsed = urlparse.urlparse(meta['json_data'].get('img', '')) 61 | if parsed.path and parsed.path not in self.reverse_image_index: 62 | self.reverse_image_index[parsed.path] = self.next_index 63 | 64 | # avg_hash -> comic_id 65 | if meta['hash_avg'] and meta['hash_avg'] not in self.reverse_hash_index: 66 | self.reverse_hash_index[meta['hash_avg']] = self.next_index 67 | 68 | self.next_index += 1 69 | 70 | def _get_meta(self, comic_id): 71 | meta = self.datastore.get_xkcd_meta(comic_id) 72 | if not meta: 73 | comic_id, json_data, hash_avg, hash_d, hash_p = self._build_xkcd_meta(comic_id) 74 | if comic_id is not None: 75 | self.datastore.insert_xkcd_meta(comic_id, json_data, hash_avg, hash_d, hash_p) 76 | meta = self.datastore.get_xkcd_meta(comic_id) 77 | return meta 78 | 79 | def _build_xkcd_meta(self, comic_id): 80 | j = self._get_xkcd_json(comic_id) 81 | if j: 82 | hash_avg, hash_d, hash_p = self._get_image_hashes(j.get('img')) 83 | return comic_id, j, hash_avg, hash_d, hash_p 84 | return None, None, None, None, None 85 | 86 | def _get_image_hashes(self, url): 87 | return '', '', '' 88 | 89 | """ 90 | def _get_image_hashes(self, url): 91 | if not url: 92 | return '', '', '' 93 | file_name = '/tmp/' + get_random_file_name() 94 | try: 95 | self.myopener.retrieve(url, file_name) 96 | hash_avg = imagehash.average_hash(Image.open(file_name)) 97 | hash_d = imagehash.dhash(Image.open(file_name)) 98 | hash_p = imagehash.phash(Image.open(file_name)) 99 | return str(hash_avg), str(hash_d), str(hash_p) 100 | except Exception as e: 101 | logger.exception('Exception while getting image hashes') 102 | return '', '', '' 103 | finally: 104 | os.remove(file_name) 105 | """ 106 | 107 | def _get_xkcd_json(self, comic_id): 108 | if int(comic_id) == 404: 109 | return {'title': '404', 'transcript': '404', 'alt': '404', 'img': '', 'num': 404} 110 | 111 | try: 112 | response = urllib2.urlopen(XKCD_JSON_API_URL.format(comic_id=comic_id)) 113 | html = response.read() 114 | return simplejson.loads(html) 115 | except Exception as e: 116 | # logger.exception('Exception while getting xkcd json') 117 | return None 118 | -------------------------------------------------------------------------------- /bots/setup.py: 
-------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name='redditbot.bots', 5 | version='1.0.0', 6 | author='Jeremy Simpson', 7 | description='Redditbot bots', 8 | license='MIT', 9 | classifiers=[ 10 | 'Development Status :: 5 - Production/Stable', 11 | 'Intended Audience :: Developers', 12 | 'License :: OSI Approved :: MIT License', 13 | 'Programming Language :: Python :: 2.7', 14 | ], 15 | packages=setuptools.find_packages(), 16 | namespace_packages=['redditbot'], 17 | entry_points={ 18 | 'console_scripts': [ 19 | 'runbot-emote-counter = redditbot.bots.emote_counter.runbot:run', 20 | 'runbot-xkcdref = redditbot.bots.xkcdref.runbot:run' 21 | ] 22 | } 23 | ) 24 | --------------------------------------------------------------------------------