├── .gitignore ├── README.md ├── base ├── install │ └── requirements.txt ├── redditbot │ ├── __init__.py │ └── base │ │ ├── __init__.py │ │ ├── handlers.py │ │ └── utils.py └── setup.py └── bots ├── install └── requirements.txt ├── redditbot ├── __init__.py └── bots │ ├── __init__.py │ ├── emote_counter │ ├── __init__.py │ ├── bot.py │ └── runbot.py │ ├── settings │ ├── __init__.py │ ├── base.py │ ├── dev_mac.py │ ├── dev_win.py │ └── prod.py │ └── xkcdref │ ├── __init__.py │ ├── bot.py │ ├── datastore.py │ ├── runbot.py │ └── xkcdfetcher.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # My stuff 2 | .DS_Store 3 | .idea/ 4 | local_settings.py 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # Installer logs 31 | pip-log.txt 32 | pip-delete-this-directory.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | redditbot 2 | ========= 3 | 4 | This is a reddit bot framework (v2!) 5 | 6 | Built-in Templates: 7 | - Message-triggered bots (PMs, comment replies, post replies, username mentions) 8 | - Comment-triggered bots 9 | - Submission-triggered bots 10 | - Vote-triggered bots 11 | 12 | Included working bots: 13 | - Emote counter (triggered through PMs) 14 | - XKCD transcriber (triggered via submissions and comments, monitors messages to add users to the ignore list, monitors votes) 15 | 16 | Requires: 17 | Python 2.7, praw, simplejson, snudown, and a few other packages (see requirements.txt) 18 | 19 | 20 | --- 21 | 22 | How-to: 23 | 24 | 1. Subclass UserCommentsVoteTriggeredBot, MailTriggeredBot, SubredditCommentTriggeredBot, SubredditSubmissionTriggeredBot, or BotHandler if you need a new type. 25 | 26 | 2. Implement the \_check() and \_do() functions at a minimum. If you are subclassing a template, be sure to call the super() function for each. If not, implement \_get_content() as well. 27 | 28 | 3. In v1, the built-in bot templates automatically checked that a bot does not reply to the same thing twice. This check has been moved into the `utils` module and must now be called explicitly (see the example at the end of this README). 29 | 30 | 4. `MultiBotHandler` can be used to run multiple bots at the same time in a single Python process. 31 | 32 | 5. It is important that 33 | 34 | ``` 35 | # Must be first for monkey_patch() 36 | from redditbot.base import patch_all 37 | patch_all() 38 | ``` 39 | 40 | are the first lines that execute when the Python process is started. These lines ensure that gevent's monkey patches are applied correctly. 41 | 42 | --- 43 | 44 | Notes: 45 | 46 | 1. The example bots can be run with `python runbot.py` 47 | 48 | 2. Accounts and settings are configured in the settings package (settings/base.py plus an environment-specific module) 49 | 50 | 3. local_settings.py can be used to configure dev settings; git ignores it. 51 | 52 | 4. OAuth2 support is included (script type apps only).
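
---

Example: a minimal sketch (not one of the included bots) showing the steps above. The class name, trigger keyword, account values, and subreddit are placeholders; the duplicate-reply check from step 3 uses `utils.has_replied`.

```
# Must be first for monkey_patch()
from redditbot.base import patch_all
patch_all()

import logging

from redditbot.base import utils
from redditbot.base.handlers import SubredditCommentTriggeredBot

logging.basicConfig()


class HelloBot(SubredditCommentTriggeredBot):
    def _check(self, comment):
        # Trigger only on comments that contain the keyword and that this
        # account has not already replied to (v1 did this check automatically).
        if 'hello bot' not in comment.body.lower():
            return False
        return not utils.has_replied(comment, self.auth['username'])

    def _do(self, comment):
        # Return True on success so the handler logs the object as processed.
        return utils.send_reply(comment, 'Hello!') is not None


if __name__ == '__main__':
    HelloBot(user_agent='hello bot by /u/example',   # placeholder user agent
             auth={'username': '', 'password': ''},  # fill in a real account
             delay=30,
             fetch_limit=100,
             cache_size=500,
             subreddit='test').run()
```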
53 | -------------------------------------------------------------------------------- /base/install/requirements.txt: -------------------------------------------------------------------------------- 1 | gevent>=1.0.2 2 | praw>=3.4.0 3 | pylru>=1.0.6 4 | requests>=2.5.1 -------------------------------------------------------------------------------- /base/redditbot/__init__.py: -------------------------------------------------------------------------------- 1 | __import__("pkg_resources").declare_namespace(__name__) -------------------------------------------------------------------------------- /base/redditbot/base/__init__.py: -------------------------------------------------------------------------------- 1 | def patch_all(): 2 | from gevent import monkey 3 | monkey.patch_all() 4 | 5 | import logging 6 | logger = logging.getLogger(__name__) 7 | logger.setLevel(logging.INFO) 8 | logger.info('Monkey patching...') 9 | -------------------------------------------------------------------------------- /base/redditbot/base/handlers.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | 4 | import gevent 5 | import praw 6 | import pylru 7 | import requests 8 | import requests.auth 9 | 10 | logger = logging.getLogger(__name__) 11 | logger.setLevel(logging.INFO) 12 | 13 | OAUTH_ACCESS_TOKEN_URL = 'https://www.reddit.com/api/v1/access_token' 14 | 15 | 16 | class MultiBotHandler(object): 17 | def __init__(self, handlers): 18 | self.handlers = handlers 19 | 20 | def run(self): 21 | greenlets = [] 22 | for handler in self.handlers: 23 | greenlets.append(gevent.spawn(handler.run)) 24 | gevent.joinall(greenlets) 25 | 26 | 27 | class BotHandler(object): 28 | def __init__(self, user_agent, auth, delay, fetch_limit, cache_size=0, dry_run=False): 29 | self.user_agent = user_agent 30 | self.auth = auth 31 | self.delay = delay 32 | self.fetch_limit = fetch_limit 33 | self.cache_size = cache_size 34 | self.dry_run = dry_run 35 | self.cache = pylru.lrucache(self.cache_size) if self.cache_size > 0 else None 36 | self.api_request_delay = 1.0 if self.__is_oauth() else 2.0 37 | self.r = praw.Reddit(self.user_agent, cache_timeout=0, api_request_delay=self.api_request_delay) 38 | self.expires = -1 39 | self.__auth() 40 | 41 | def _get_content(self): 42 | raise NotImplementedError() 43 | 44 | def _check(self, obj): 45 | raise NotImplementedError() 46 | 47 | def _do(self, obj): 48 | raise NotImplementedError() 49 | 50 | def __is_oauth(self): 51 | return 'client_id' in self.auth and 'secret' in self.auth 52 | 53 | def __update_access_credentials(self): 54 | # Fetch access token 55 | client_auth = requests.auth.HTTPBasicAuth(self.auth['client_id'], self.auth['secret']) 56 | response = requests.post(OAUTH_ACCESS_TOKEN_URL, auth=client_auth, data={ 57 | 'grant_type': 'password', 58 | 'username': self.auth['username'], 59 | 'password': self.auth['password'] 60 | }, headers={ 61 | 'User-Agent': self.user_agent 62 | }) 63 | 64 | # Check response 65 | if response.ok: 66 | response = response.json() 67 | else: 68 | logger.error('Could not retrieve access creds: Status {status}'.format(status=response.status_code)) 69 | return 70 | 71 | # Update 72 | if 'error' in response: 73 | logger.error('Could not retrieve access creds: Json error: {status}'.format(status=response['error'])) 74 | else: 75 | logger.info('Setting access creds for oauth') 76 | self.r.set_access_credentials(scope='*', access_token=response['access_token']) 77 | self.expires = time.time() +
int(response['expires_in']) * 0.9 78 | 79 | def __auth(self): 80 | if 'username' not in self.auth or 'password' not in self.auth: 81 | raise Exception("Must provide username and password in auth") 82 | 83 | if self.__is_oauth(): 84 | self.r.set_oauth_app_info(client_id='a', client_secret='a', redirect_uri='a') 85 | self.__update_access_credentials() 86 | else: 87 | self.r.login(self.auth['username'], self.auth['password']) 88 | 89 | def __main(self): 90 | # Check if we need to update access token 91 | if time.time() > self.expires > 0: 92 | self.__update_access_credentials() 93 | 94 | # Get the content 95 | content = self._get_content() 96 | if not content: 97 | logger.warn('Bad content object: skipping...') 98 | return 99 | 100 | hits = 0 101 | misses = 0 102 | 103 | # Process all content 104 | for obj in content: 105 | # Check if it's in the cache 106 | if self.cache is not None: 107 | if obj.id in self.cache: 108 | hits += 1 109 | continue 110 | misses += 1 111 | self.cache[obj.id] = 0 112 | 113 | # Process the object, sandbox exceptions 114 | try: 115 | if not self._check(obj): 116 | continue 117 | logger.info('Found valid object: {id} by {name}.'.format(id=obj.id, 118 | name=obj.author.name if obj.author else '[deleted]')) 119 | if not self._do(obj): 120 | logger.info('Failed to process object {id}.'.format(id=obj.id)) 121 | except Exception as e: 122 | logger.exception('Exception while processing object {id}'.format(id=obj.id)) 123 | 124 | if self.cache is not None: 125 | logger.info('Cache hits/misses/total: {hits} / {misses} / {total}'.format(hits=hits, misses=misses, 126 | total=hits + misses)) 127 | 128 | def run(self): 129 | logger.info('Bot started!') 130 | 131 | while True: 132 | start_time = time.time() 133 | 134 | try: 135 | self.__main() 136 | except Exception as e: 137 | logger.exception('Exception while processing content generator') 138 | 139 | # Sleep at least self.delay per cycle 140 | time_delta = time.time() - start_time 141 | sleep_time = self.delay - time_delta 142 | logger.info('Processing/Sleeping for: {p:.2f}s / {s:.2f}s'.format(p=time_delta, s=max(0, sleep_time))) 143 | logger.info('Finished processing round for {name}'.format(name=self.user_agent)) 144 | if sleep_time > 0: 145 | time.sleep(sleep_time) 146 | 147 | logger.info('Bot finished! 
Exiting gracefully.') 148 | 149 | 150 | class UserCommentsVoteTriggeredBot(BotHandler): 151 | def __init__(self, *args, **kwargs): 152 | self.monitored_user = kwargs.pop('monitored_user') 153 | self.score_threshold_max = kwargs.pop('score_threshold_max', None) 154 | self.score_threshold_min = kwargs.pop('score_threshold_min', None) 155 | if self.score_threshold_max is None and self.score_threshold_min is None: 156 | raise Exception("score_threshold_max or score_threshold_min should be set") 157 | 158 | super(UserCommentsVoteTriggeredBot, self).__init__(*args, **kwargs) 159 | 160 | def _get_content(self): 161 | return self.r.get_redditor(self.monitored_user).get_comments(limit=self.fetch_limit) 162 | 163 | def _check(self, comment): 164 | # Check vote score min 165 | if self.score_threshold_min is not None and comment.score < self.score_threshold_min: 166 | return True 167 | 168 | # Check vote score max 169 | if self.score_threshold_max is not None and comment.score > self.score_threshold_max: 170 | return True 171 | 172 | return False 173 | 174 | 175 | class MailTriggeredBot(BotHandler): 176 | def __init__(self, *args, **kwargs): 177 | super(MailTriggeredBot, self).__init__(*args, **kwargs) 178 | 179 | def _get_content(self): 180 | return self.r.get_unread(limit=self.fetch_limit) 181 | 182 | def is_private_message(self, message): 183 | return not message.was_comment 184 | 185 | def is_comment_reply(self, message): 186 | return message.was_comment and message.subject == 'comment reply' 187 | 188 | def is_post_reply(self, message): 189 | return message.was_comment and message.subject == 'post reply' 190 | 191 | def is_username_mention(self, message): 192 | return message.was_comment and message.subject == 'username mention' 193 | 194 | 195 | class SubredditCommentTriggeredBot(BotHandler): 196 | def __init__(self, *args, **kwargs): 197 | self.subreddit = kwargs.pop('subreddit') 198 | super(SubredditCommentTriggeredBot, self).__init__(*args, **kwargs) 199 | 200 | def _get_content(self): 201 | return self.r.get_comments(self.subreddit, limit=self.fetch_limit) 202 | 203 | 204 | class SubredditSubmissionTriggeredBot(BotHandler): 205 | def __init__(self, *args, **kwargs): 206 | self.subreddit = kwargs.pop('subreddit') 207 | super(SubredditSubmissionTriggeredBot, self).__init__(*args, **kwargs) 208 | 209 | def _get_content(self): 210 | return self.r.get_subreddit(self.subreddit).get_new(limit=self.fetch_limit) 211 | -------------------------------------------------------------------------------- /base/redditbot/base/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import praw 4 | import requests 5 | 6 | logger = logging.getLogger(__name__) 7 | logger.setLevel(logging.INFO) 8 | 9 | 10 | def has_replied(praw_object, username): 11 | """ 12 | Returns True if the specified user has a comment in the top level replies of the given submission/comment/message, 13 | and False otherwise. 14 | For comments, submissions, messages ONLY. 15 | """ 16 | if type(praw_object) == praw.objects.Message: 17 | # TODO: Fix this to actually check properly 18 | # If it's not the first message in the PM thread, we replied previously. 
19 | # This is not the best method, and it is a bit flaky, 20 | # but good enough for most cases 21 | if praw_object.first_message is not None: 22 | return True 23 | return False 24 | elif type(praw_object) == praw.objects.Submission: 25 | praw_object.replace_more_comments(limit=None) 26 | replies = praw_object.comments 27 | elif type(praw_object) == praw.objects.Comment: 28 | replies = praw_object.replies 29 | else: 30 | raise Exception("Object must be an instance of praw.objects.Comment/Submission/Message") 31 | 32 | if not replies: 33 | return False 34 | 35 | # Check each reply if the username matches 36 | username = username.lower() 37 | for reply in replies: 38 | if reply.author and reply.author.name.lower() == username: 39 | return True 40 | 41 | return False 42 | 43 | 44 | def is_comment_owner(praw_comment, username): 45 | """ 46 | Returns True if the specified comment belongs to the user, 47 | otherwise False. 48 | """ 49 | return praw_comment.author and praw_comment.author.name.lower() == username.lower() 50 | 51 | 52 | def send_reply(praw_object, reply_msg): 53 | """ 54 | Returns the reply object if the message was sent successfully, otherwise None. 55 | For comments, submissions, messages ONLY. 56 | """ 57 | try: 58 | if type(praw_object) == praw.objects.Submission: 59 | reply_obj = praw_object.add_comment(reply_msg) 60 | else: 61 | reply_obj = praw_object.reply(reply_msg) 62 | except requests.HTTPError as e: 63 | if e.response.status_code == 403: 64 | logger.error('Could not post reply: Forbidden') 65 | return None 66 | else: 67 | raise 68 | except Exception as e: 69 | logger.exception('Exception while replying') 70 | return None 71 | 72 | logger.info(' => Reply Sent!') 73 | return reply_obj 74 | 75 | 76 | def edit_reply(praw_comment, reply_msg): 77 | """ 78 | Returns True if the comment was edited successfully, and False otherwise. 79 | For comments ONLY. 80 | """ 81 | try: 82 | praw_comment.edit(reply_msg) 83 | except Exception as e: 84 | logger.exception('Exception while editing') 85 | return False 86 | 87 | logger.info(' => Edit was made!') 88 | return True 89 | 90 | 91 | def has_chain(praw_r, praw_comment, username): 92 | """ 93 | Returns True if the parent was made by username. 94 | Returns False otherwise.
95 | """ 96 | if not hasattr(praw_comment, 'parent_id'): 97 | return False 98 | parent = praw_r.get_info(thing_id=praw_comment.parent_id) 99 | if not parent or type(parent) != praw.objects.Comment: 100 | return False 101 | return is_comment_owner(parent, username) 102 | -------------------------------------------------------------------------------- /base/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name='redditbot.base', 5 | version='1.0.0', 6 | author='Jeremy Simpson', 7 | description='Redditbot framework', 8 | license='MIT', 9 | classifiers=[ 10 | 'Development Status :: 5 - Production/Stable', 11 | 'Intended Audience :: Developers', 12 | 'License :: OSI Approved :: MIT License', 13 | 'Programming Language :: Python :: 2.7', 14 | ], 15 | packages=setuptools.find_packages(), 16 | namespace_packages=['redditbot'], 17 | ) 18 | -------------------------------------------------------------------------------- /bots/install/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4>=4.3.2 2 | simplejson>=3.6.5 3 | git+https://github.com/reddit/snudown#egg=snudown -------------------------------------------------------------------------------- /bots/redditbot/__init__.py: -------------------------------------------------------------------------------- 1 | __import__("pkg_resources").declare_namespace(__name__) -------------------------------------------------------------------------------- /bots/redditbot/bots/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luney112/redditbot/db878d557b74e062f46cc2193c899933f7a63d59/bots/redditbot/bots/__init__.py -------------------------------------------------------------------------------- /bots/redditbot/bots/emote_counter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luney112/redditbot/db878d557b74e062f46cc2193c899933f7a63d59/bots/redditbot/bots/emote_counter/__init__.py -------------------------------------------------------------------------------- /bots/redditbot/bots/emote_counter/bot.py: -------------------------------------------------------------------------------- 1 | from redditbot.base.handlers import MailTriggeredBot 2 | from redditbot.base import utils 3 | 4 | 5 | class TopEmotesBot(MailTriggeredBot): 6 | def __init__(self, *args, **kwargs): 7 | super(TopEmotesBot, self).__init__(*args, **kwargs) 8 | 9 | def _check(self, mail): 10 | return self.is_private_message(mail) 11 | 12 | def _do(self, mail): 13 | reply_msg = '[](/sbstalkthread)This bot has been decommissioned.\n\n' \ 14 | 'It has migrated over to [lunarmist.net](http://lunarmist.net/emotes/), ' \ 15 | 'and has been enhanced with graphs and better comment coverage. Check it out!' 
16 | 17 | # Reply to the user and mark it as read 18 | if utils.send_reply(mail, reply_msg): 19 | mail.mark_as_read() 20 | return True 21 | else: 22 | return False 23 | -------------------------------------------------------------------------------- /bots/redditbot/bots/emote_counter/runbot.py: -------------------------------------------------------------------------------- 1 | # Must be first for monkey_patch() 2 | from redditbot.base import patch_all 3 | patch_all() 4 | 5 | import logging 6 | 7 | from bot import TopEmotesBot 8 | from redditbot.bots import settings 9 | 10 | logging.basicConfig() 11 | 12 | 13 | def run(): 14 | TopEmotesBot(user_agent='Emote counter by %s' % settings.AUTHOR, 15 | auth=settings.REDDIT_ACCOUNTS['counts_your_emotes'], 16 | delay=60, 17 | fetch_limit=None, 18 | cache_size=100).run() 19 | 20 | 21 | if __name__ == '__main__': 22 | run() 23 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from base import * 5 | 6 | logger = logging.getLogger(__name__) 7 | logger.setLevel(logging.INFO) 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--settings', help='settings profile to load (dev-mac, dev-win, prod)', default='prod') 11 | 12 | args, _ = parser.parse_known_args() 13 | 14 | if args.settings == 'dev-mac': 15 | logger.info('Loading dev-mac settings') 16 | from dev_mac import * 17 | elif args.settings == 'dev-win': 18 | logger.info('Loading dev-win settings') 19 | from dev_win import * 20 | elif args.settings == 'prod': 21 | logger.info('Loading prod settings') 22 | from prod import * 23 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/base.py: -------------------------------------------------------------------------------- 1 | REDDIT_ACCOUNTS = { 2 | 'counts_your_emotes': { 3 | 'username': '', 4 | 'password': '', 5 | }, 6 | 'xkcd_transcriber': { 7 | 'username': '', 8 | 'password': '', 9 | 'client_id': '', 10 | 'secret': '' 11 | } 12 | } 13 | 14 | AUTHOR = '/u/name_here' 15 | 16 | XKCD_DB_LOCATION = '/path/to/db' 17 | 18 | DRY_RUN = False 19 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/dev_mac.py: -------------------------------------------------------------------------------- 1 | XKCD_DB_LOCATION = '' 2 | DRY_RUN = True 3 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/dev_win.py: -------------------------------------------------------------------------------- 1 | XKCD_DB_LOCATION = '' 2 | DRY_RUN = True 3 | -------------------------------------------------------------------------------- /bots/redditbot/bots/settings/prod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luney112/redditbot/db878d557b74e062f46cc2193c899933f7a63d59/bots/redditbot/bots/settings/prod.py -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luney112/redditbot/db878d557b74e062f46cc2193c899933f7a63d59/bots/redditbot/bots/xkcdref/__init__.py -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/bot.py:
-------------------------------------------------------------------------------- 1 | import re 2 | import time 3 | import logging 4 | import random 5 | 6 | import praw 7 | import snudown 8 | from bs4 import BeautifulSoup 9 | 10 | from redditbot.base import utils 11 | from redditbot.base.handlers import MailTriggeredBot, UserCommentsVoteTriggeredBot, SubredditCommentTriggeredBot, \ 12 | SubredditSubmissionTriggeredBot 13 | 14 | logger = logging.getLogger(__name__) 15 | logger.setLevel(logging.INFO) 16 | 17 | # Emote stuff 18 | FULL_EMOTE_REGEX = re.compile( 19 | """\[[^\]]*\]\s*\(\s*/(?P[^\s/]+?)(?P-\S+)?\s*(?P["'].*?["'])?\s*\)""") 20 | PONY_SUBS = ["mylittlepony", "mlplounge", "ploungeafterdark", "mylittlefriends", "mylittleandysonic1"] 21 | PONY_SECRETS = [ 22 | u'[](/adorkable "%s")', 23 | u'[](/twibook "%s")', 24 | u'[](/twicookiejar "%s")', 25 | u'[](/twicookie "%s")', 26 | u'[](/pretzeltwi "%s")', 27 | u'[](/twitongue "%s")', 28 | ] 29 | SB_SECRETS = [ 30 | '[](/sbstalkthread)', 31 | '[](/2d)', 32 | '[](/sbload)', 33 | '[](/sbtarget)', 34 | '[](/sweetiecardbot)', 35 | ] 36 | WISDOM_MESSAGES = [ 37 | "Beep boop.", 38 | "0100100100100000011011000110100101101011011001010010000001110000011011110110111001111001", 39 | "meow meow meow meow meow", 40 | "Did you know that a group of cats is called a clowder?", 41 | "Did you know that cats have over 20 muscles that control their ears?", 42 | "Did you know that cats sleep 70% of their lives?", 43 | "Did you know that owning a cat can reduce the risk of stroke and heart attack by a third?", 44 | "Did you know that adult cats only meow to communicate with humans? meow.", 45 | "Did you know that cats make more than 100 different sounds whereas dogs make around 10?", 46 | "Did you know that cats have 1,000 times more data storage than an iPad?", 47 | "Did you know that a house cat is faster than Usain Bolt?", 48 | "Did you know that cats only sweat through their foot pads?", 49 | "Did you know that cats have scent glands along their tail, their forehead, lips, chin, and the underside of their front paws?", 50 | "Did you know that cat owners are 17% more likely to have a graduate degree?", 51 | "Did you know that when your cat brings home a dead mouse or bird, it may do so to show you that you suck at hunting?", 52 | ] 53 | 54 | 55 | REDDIT_PM_IGNORE = "https://reddit.com/message/compose/?to=xkcd_transcriber&subject=ignore%20me&message=ignore%20me" 56 | REDDIT_PM_DELETE = "https://reddit.com/message/compose/?to=xkcd_transcriber&subject=delete&message=delete%20{thing_id}" 57 | NO_BREAK_SPACE = u'\u00A0' 58 | MAX_MESSAGE_LENGTH = 10000 59 | 60 | XKCD_SIG_LINKS = [ 61 | u'[xkcd.com](https://www.xkcd.com)', 62 | u'[xkcd%ssub](https://www.reddit.com/r/xkcd/)' % NO_BREAK_SPACE, 63 | u'[Problems/Bugs?](https://www.reddit.com/r/xkcd_transcriber/)', 64 | u'[Statistics](http://xkcdref.info/statistics/)', 65 | u'[Stop%sReplying](%s)' % (NO_BREAK_SPACE, REDDIT_PM_IGNORE), 66 | u'[Delete](%s)' % REDDIT_PM_DELETE 67 | ] 68 | 69 | MARKDOWN_ESCAPE_CHARACTERS = "\\`*_{}[]()#+-.!:|&<>/^~" 70 | 71 | 72 | class MailXkcdBot(MailTriggeredBot): 73 | def __init__(self, *args, **kwargs): 74 | self.datastore = kwargs.pop('datastore') 75 | self.xkcd_fetcher = kwargs.pop('xkcd_fetcher') 76 | super(MailXkcdBot, self).__init__(*args, **kwargs) 77 | 78 | def _check(self, mail): 79 | if utils.has_replied(mail, self.auth['username']): 80 | return False 81 | if utils.is_comment_owner(mail, self.auth['username']): 82 | return False 83 | return True 84 | 85 | def _do(self, mail): 86 
| body_lower = mail.body.lower() 87 | subject_lower = mail.subject.lower() 88 | result = True 89 | 90 | if self.is_private_message(mail): 91 | if body_lower.find('ignore me') != -1 or subject_lower.find('ignore me') != -1: 92 | result = self.process_ignore(mail) 93 | elif body_lower.startswith('delete') or subject_lower.startswith('delete'): 94 | result = self.process_delete(mail) 95 | elif self.is_comment_reply(mail): 96 | result = self.process_comment_reply(mail) 97 | 98 | if result and not self.dry_run: 99 | mail.mark_as_read() 100 | return result 101 | 102 | def process_ignore(self, mail): 103 | # Add to ignore list 104 | if not self.dry_run: 105 | self.datastore.add_ignore(mail.author.name.lower()) 106 | 107 | # Reply to the user 108 | reply_msg = "You have been added to the ignore list. If this bot continues to respond, PM /u/LunarMist2." 109 | 110 | # Do not send if we are doing a dry run 111 | if self.dry_run: 112 | return True 113 | 114 | #if utils.send_reply(mail, reply_msg): 115 | # return True 116 | #return False 117 | return True 118 | 119 | def process_delete(self, mail): 120 | # Ensure the mail author is the same as the original referencer 121 | parts = mail.body.split(' ') 122 | if len(parts) == 2: 123 | thing_id = parts[1] 124 | obj = self.r.get_info(thing_id=thing_id) 125 | if obj: 126 | parent = self.r.get_info(thing_id=obj.parent_id) 127 | if parent and parent.author and parent.author.name == mail.author.name: 128 | if not self.dry_run: 129 | obj.delete() 130 | logger.info(' => Comment Deleted!') 131 | 132 | return True 133 | 134 | def process_comment_reply(self, mail): 135 | body_lower = mail.body.lower() 136 | 137 | # Check for joke replies 138 | if body_lower.find('thank you') != -1 or body_lower.find('thanks') != -1: 139 | reply_msg = random.choice(SB_SECRETS) + "My pleasure" 140 | elif body_lower.find('i love you') != -1: 141 | reply_msg = random.choice(SB_SECRETS) + "Love ya too~" 142 | elif body_lower == 'k': 143 | reply_msg = "[](/o_o)K" 144 | elif body_lower == ")": 145 | reply_msg = "(" 146 | else: 147 | return True 148 | 149 | # Do not reply if the user is ignored 150 | if mail.author and mail.author.name.lower() in self.datastore.get_ignores(): 151 | logger.info('Skipping mail {id}. Reason: Author on ignore list.'.format(id=mail.id)) 152 | return True 153 | 154 | # Check it was originally a reply to a transcript 155 | if not self._is_transcript_reply(self.r, mail, self.auth['username']): 156 | logger.info('Skipping to post joke reply to {id}. Reason: Not a reply to a transcript'.format(id=mail.id)) 157 | return True 158 | 159 | # Do not send if we are doing a dry run 160 | if self.dry_run: 161 | return True 162 | 163 | # Reply to the user 164 | if utils.send_reply(mail, reply_msg): 165 | return True 166 | return False 167 | 168 | def _is_transcript_reply(self, praw_r, praw_comment, username): 169 | if not hasattr(praw_comment, 'parent_id'): 170 | return False 171 | 172 | parent = praw_r.get_info(thing_id=praw_comment.parent_id) 173 | if not parent or type(parent) != praw.objects.Comment: 174 | return False 175 | return len(parent.body) > 50 and utils.is_comment_owner(parent, username) 176 | 177 | 178 | class VoteXkcdBot(UserCommentsVoteTriggeredBot): 179 | def _do(self, comment): 180 | logger.info('Comment {id} below score threshold: {score}. 
Removing'.format(id=comment.id, score=comment.score)) 181 | if not self.dry_run: 182 | comment.delete() 183 | return True 184 | 185 | 186 | class CommentXkcdBot(SubredditCommentTriggeredBot): 187 | def __init__(self, *args, **kwargs): 188 | self.datastore = kwargs.pop('datastore') 189 | self.xkcd_fetcher = kwargs.pop('xkcd_fetcher') 190 | super(CommentXkcdBot, self).__init__(*args, **kwargs) 191 | 192 | def _check(self, comment): 193 | if comment.body.lower().find('xkcd.com') == -1: 194 | return False 195 | if comment.subreddit.display_name.lower().find('xkcd') != -1: 196 | return False 197 | if comment.subreddit.display_name.lower() == 'jerktalkdiamond': 198 | return False 199 | if utils.is_comment_owner(comment, self.auth['username']): 200 | return False 201 | if utils.has_replied(comment, self.auth['username']): 202 | return False 203 | return not utils.has_chain(self.r, comment, self.auth['username']) 204 | 205 | def _do(self, comment): 206 | html = snudown.markdown(comment.body.encode('UTF-8')) 207 | soup = BeautifulSoup(html) 208 | refs = {} 209 | 210 | # Iterate through all links, get xkcd json 211 | for link in soup.find_all('a'): 212 | href = link.get('href') 213 | if not href: 214 | continue 215 | j = self.xkcd_fetcher.get_json(href) 216 | if not j: 217 | logger.warn('Data could not be fetched for {url}'.format(url=href)) 218 | continue 219 | refs[int(j.get('num', -1))] = { 220 | 'data': j, 221 | 'href': href 222 | } 223 | 224 | return self.process_references(comment, refs) 225 | 226 | def process_references(self, comment, refs): 227 | if not refs: 228 | return True 229 | 230 | # Record in db the references 231 | for comic_id, ref in refs.iteritems(): 232 | if comic_id > 0 and not self.dry_run: 233 | timestamp = int(time.time()) 234 | author = comment.author.name if comment.author else '[deleted]' 235 | sub = comment.subreddit.display_name 236 | link = comment.permalink 237 | self.datastore.insert_xkcd_event(comic_id, timestamp, sub, author, link, 238 | ref['data'].get('from_external', False)) 239 | 240 | # Do not reply if the user is ignored 241 | if comment.author and comment.author.name.lower() in self.datastore.get_ignores(): 242 | logger.info('Skipping comment {id}. 
Reason: Author on ignore list.'.format(id=comment.id)) 243 | return True 244 | 245 | return self.send_reply(comment, refs) 246 | 247 | def send_reply(self, comment, refs): 248 | builder = ReferenceBuilder() 249 | reply_msg = builder.build_all(comment, refs, self.xkcd_fetcher, self.datastore, None) 250 | 251 | # Do not send if there's no body 252 | if builder.get_body_length() == 0: 253 | return True 254 | 255 | # Do not send if we are doing a dry run 256 | if self.dry_run: 257 | return True 258 | 259 | # Reply to the user 260 | reply_obj = utils.send_reply(comment, reply_msg) 261 | if reply_obj is None: 262 | return False 263 | 264 | # Edit and fix [delete] signature link 265 | builder.build_signature(reply_obj) 266 | reply_msg = builder.cat() 267 | if not utils.edit_reply(reply_obj, reply_msg): 268 | return False 269 | 270 | return True 271 | 272 | 273 | class SubmissionXkcdBot(SubredditSubmissionTriggeredBot): 274 | def __init__(self, *args, **kwargs): 275 | self.datastore = kwargs.pop('datastore') 276 | self.xkcd_fetcher = kwargs.pop('xkcd_fetcher') 277 | super(SubmissionXkcdBot, self).__init__(*args, **kwargs) 278 | 279 | def _check(self, submission): 280 | if submission.is_self: 281 | if submission.selftext.lower().find('xkcd.com') == -1: 282 | return False 283 | else: 284 | if submission.url.lower().find('xkcd.com') == -1: 285 | return False 286 | if submission.subreddit.display_name.lower().find('xkcd') != -1: 287 | return False 288 | if submission.subreddit.display_name.lower() == 'jerktalkdiamond': 289 | return False 290 | if utils.is_comment_owner(submission, self.auth['username']): 291 | return False 292 | if utils.has_replied(submission, self.auth['username']): 293 | return False 294 | return not utils.has_chain(self.r, submission, self.auth['username']) 295 | 296 | def _do(self, submission): 297 | if submission.is_self: 298 | return self.process_self(submission) 299 | else: 300 | return self.process_link(submission) 301 | 302 | def process_self(self, submission): 303 | html = snudown.markdown(submission.selftext.encode('UTF-8')) 304 | soup = BeautifulSoup(html) 305 | refs = {} 306 | 307 | # Iterate through all links, get xkcd json 308 | for link in soup.find_all('a'): 309 | href = link.get('href') 310 | if not href: 311 | continue 312 | j = self.xkcd_fetcher.get_json(href) 313 | if not j: 314 | logger.warn('Data could not be fetched for {url}'.format(url=href)) 315 | continue 316 | refs[int(j.get('num', -1))] = { 317 | 'data': j, 318 | 'href': href 319 | } 320 | 321 | return self.process_references(submission, refs) 322 | 323 | def process_link(self, submission): 324 | # Only need to process a single url 325 | j = self.xkcd_fetcher.get_json(submission.url) 326 | if not j: 327 | logger.warn('Data could not be fetched for {url}'.format(url=submission.url)) 328 | return True 329 | refs = { 330 | int(j.get('num', -1)): { 331 | 'data': j, 332 | 'href': submission.url 333 | } 334 | } 335 | 336 | return self.process_references(submission, refs) 337 | 338 | def process_references(self, submission, refs): 339 | if not refs: 340 | return True 341 | 342 | # Record in db the references 343 | for comic_id, ref in refs.iteritems(): 344 | if comic_id > 0 and not self.dry_run: 345 | timestamp = int(time.time()) 346 | author = submission.author.name if submission.author else '[deleted]' 347 | sub = submission.subreddit.display_name 348 | link = submission.permalink 349 | self.datastore.insert_xkcd_event(comic_id, timestamp, sub, author, link, 350 | ref['data'].get('from_external', False)) 351 
| 352 | # Do not reply if the user is ignored 353 | if submission.author and submission.author.name.lower() in self.datastore.get_ignores(): 354 | logger.info('Skipping submission {id}. Reason: Author on ignore list.'.format(id=submission.id)) 355 | return True 356 | 357 | return self.send_reply(submission, refs) 358 | 359 | def send_reply(self, submission, refs): 360 | # TODO: Re-enable transcripts when the json has been fixed 361 | builder = ReferenceBuilder(include_transcript=False) 362 | reply_msg = builder.build_all(submission, refs, self.xkcd_fetcher, self.datastore, None) 363 | 364 | # Do not send if there's no body 365 | if builder.get_body_length() == 0: 366 | return True 367 | 368 | # Do not send if we are doing a dry run 369 | if self.dry_run: 370 | return True 371 | 372 | # Reply to the user 373 | reply_obj = utils.send_reply(submission, reply_msg) 374 | if reply_obj is None: 375 | return False 376 | 377 | # Edit and fix [delete] signature link 378 | builder.build_signature(reply_obj) 379 | reply_msg = builder.cat() 380 | if not utils.edit_reply(reply_obj, reply_msg): 381 | return False 382 | 383 | return True 384 | 385 | 386 | class ReferenceBuilder(object): 387 | def __init__(self, include_transcript=False): 388 | self.include_transcript = include_transcript 389 | self.reply_msg_head = '' 390 | self.reply_msg_sig = '' 391 | self.reply_msg_body = '' 392 | 393 | def build_head(self, comment): 394 | # Check for secret message 395 | secret_message = '' 396 | # Only check if there's a block of text to process 397 | text = getattr(comment, 'body', getattr(comment, 'selftext', '')) 398 | matches = re.finditer(FULL_EMOTE_REGEX, text) 399 | if matches: 400 | for match in matches: 401 | d = match.groupdict() 402 | if d['message'] and d['message'].find('xkcd_transcriber') != -1: 403 | secret_message = self._get_secret_reply(comment.author) 404 | break 405 | 406 | # Secret emote 407 | secret_emote = '' 408 | if comment.subreddit.display_name.lower() in PONY_SUBS or secret_message: 409 | secret_emote = random.choice(PONY_SECRETS) % secret_message + ' ' 410 | 411 | self.reply_msg_head = secret_emote 412 | 413 | def _get_secret_reply(self, author): 414 | greetings = "Hello, " + (author.name if author else "[deleted]") + ". "
415 | wisdom = random.choice(WISDOM_MESSAGES) 416 | return greetings + wisdom 417 | 418 | def build_signature(self, reply_obj): 419 | if reply_obj is None: 420 | self.reply_msg_sig = '---\n' + ' ^| '.join(['^' + a for a in XKCD_SIG_LINKS]) 421 | else: 422 | self.reply_msg_sig = '---\n' + ' ^| '.join(['^' + a for a in XKCD_SIG_LINKS]).format( 423 | thing_id=reply_obj.name) 424 | 425 | def build_body(self, refs, xkcd_fetcher, datastore): 426 | # Reset 427 | self.reply_msg_body = '' 428 | 429 | # Build body text 430 | for comic_id, ref in refs.iteritems(): 431 | data = ref['data'] 432 | if self.reply_msg_body != '': 433 | self.reply_msg_body += u'----\n' 434 | 435 | if ref['href'].find('imgs.xkcd.com') != -1 or data.get('from_external') is True: 436 | self.reply_msg_body += u'[Original Source](https://xkcd.com/{num}/)\n\n'.format(num=comic_id) 437 | elif data.get('img'): 438 | self.reply_msg_body += u'[Image]({image})\n\n'.format(image=self._format_url(data.get('img'))) 439 | if data.get('link'): 440 | self.reply_msg_body += u'[Link]({link})\n\n'.format(link=self._format_url(data.get('link'))) 441 | self.reply_msg_body += u'[Mobile](https://m.xkcd.com/{num}/)\n\n'.format(num=comic_id) 442 | if data.get('title'): 443 | self.reply_msg_body += u'**Title:** {title}\n\n'.format(title=self._format_text(data.get('title', ''))) 444 | if data.get('transcript') and self.include_transcript: 445 | self.reply_msg_body += u'**Transcript:** {transcript}\n\n'.format( 446 | transcript=self._format_text(re.sub('\n{{.+}}', '', data.get('transcript', '')))) 447 | if data.get('alt'): 448 | self.reply_msg_body += u'**Title-text:** {alt}\n\n'.format(alt=self._format_text(data.get('alt', ''))) 449 | if comic_id > 0: 450 | explained = xkcd_fetcher.get_explained_link(comic_id) 451 | self.reply_msg_body += u'[Comic Explanation]({link})\n\n'.format(link=explained) 452 | 453 | stats = datastore.get_stats(comic_id) 454 | if stats: 455 | plural = 's' if stats['count'] != 1 else '' 456 | self.reply_msg_body += u'**Stats:** This comic has been referenced {0} time{1}, representing {2:.4f}% of referenced xkcds.\n\n'.format( 457 | stats['count'], plural, stats['percentage']) 458 | 459 | def build_all(self, comment, refs, xkcd_fetcher, datastore, reply_obj): 460 | self.build_head(comment) 461 | self.build_body(refs, xkcd_fetcher, datastore) 462 | self.build_signature(reply_obj) 463 | return self.cat() 464 | 465 | def cat(self): 466 | return self.reply_msg_head + self.reply_msg_body + self.reply_msg_sig 467 | 468 | def get_body_length(self): 469 | return len(self.reply_msg_body) 470 | 471 | def _format_url(self, url): 472 | return url.replace('(', '\\(').replace(')', '\\)') 473 | 474 | def _format_text(self, text): 475 | if isinstance(text, unicode): 476 | text = text.encode('raw_unicode_escape').decode('utf-8') 477 | lines = text.replace('\n', '\n\n') 478 | lines = self._escape_markdown(lines) 479 | return lines 480 | 481 | def _escape_markdown(self, text): 482 | for c in MARKDOWN_ESCAPE_CHARACTERS: 483 | text = text.replace(str(c), '\\' + str(c)) 484 | return text 485 | -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/datastore.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sqlite3 3 | 4 | import simplejson 5 | 6 | 7 | class SimpleDataStore(object): 8 | def __init__(self, db_path): 9 | self.db_path = db_path 10 | self.conn = None 11 | 12 | def open(self): 13 | if not self.conn: 14 | self.conn = 
sqlite3.connect(self.db_path) 15 | 16 | def close(self): 17 | if self.conn: 18 | self.conn.close() 19 | self.conn = None 20 | 21 | def execute(self, *args, **kwargs): 22 | self.open() 23 | c = self.conn.cursor() 24 | c.execute(*args, **kwargs) 25 | return c 26 | 27 | def commit(self): 28 | if self.conn: 29 | self.conn.commit() 30 | 31 | 32 | class BotDataStore(object): 33 | def __init__(self, bot_name, database_path): 34 | # Create the path if it does not already exist 35 | if not os.path.exists(os.path.dirname(database_path)): 36 | os.makedirs(os.path.dirname(database_path)) 37 | 38 | self.bot_name = bot_name 39 | self.database_path = database_path 40 | self.datastore = SimpleDataStore(self.database_path) 41 | self.create() 42 | 43 | def create(self): 44 | self.datastore.execute(""" 45 | CREATE TABLE IF NOT EXISTS ignored_users ( 46 | bot_name TEXT, 47 | target_name TEXT, 48 | UNIQUE(bot_name, target_name) ON CONFLICT IGNORE 49 | ); 50 | """) 51 | 52 | self.datastore.execute(""" 53 | CREATE TABLE IF NOT EXISTS xkcd_comic_references ( 54 | comic_id INTEGER, 55 | time INTEGER NOT NULL, 56 | subreddit TEXT, 57 | user TEXT, 58 | link TEXT, 59 | UNIQUE(comic_id, subreddit, user, link) ON CONFLICT IGNORE 60 | ); 61 | """) 62 | 63 | self.datastore.execute(""" 64 | CREATE TABLE IF NOT EXISTS xkcd_comic_meta ( 65 | comic_id INTEGER PRIMARY KEY, 66 | json TEXT, 67 | title TEXT, 68 | hash_avg TEXT, 69 | hash_d TEXT, 70 | hash_p TEXT 71 | ); 72 | """) 73 | 74 | self.datastore.execute(""" 75 | CREATE VIEW IF NOT EXISTS references_counts AS 76 | SELECT 77 | comic_id, 78 | 0 AS comic_count, 79 | 0.0 AS comic_percentage 80 | FROM 81 | xkcd_comic_meta 82 | WHERE comic_id NOT IN ( 83 | SELECT DISTINCT(comic_id) FROM xkcd_comic_references 84 | ) 85 | UNION 86 | SELECT 87 | comic_id, 88 | COUNT(*) AS comic_count, 89 | (COUNT(*) * 100.0) / (SELECT COUNT(*) FROM xkcd_comic_references) AS comic_percentage 90 | FROM 91 | xkcd_comic_references 92 | GROUP BY 93 | comic_id 94 | ; 95 | """) 96 | 97 | self.datastore.commit() 98 | 99 | def add_ignore(self, target): 100 | self.datastore.execute( 101 | 'INSERT INTO ignored_users VALUES(?, ?)', 102 | (self.bot_name, target) 103 | ) 104 | 105 | self.datastore.commit() 106 | 107 | def get_ignores(self): 108 | cursor = self.datastore.execute( 109 | 'SELECT target_name FROM ignored_users WHERE bot_name = ?', 110 | (self.bot_name,) 111 | ) 112 | 113 | return [r[0] for r in cursor] 114 | 115 | def get_stats(self, comic_id): 116 | cursor = self.datastore.execute( 117 | 'SELECT comic_count, comic_percentage FROM references_counts WHERE comic_id = ?', 118 | (int(comic_id),) 119 | ) 120 | 121 | meta = cursor.fetchone() 122 | if not meta: 123 | return None 124 | return { 125 | 'count': meta[0], 126 | 'percentage': meta[1] 127 | } 128 | 129 | def insert_xkcd_event(self, comic_id, time, subreddit, user, link, from_external): 130 | self.datastore.execute( 131 | 'INSERT INTO xkcd_comic_references VALUES(?, ?, ?, ?, ?)', 132 | (int(comic_id), int(time), subreddit, user, link) 133 | ) 134 | 135 | self.datastore.commit() 136 | 137 | def get_xkcd_meta(self, comic_id): 138 | cursor = self.datastore.execute( 139 | 'SELECT comic_id, json, hash_avg, hash_d, hash_p FROM xkcd_comic_meta WHERE comic_id = ?', 140 | (int(comic_id),) 141 | ) 142 | 143 | meta = cursor.fetchone() 144 | if not meta: 145 | return None 146 | return { 147 | 'comic_id': meta[0], 148 | 'json_data': simplejson.loads(meta[1]), 149 | 'hash_avg': meta[2], 150 | 'hash_d': meta[3], 151 | 'hash_p': meta[4], 152 | } 153 | 154 | def
insert_xkcd_meta(self, comic_id, json, hash_avg, hash_d, hash_p): 155 | r = self.datastore.execute( 156 | 'SELECT 1 FROM xkcd_comic_meta WHERE comic_id = ?', 157 | (int(comic_id),) 158 | ) 159 | 160 | if r.fetchone() is None: 161 | self.datastore.execute( 162 | 'INSERT INTO xkcd_comic_meta VALUES(?, ?, ?, ?, ?, ?)', 163 | (int(comic_id), simplejson.dumps(json), json.get('title', ''), str(hash_avg), str(hash_d), str(hash_p)) 164 | ) 165 | 166 | self.datastore.commit() 167 | 168 | def close(self): 169 | try: 170 | self.datastore.close() 171 | except Exception as e: 172 | pass 173 | -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/runbot.py: -------------------------------------------------------------------------------- 1 | # Must be first for monkey_patch() 2 | from redditbot.base import patch_all 3 | patch_all() 4 | 5 | import logging 6 | 7 | from redditbot.bots import settings 8 | from redditbot.base.handlers import MultiBotHandler 9 | from bot import SubmissionXkcdBot, CommentXkcdBot, MailXkcdBot, VoteXkcdBot 10 | from datastore import BotDataStore 11 | from xkcdfetcher import XkcdFetcher 12 | 13 | logging.basicConfig() 14 | 15 | 16 | def run(): 17 | datastore = BotDataStore('xkcd_transcriber', settings.XKCD_DB_LOCATION) 18 | xkcd_fetcher = XkcdFetcher(datastore) 19 | 20 | # If fetch_limit is set to None, it will keep on going back for hugely old submissions 21 | submission_bot = SubmissionXkcdBot(user_agent='xkcdref bot (submission) by %s' % settings.AUTHOR, 22 | auth=settings.REDDIT_ACCOUNTS['xkcd_transcriber'], 23 | delay=20, 24 | fetch_limit=300, 25 | cache_size=600, 26 | dry_run=settings.DRY_RUN, 27 | subreddit='all', 28 | datastore=datastore, 29 | xkcd_fetcher=xkcd_fetcher) 30 | 31 | comment_bot = CommentXkcdBot(user_agent='xkcdref bot (comment) by %s' % settings.AUTHOR, 32 | auth=settings.REDDIT_ACCOUNTS['xkcd_transcriber'], 33 | delay=15, 34 | fetch_limit=None, 35 | cache_size=2000, 36 | dry_run=settings.DRY_RUN, 37 | subreddit='all', 38 | datastore=datastore, 39 | xkcd_fetcher=xkcd_fetcher) 40 | 41 | mail_bot = MailXkcdBot(user_agent='xkcdref bot (message) by %s' % settings.AUTHOR, 42 | auth=settings.REDDIT_ACCOUNTS['xkcd_transcriber'], 43 | delay=60, 44 | fetch_limit=None, 45 | cache_size=0, 46 | dry_run=settings.DRY_RUN, 47 | datastore=datastore, 48 | xkcd_fetcher=xkcd_fetcher) 49 | 50 | vote_bot = VoteXkcdBot(user_agent='xkcdref bot (vote) by %s' % settings.AUTHOR, 51 | auth=settings.REDDIT_ACCOUNTS['xkcd_transcriber'], 52 | delay=300, 53 | fetch_limit=None, 54 | cache_size=0, 55 | dry_run=settings.DRY_RUN, 56 | monitored_user='xkcd_transcriber', 57 | score_threshold_min=-1) 58 | 59 | # Run all bots 60 | MultiBotHandler([ 61 | submission_bot, 62 | comment_bot, 63 | mail_bot, 64 | vote_bot 65 | ]).run() 66 | 67 | 68 | if __name__ == '__main__': 69 | run() 70 | -------------------------------------------------------------------------------- /bots/redditbot/bots/xkcdref/xkcdfetcher.py: -------------------------------------------------------------------------------- 1 | import urllib2 2 | import re 3 | import logging 4 | import urlparse 5 | 6 | import simplejson 7 | 8 | logger = logging.getLogger(__name__) 9 | logger.setLevel(logging.INFO) 10 | 11 | XKCD_JSON_API_URL = 'http://xkcd.com/{comic_id}/info.0.json' 12 | XKCD_EXPLAINED_URL = 'https://www.explainxkcd.com/wiki/index.php/{comic_id}#Explanation' 13 | 14 | 15 | class XkcdFetcher(object): 16 | def __init__(self, datastore): 17 | self.datastore = datastore 18 | 
self.next_index = 1 19 | self.reverse_image_index = {} 20 | self.reverse_hash_index = {} 21 | self.json_index = {} 22 | 23 | def get_json(self, url): 24 | if not url.startswith('http'): 25 | url = '//' + url 26 | parsed = urlparse.urlparse(url) 27 | 28 | if re.match('^(www\.)?imgs\.xkcd\.com$', parsed.netloc.lower()): 29 | if parsed.path not in self.reverse_image_index: 30 | self._load_indexes() 31 | comic_id = self.reverse_image_index.get(parsed.path) 32 | return self.json_index.get(comic_id) if comic_id else None 33 | 34 | if re.match('^(www\.)?xkcd\.com$', parsed.netloc.lower()) and re.match('^/\d+/?$', parsed.path): 35 | m = re.search('^/(\d+)/?$', parsed.path) 36 | comic_id = int(m.group(1)) 37 | if comic_id not in self.json_index: 38 | self._load_indexes() 39 | return self.json_index.get(comic_id) if comic_id else None 40 | 41 | if re.match('^imgur\.com$', parsed.netloc): 42 | pass 43 | 44 | return None 45 | 46 | def get_explained_link(self, comic_id): 47 | return XKCD_EXPLAINED_URL.format(comic_id=comic_id) 48 | 49 | def _load_indexes(self): 50 | while True: 51 | # Get metadata 52 | meta = self._get_meta(self.next_index) 53 | if not meta: 54 | return 55 | 56 | # comic_id -> json 57 | self.json_index[self.next_index] = meta['json_data'] 58 | 59 | # image_url_path_part -> comic_id 60 | parsed = urlparse.urlparse(meta['json_data'].get('img', '')) 61 | if parsed.path and parsed.path not in self.reverse_image_index: 62 | self.reverse_image_index[parsed.path] = self.next_index 63 | 64 | # avg_hash -> comic_id 65 | if meta['hash_avg'] and meta['hash_avg'] not in self.reverse_hash_index: 66 | self.reverse_hash_index[meta['hash_avg']] = self.next_index 67 | 68 | self.next_index += 1 69 | 70 | def _get_meta(self, comic_id): 71 | meta = self.datastore.get_xkcd_meta(comic_id) 72 | if not meta: 73 | comic_id, json_data, hash_avg, hash_d, hash_p = self._build_xkcd_meta(comic_id) 74 | if comic_id is not None: 75 | self.datastore.insert_xkcd_meta(comic_id, json_data, hash_avg, hash_d, hash_p) 76 | meta = self.datastore.get_xkcd_meta(comic_id) 77 | return meta 78 | 79 | def _build_xkcd_meta(self, comic_id): 80 | j = self._get_xkcd_json(comic_id) 81 | if j: 82 | hash_avg, hash_d, hash_p = self._get_image_hashes(j.get('img')) 83 | return comic_id, j, hash_avg, hash_d, hash_p 84 | return None, None, None, None, None 85 | 86 | def _get_image_hashes(self, url): 87 | return '', '', '' 88 | 89 | """ 90 | def _get_image_hashes(self, url): 91 | if not url: 92 | return '', '', '' 93 | file_name = '/tmp/' + get_random_file_name() 94 | try: 95 | self.myopener.retrieve(url, file_name) 96 | hash_avg = imagehash.average_hash(Image.open(file_name)) 97 | hash_d = imagehash.dhash(Image.open(file_name)) 98 | hash_p = imagehash.phash(Image.open(file_name)) 99 | return str(hash_avg), str(hash_d), str(hash_p) 100 | except Exception as e: 101 | logger.exception('Exception while getting image hashes') 102 | return '', '', '' 103 | finally: 104 | os.remove(file_name) 105 | """ 106 | 107 | def _get_xkcd_json(self, comic_id): 108 | if int(comic_id) == 404: 109 | return {'title': '404', 'transcript': '404', 'alt': '404', 'img': '', 'num': 404} 110 | 111 | try: 112 | response = urllib2.urlopen(XKCD_JSON_API_URL.format(comic_id=comic_id)) 113 | html = response.read() 114 | return simplejson.loads(html) 115 | except Exception as e: 116 | # logger.exception('Exception while getting xkcd json') 117 | return None 118 | -------------------------------------------------------------------------------- /bots/setup.py: 
-------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name='redditbot.bots', 5 | version='1.0.0', 6 | author='Jeremy Simpson', 7 | description='Redditbot bots', 8 | license='MIT', 9 | classifiers=[ 10 | 'Development Status :: 5 - Production/Stable', 11 | 'Intended Audience :: Developers', 12 | 'License :: OSI Approved :: MIT License', 13 | 'Programming Language :: Python :: 2.7', 14 | ], 15 | packages=setuptools.find_packages(), 16 | namespace_packages=['redditbot'], 17 | entry_points={ 18 | 'console_scripts': [ 19 | 'runbot-emote-counter = redditbot.bots.emote_counter.runbot:run', 20 | 'runbot-xkcdref = redditbot.bots.xkcdref.runbot:run' 21 | ] 22 | } 23 | ) 24 | --------------------------------------------------------------------------------