├── .gitignore ├── LICENSE ├── README.md ├── requirements.txt └── src ├── .gitignore ├── bot.py ├── filters.py ├── main.py ├── settings.py.example └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by http://www.gitignore.io 2 | 3 | ### Python ### 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # Installer logs 28 | pip-log.txt 29 | pip-delete-this-directory.txt 30 | 31 | # Unit test / coverage reports 32 | htmlcov/ 33 | .tox/ 34 | .coverage 35 | .cache 36 | nosetests.xml 37 | coverage.xml 38 | 39 | # Translations 40 | *.mo 41 | *.pot 42 | 43 | # Django stuff: 44 | *.log 45 | 46 | # Sphinx documentation 47 | docs/_build/ 48 | 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Michael Cetrulo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | xpost-bot 2 | ========= 3 | 4 | Reddit bot to scan and repost submissions of interest to niche subreddits 5 | 6 | Setup 7 | ----- 8 | 9 | * Install Python+pip (tested on Python 2.7.3) 10 | * Install deps with `pip install -r requirements.txt` 11 | * Copy src/settings.py.example to src/settings.py and fill-in 12 | * Run src/main.py 13 | 14 | You'll need a user with a bit of karma to auth the bot for the posts to succeed. 15 | 16 | Settings 17 | -------- 18 | 19 | **user_agent** string to identify the bot on Reddit (REQUIRED) 20 | 21 | **bot_username** Reddit user from which the bot will post, e.g. 'Samus_' (REQUIRED) 22 | 23 | **bot_password** credentials for bot_username e.g. 'hunter2' (REQUIRED) 24 | 25 | **destination_subreddit** subreddit to repost matches, e.g. 'AskReddit' (REQUIRED) 26 | 27 | **title_template** template string to generate submission title, defaults to '{s.title}' which simply copies the original. 
`s` is an instance of https://praw.readthedocs.org/en/v2.1.16/pages/code_overview.html#praw.objects.Submission 28 | 29 | **matched_keywords**, **matched_regexps** list of strings/string-regexps that the submission must contain/match (any of those) in order to be considered for repost 30 | 31 | **excluded_keywords**, **excluded_regexps** list of strings/string-regexps that the submission must not contain/match (none of those) in order to be considered for repost, takes precedence over matched 32 | 33 | **target_subreddits** list of subreddits to scan for matches, e.g. \['python', 'django'] (defaults to \['all'] meaning the whole site) 34 | 35 | **ignored_subreddits** list of subreddits to exclude from matches (destination_subreddit is automatically ignored) 36 | 37 | **ignored_submitters** list of Reddit usernames for users whose submissions won't be reposted 38 | 39 | TODO 40 | ---- 41 | 42 | * Handle connection errors 43 | * Stop gracefully 44 | * Add logging messages 45 | 46 | Sophisticated Cat 47 | 48 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | argparse==1.2.1 2 | praw==2.1.16 3 | requests==2.3.0 4 | six==1.6.1 5 | update-checker==0.10 6 | wsgiref==0.1.2 7 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | /settings.py 2 | -------------------------------------------------------------------------------- /src/bot.py: -------------------------------------------------------------------------------- 1 | import praw 2 | 3 | from filters import SubmissionFilter 4 | 5 | 6 | __version__ = '0.1.1' 7 | 8 | 9 | class XPostBot(object): 10 | """ XPostBot - Reddit bot to scan relevant content and repost on specialized subreddits """ 11 | 12 | def __init__(self, settings): 13 | """ create instance of PRAW 
    def _submissions_gen(self, target_subreddits, newer_than_id=None):
        """ internal generator that polls the /new listing of the target subreddits forever

        target_subreddits: names joined with '+' into a single multireddit query
        newer_than_id: `name` of the submission to resume after; when None the
            current newest submission (if any) is used as the starting anchor

        Yields every submission returned by the listing, unfiltered; no check
        against the matching settings happens in this generator.
        Never returns: the polling loop has no exit condition.
        """
        # construct multireddit url to query from list of target subreddits
        target_multireddit = self.bot.get_subreddit('+'.join(target_subreddits))

        # if there's no anchor get the current newest and continue from there
        if newer_than_id is None:
            submissions = target_multireddit.get_new(limit=1)

            try:
                # `name` is presumably the reddit fullname expected by the
                # "before" listing parameter -- TODO confirm against PRAW docs
                newer_than_id = next(submissions).name
            except StopIteration:
                # if it's an empty subreddit keep it as None
                # brings everything that gets posted from here until the next request
                pass

        # newest id seen during the current poll; None until something is yielded
        next_anchor_id = None

        # NOTE(review): nothing here sleeps between polls; pacing presumably
        # relies on praw throttling its own requests -- confirm
        while True:
            # uses "before" API param because /new lists from newest to oldest
            # limit=None brings everything that matches (even if it takes multiple requests)
            submissions = target_multireddit.get_new(params={'before': newer_than_id}, limit=None)

            for submission in submissions:
                yield submission

                # save the first id as anchor since it'll be the newest
                if next_anchor_id is None:
                    next_anchor_id = submission.name

            # avoid getting newer_than_id=None when there's no results from the previous iterations
            # (the old anchor is kept, so the next poll repeats the same query)
            if next_anchor_id is not None:
                newer_than_id, next_anchor_id = next_anchor_id, None
""" perform login to reddit using user-credentials (not OAuth) """ 65 | bot_username = self.settings['bot_username'] 66 | bot_password = self.settings['bot_password'] 67 | 68 | self.bot.login(bot_username, bot_password) 69 | 70 | def get_submissions(self, newer_than_id=None): 71 | """ main method to retrieve matching submissions (yields indefinitely) """ 72 | target_subreddits = self.settings['target_subreddits'] 73 | submissions_gen = self._submissions_gen(target_subreddits, newer_than_id) 74 | 75 | submission_filter = SubmissionFilter(self.settings) 76 | return submission_filter.filter_stream(submissions_gen) 77 | 78 | def repost_submission(self, submission): 79 | subreddit = self.bot.get_subreddit(self.settings['destination_subreddit']) 80 | 81 | title = self.settings['title_template'].format(s=submission) 82 | try: 83 | return subreddit.submit( 84 | title, url=submission.permalink, raise_captcha_exception=True 85 | ) 86 | except praw.errors.AlreadySubmitted: 87 | pass 88 | 89 | -------------------------------------------------------------------------------- /src/filters.py: -------------------------------------------------------------------------------- 1 | class SubmissionFilter(object): 2 | """ methods to determine whether a submission is relevant for reposting """ 3 | 4 | def __init__(self, settings): 5 | """ store settings """ 6 | self.settings = settings 7 | 8 | def _is_valid_submitter(self, submission): 9 | """ returns True when the submission isn't from an ignored submitter or the submitter deleted his account """ 10 | ignored_submitters = self.settings['ignored_submitters'] 11 | 12 | if not submission.author: 13 | return True # allow [deleted] 14 | 15 | author_name = submission.author.name.lower() 16 | return author_name not in ignored_submitters 17 | 18 | def _is_valid_subreddit(self, submission): 19 | """ returns True when the submission hasn't been done on an ignored subreddit """ 20 | ignored_subreddits = self.settings['ignored_subreddits'] 21 | 22 
| subreddit_name = submission.subreddit.display_name.lower() 23 | return subreddit_name not in ignored_subreddits 24 | 25 | def _test_text(self, text, target_keywords=None, target_regexps=None): 26 | """ returns True when 'text' contains any of the target_keywords or matches any of the target_regexps """ 27 | target_keywords = [] if target_keywords is None else target_keywords 28 | target_regexps = [] if target_regexps is None else target_regexps 29 | 30 | text = text.lower() 31 | return any( 32 | keyword in text for keyword in target_keywords 33 | ) or any( 34 | regexp.search(text) for regexp in target_regexps 35 | ) 36 | 37 | def _test_matches(self, submission): 38 | """ returns True when the submission's title or selftext contains any of the matched_keywords or matches any of the matched_regexps """ 39 | matched_keywords = self.settings['matched_keywords'] 40 | matched_regexps = self.settings['matched_regexps'] 41 | 42 | match = self._test_text(submission.title, matched_keywords, matched_regexps) 43 | if not match and submission.is_self: 44 | match = self._test_text(submission.selftext, matched_keywords, matched_regexps) 45 | 46 | return match 47 | 48 | def _test_exclusions(self, submission): 49 | """ returns True when the submission's title or selftext contains any of the excluded_keywords or matches any of the excluded_regexps """ 50 | excluded_keywords = self.settings['excluded_keywords'] 51 | excluded_regexps = self.settings['excluded_regexps'] 52 | 53 | excluded = self._test_text(submission.title, excluded_keywords, excluded_regexps) 54 | if not excluded and submission.is_self: 55 | excluded = self._test_text(submission.selftext, excluded_keywords, excluded_regexps) 56 | 57 | return excluded 58 | 59 | def filter_submission(self, submission): 60 | """ determine whether this submission should be filtered or not, returns True when the sumission: 61 | -isn't from an ignored submitter 62 | -hasn't been posted on an ignored subreddit 63 | -contains any of the 
matched_keywords or matches any of the matched_regexps 64 | -doesn't contain any of the excluded_keywords or matches any of the excluded_regexps 65 | """ 66 | return all(( 67 | self._is_valid_submitter(submission), 68 | self._is_valid_subreddit(submission), 69 | self._test_matches(submission), 70 | not self._test_exclusions(submission), 71 | )) 72 | 73 | def filter_stream(self, stream): 74 | """ apply self.filter_submission to each element of the stream """ 75 | for submission in stream: 76 | if self.filter_submission(submission): 77 | yield submission 78 | 79 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from datetime import datetime 4 | 5 | from bot import XPostBot 6 | from utils import clean_settings 7 | import settings as raw_settings 8 | 9 | 10 | if __name__ == '__main__': 11 | settings = clean_settings(raw_settings) 12 | 13 | xpost_bot = XPostBot(settings) 14 | xpost_bot.login() # logged-in users get fewer cached responses from Reddit 15 | 16 | newer_than_id = sys.argv[1] if len(sys.argv) > 1 else None 17 | for submission in xpost_bot.get_submissions(newer_than_id): 18 | print('[{}] {} - {}'.format(datetime.now().isoformat(), submission.created_utc, submission)) 19 | print(xpost_bot.repost_submission(submission)) 20 | 21 | -------------------------------------------------------------------------------- /src/settings.py.example: -------------------------------------------------------------------------------- 1 | ## example settings file ## 2 | # copy this file to 'settings.py' and fill the blanks 3 | 4 | user_agent = '' # REQUIRED: bot identity (unique) 5 | 6 | bot_username = '' # REQUIRED: Reddit user from which the bot will post 7 | bot_password = '' # REQUIRED: credentials for bot_username 8 | 9 | destination_subreddit = '' # subreddit to repost matches 10 | title_template = 
import re

try:  # Python 2
    _string_types = basestring
    _text_type = unicode
except NameError:  # Python 3 has a single text type
    _string_types = str
    _text_type = str


def clean_subreddit(raw_subreddit):
    """ return the subreddit name unchanged (placeholder for URL parsing) """
    #TODO URL to subreddit
    return raw_subreddit


def clean_username(raw_username):
    """ return the username unchanged (placeholder for URL parsing) """
    #TODO URL to username
    return raw_username


def _require(raw_settings, name):
    """ return the non-empty setting `name`, raise ValueError when it's missing or empty """
    value = getattr(raw_settings, name, None)
    if not value:
        # ValueError is an Exception subclass so existing handlers keep working
        raise ValueError('Missing {0} in settings'.format(name))
    return value


def _as_list(value):
    """ normalize a list-like setting into a new list

    None and empty values give [], a single string gives a one-element list,
    any other iterable is copied so the settings module is never mutated
    """
    if not value:
        return []
    if isinstance(value, _string_types):
        return [value]
    return list(value)


def clean_settings(raw_settings):
    """ return a normalized settings dict from settings module

    raw_settings: any object exposing the settings as attributes

    Required (ValueError when missing or empty): user_agent, bot_username,
    bot_password, destination_subreddit.

    In the result:
    - title_template is text and defaults to '{s.title}'
    - *_keywords are lowercased lists
    - *_regexps are lists of patterns compiled with re.I
    - target_subreddits defaults to ['all']
    - ignored_subreddits and ignored_submitters are lowercased because the
      submission filter compares them against lowercased names;
      destination_subreddit is always appended to ignored_subreddits
    """
    cleaned = {}

    for name in ('user_agent', 'bot_username', 'bot_password'):
        cleaned[name] = _require(raw_settings, name)

    destination = clean_subreddit(_require(raw_settings, 'destination_subreddit'))
    cleaned['destination_subreddit'] = destination

    cleaned['title_template'] = _text_type(
        getattr(raw_settings, 'title_template', '{s.title}')
    )

    for name in ('matched_keywords', 'excluded_keywords'):
        cleaned[name] = [
            keyword.lower()
            for keyword in _as_list(getattr(raw_settings, name, None))
        ]

    for name in ('matched_regexps', 'excluded_regexps'):
        cleaned[name] = [
            re.compile(regexp, re.I)
            for regexp in _as_list(getattr(raw_settings, name, None))
        ]

    # an unset/empty value (list, string or None) means the whole site
    targets = _as_list(getattr(raw_settings, 'target_subreddits', None)) or ['all']
    cleaned['target_subreddits'] = [clean_subreddit(name) for name in targets]

    ignored_subreddits = [
        clean_subreddit(name).lower()
        for name in _as_list(getattr(raw_settings, 'ignored_subreddits', None))
    ]
    # never repost what already lives in the destination
    ignored_subreddits.append(destination.lower())
    cleaned['ignored_subreddits'] = ignored_subreddits

    cleaned['ignored_submitters'] = [
        clean_username(name).lower()
        for name in _as_list(getattr(raw_settings, 'ignored_submitters', None))
    ]

    return cleaned