├── cssirlbot ├── __init__.py ├── submissionhistory.py ├── validation.py ├── formatting.py └── processing.py ├── .gitignore ├── requirements.txt ├── .gitattributes ├── README.md ├── config.json └── cssirlbot.py /cssirlbot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | praw.ini 2 | __pycache__ 3 | cssirlbot.log 4 | processedsubmissions.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | praw 3 | git+https://github.com/lepture/mistune 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /cssirlbot/submissionhistory.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | # get processed submissions 4 | processed_submissions = [] 5 | if os.path.isfile("processedsubmissions.txt"): 6 | with open("processedsubmissions.txt", "r") as file: 7 | for line in file: 8 | processed_submissions.append(line[:-1]) 9 | 10 | # mark processed submissions as such 11 | def mark_as_processed(submission): 12 | # prevent duplicates 13 | if submission.id in processed_submissions: 14 | return 15 | 16 | # add to list 17 | processed_submissions.append(submission.id) 18 | 19 | # add to file 20 | with open("processedsubmissions.txt", "a") as file: 21 | file.write(submission.id + "\n") 22 | 23 | # check if submission has been processed 24 | def is_processed(submission): 25 | return submission.id in processed_submissions -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # css-irl-bot 2 | The CSS_IRL bot is a reddit bot that fetches new submissions from a subreddit and parses their titles to check if they are valid CSS. It can post a comment detailing the found errors and/or confirm the validity of the title. It can also parse comments when summoned. 3 | 4 | 5 | 6 | ## Configuration 7 | The bot can be configured through a file named **config.json** as well as **praw.ini**. 8 | 9 | In config.json, you can configure the following settings: 10 | 11 | * `subreddit`: The subreddit where the bot operates. 12 | * `process_submissions`: Process posts. 13 | * `comment_on_invalid_css`: Automatically comment on invalid titles. 14 | * `comment_on_valid_css`: Automatically comment on valid titles. 15 | * `distinguish_comments`: Mark comments as mod comments (requires the "posts" mod permission). 16 | * `sticky_comments`: Sticky comments when processing posts. 17 | * `process_mentions`: Process username mentions. 18 | * `process_external_mentions`: Process mentions outside the home subreddit. 19 | * `feed_check_interval`: How often the bot should check for new posts. 20 | * `logging_level`: How detailed the logs should be. 21 | 22 | You can also manage the strings used by the bot and create or remove aliases for its commands. 23 | 24 | The praw.ini file describes the credentials used by the bot. It has the following format: 25 | ```ini 26 | [cssirlbot] 27 | client_id=XXX 28 | client_secret=XXX 29 | password=XXX 30 | username=XXX 31 | ``` 32 | The client ID and secret can be obtained by [registering a reddit app](https://reddit.com/prefs/apps). 33 | 34 | ## Running the bot 35 | 36 | Download the files and prepare your configuration. Before running the bot, you might want to run `pip install -r requirements.txt` to obtain the dependencies. To start the script, run: 37 | 38 | ``` 39 | python3 cssirlbot.py 40 | ``` 41 | 42 | The bot will try to fetch the newest 100 submissions, mentions and comment replies and parse all of them. 43 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "behavior": { 3 | "subreddit": "css_irl_bot", 4 | "process_submissions": true, 5 | "comment_on_invalid_css": true, 6 | "comment_on_valid_css": true, 7 | "distinguish_comments": false, 8 | "sticky_comments": false, 9 | "process_mentions": true, 10 | "process_external_mentions": false 11 | }, 12 | "internal": { 13 | "feed_check_interval": 10, 14 | "logging_level": "INFO" 15 | }, 16 | "strings": { 17 | "VALID_TITLE_MESSAGE": "Congratulations! Your title contains valid CSS!", 18 | "VALID_TITLE_MESSAGE_FOREIGN": "Hurray! The title of this submission by /u/{author} contains valid CSS!", 19 | "INVALID_TITLE_MESSAGE_HEAD": "Your title contains the following CSS errors:\n\n", 20 | "INVALID_TITLE_MESSAGE_HEAD_FOREIGN": "The title of this submission by /u/{author} contains the following CSS errors:\n\n", 21 | 22 | "CSS_SOURCE_BLOCK": "I detected the following code in the preformatted text:\n\n", 23 | "CSS_SOURCE_INLINE": "I detected the following code in the inline code tags:\n\n", 24 | "CSS_SOURCE_BODY": "I didn't see any code, so I parsed the entire comment.\n\n", 25 | 26 | "VALID_COMMENT_MESSAGE": "Congratulations! Your comment contains valid CSS!", 27 | "VALID_COMMENT_MESSAGE_FOREIGN": "Hurray! /u/{author}'s comment contains valid CSS!", 28 | "INVALID_COMMENT_MESSAGE_HEAD": "Your comment contains the following CSS errors:\n\n", 29 | "INVALID_COMMENT_MESSAGE_HEAD_FOREIGN": "/u/{author}'s comment contains the following CSS errors:\n\n", 30 | 31 | "INVALID_CSS_ERROR": "* Line {lastLine}, column {lastColumn}, **{type}**: `{message}`\n", 32 | "INVALID_CSS_TAIL": "\nAll I know is linting rules, sorry if I missed the joke!", 33 | 34 | "FOOTNOTE": "\n\n*****\n\n^^I'm ^^a ^^bot ^^who ^^validates ^^your ^^titles. ^^[author](https://reddit.com/message/compose/?to=Lachcim) ^^[about](http://redd.it/bpvzqj/) ^^[summon](https://redd.it/e1p0sv/) ^^[source](https://github.com/Lachcim/css-irl-bot)", 35 | "POSTCARD": " ^^Come ^^visit ^^us ^^at ^^/r/css_irl!" 36 | }, 37 | "commands": { 38 | "parse_this": ["this", "me", "myself"], 39 | "parse_parent": ["", "parent"], 40 | "parse_op": ["op"] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /cssirlbot/validation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | def validate_query(css): 5 | # send query to w3c for direct validation, return none on network error 6 | try: 7 | result = requests.post( 8 | "https://validator.w3.org/nu/", 9 | headers={"User-Agent": "cssirlbot"}, 10 | files={ 11 | "css": (None, "yes"), 12 | "out": (None, "json"), 13 | "content": (None, css), 14 | "useragent": (None, "cssirlbot"), 15 | "showsource": (None, "yes") 16 | } 17 | ) 18 | except: 19 | return None 20 | 21 | # return true if there are no messages, return none on wrong status code, otherwise return false 22 | if result.status_code == 200: 23 | parsed_result = json.loads(result.text) 24 | 25 | return len(parsed_result["messages"]) == 0, parsed_result["messages"] 26 | else: 27 | return None 28 | 29 | def validate_text(title): 30 | # check title as it is 31 | result, errors = validate_query(title) 32 | 33 | # finish validation on success or network error 34 | if result != False: 35 | return result, errors 36 | 37 | # finish validation if there was no parse error 38 | if not is_parse_error(errors): 39 | return result, errors 40 | 41 | # if there was a parse error, retry validation with dummy selector wrapped around 42 | new_result, new_errors = validate_query("*{" + title + "}") 43 | 44 | # if new query resulted in a single parse error, return the old error 45 | if is_parse_error(new_errors): 46 | return result, errors 47 | 48 | # otherwise return the new result 49 | return new_result, new_errors 50 | 51 | def is_parse_error(errors): 52 | # check whether an error list only contains a single parse error 53 | 54 | parse_error = False 55 | non_parse_error = False 56 | 57 | for error in errors: 58 | if "Parse Error." in error["message"]: 59 | parse_error = True 60 | else: 61 | non_parse_error = True 62 | 63 | return parse_error and not non_parse_error 64 | -------------------------------------------------------------------------------- /cssirlbot/formatting.py: -------------------------------------------------------------------------------- 1 | def format_title_success_string(config, foreign, author, external): 2 | message = "" 3 | 4 | # add message addressed to op or not 5 | if not foreign: 6 | message += config["strings"]["VALID_TITLE_MESSAGE"] 7 | else: 8 | message += config["strings"]["VALID_TITLE_MESSAGE_FOREIGN"].format(author=author) 9 | 10 | # add universal footnote 11 | message += config["strings"]["FOOTNOTE"] 12 | 13 | # if external, add postcard 14 | if external: 15 | message += config["strings"]["POSTCARD"] 16 | 17 | return message 18 | 19 | def format_title_error_string(errors, config, foreign, author, external): 20 | message = "" 21 | 22 | # add message addressed to op or not 23 | if not foreign: 24 | message += config["strings"]["INVALID_TITLE_MESSAGE_HEAD"] 25 | else: 26 | message += config["strings"]["INVALID_TITLE_MESSAGE_HEAD_FOREIGN"].format(author=author) 27 | 28 | # list errors 29 | for error in errors: 30 | # protection against markdown injection, no way to escape the grave accent 31 | error["message"] = error["message"].replace("`", "'") 32 | 33 | message += config["strings"]["INVALID_CSS_ERROR"].format(**error) 34 | 35 | # add universal error tail and footnote 36 | message += config["strings"]["INVALID_CSS_TAIL"] 37 | message += config["strings"]["FOOTNOTE"] 38 | 39 | # if external, add postcard 40 | if external: 41 | message += config["strings"]["POSTCARD"] 42 | 43 | return message 44 | 45 | def format_comment_success_string(source, css, config, foreign, author, external): 46 | message = "" 47 | 48 | # state the source 49 | message += config["strings"]["CSS_SOURCE_" + source.upper()] 50 | 51 | # print the source 52 | if source != "body": 53 | message += (" " + css).replace("\n", "\n ") + "\n" 54 | 55 | # add message addressed to op or not 56 | if not foreign: 57 | message += config["strings"]["VALID_COMMENT_MESSAGE"] 58 | else: 59 | message += config["strings"]["VALID_COMMENT_MESSAGE_FOREIGN"].format(author=author) 60 | 61 | # add universal footnote 62 | message += config["strings"]["FOOTNOTE"] 63 | 64 | # if external, add postcard 65 | if external: 66 | message += config["strings"]["POSTCARD"] 67 | 68 | return message 69 | 70 | def format_comment_error_string(source, css, errors, config, foreign, author, external): 71 | message = "" 72 | 73 | # state the source 74 | message += config["strings"]["CSS_SOURCE_" + source.upper()] 75 | 76 | # print the source 77 | if source != "body": 78 | message += (" " + css).replace("\n", "\n ") + "\n" 79 | 80 | # add message addressed to op or not 81 | if not foreign: 82 | message += config["strings"]["INVALID_COMMENT_MESSAGE_HEAD"] 83 | else: 84 | message += config["strings"]["INVALID_COMMENT_MESSAGE_HEAD_FOREIGN"].format(author=author) 85 | 86 | # list errors 87 | for error in errors: 88 | # protection against markdown injection, no way to escape the grave accent 89 | error["message"] = error["message"].replace("`", "'") 90 | 91 | message += config["strings"]["INVALID_CSS_ERROR"].format(**error) 92 | 93 | # add universal error tail and footnote 94 | message += config["strings"]["INVALID_CSS_TAIL"] 95 | message += config["strings"]["FOOTNOTE"] 96 | 97 | # if external, add postcard 98 | if external: 99 | message += config["strings"]["POSTCARD"] 100 | 101 | return message 102 | -------------------------------------------------------------------------------- /cssirlbot.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import sys 5 | import threading 6 | import traceback 7 | import praw 8 | import cssirlbot.processing 9 | import cssirlbot.submissionhistory 10 | 11 | # get config 12 | with open("config.json") as file: 13 | config = json.loads(file.read()) 14 | 15 | # configure logging to file 16 | logging.basicConfig(filename="cssirlbot.log", 17 | filemode="a", 18 | format="%(asctime)s %(levelname)s %(message)s") 19 | logging.getLogger().setLevel(config["internal"]["logging_level"]) 20 | 21 | # also log to stdout 22 | stdout_handler = logging.StreamHandler(sys.stdout) 23 | stdout_handler.setLevel(config["internal"]["logging_level"]) 24 | stdout_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s")) 25 | logging.getLogger().addHandler(stdout_handler) 26 | 27 | # parse arguments 28 | argParser = argparse.ArgumentParser() 29 | argParser.add_argument("--dry-run", 30 | dest="dry_run", 31 | action="store_true", 32 | help="collect available posts but don't process them") 33 | args = argParser.parse_args() 34 | 35 | def work(): 36 | try: 37 | logging.info("Checking for new submissions") 38 | 39 | # check target subreddit for new submissions 40 | if config["behavior"]["process_submissions"]: 41 | for submission in subreddit.new(): 42 | # ignore processed submissions 43 | if cssirlbot.submissionhistory.is_processed(submission): 44 | continue 45 | 46 | logging.info("New submission found: http://redd.it/" + submission.id) 47 | 48 | if args.dry_run: 49 | cssirlbot.submissionhistory.mark_as_processed(submission) 50 | logging.info("Collected.") 51 | continue 52 | 53 | # if error occurred during processing, abandon processing session 54 | if not cssirlbot.processing.process_submission(submission, config): 55 | break 56 | 57 | # check username mentions 58 | if config["behavior"]["process_mentions"]: 59 | for mention in reddit.inbox.mentions(): 60 | # ignore processed comments 61 | if cssirlbot.submissionhistory.is_processed(mention): 62 | continue 63 | 64 | logging.info("New mention found: https://reddit.com/r/all/comments/" + mention.submission.id + "/" + mention.id) 65 | 66 | if args.dry_run: 67 | cssirlbot.submissionhistory.mark_as_processed(mention) 68 | logging.info("Collected.") 69 | continue 70 | 71 | # abandon session on error 72 | if not cssirlbot.processing.process_comment(mention, config, reddit): 73 | break 74 | 75 | # comment replies have to be handled separately 76 | for mention in reddit.inbox.comment_replies(): 77 | # ignore processed comments 78 | if cssirlbot.submissionhistory.is_processed(mention): 79 | continue 80 | 81 | logging.info("New comment reply found: https://reddit.com/r/all/comments/" + mention.submission.id + "/" + mention.id) 82 | 83 | if args.dry_run: 84 | cssirlbot.submissionhistory.mark_as_processed(mention) 85 | logging.info("Collected.") 86 | continue 87 | 88 | # abandon session on error 89 | if not cssirlbot.processing.process_comment(mention, config, reddit): 90 | break 91 | except: 92 | logging.error("Error in main loop: ") 93 | logging.info(traceback.format_exc()) 94 | 95 | # restart this function after the configured interval 96 | threading.Timer(config["internal"]["feed_check_interval"], work).start() 97 | 98 | logging.info("Bot starting") 99 | 100 | reddit = praw.Reddit("cssirlbot", user_agent="linux:cssirlbot:1 (by /u/Lachcim)") 101 | subreddit = reddit.subreddit(config["behavior"]["subreddit"]) 102 | 103 | logging.info("Bot online") 104 | 105 | work() 106 | -------------------------------------------------------------------------------- /cssirlbot/processing.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | import traceback 4 | import mistune 5 | import praw 6 | import cssirlbot.submissionhistory 7 | import cssirlbot.validation 8 | import cssirlbot.formatting 9 | 10 | def process_submission(submission, config, reply_target=None): 11 | # get config 12 | comment_on_valid = config["behavior"]["comment_on_valid_css"] 13 | comment_on_invalid = config["behavior"]["comment_on_invalid_css"] 14 | distinguish_comments = config["behavior"]["distinguish_comments"] 15 | sticky_comments = config["behavior"]["sticky_comments"] 16 | 17 | # validate submission 18 | result, errors = cssirlbot.validation.validate_text(submission.title) 19 | 20 | # signal failure if needed 21 | if result == None: 22 | logging.error("Error while validating") 23 | return False 24 | 25 | try: 26 | # reply to submission 27 | home_subreddit = config["behavior"]["subreddit"] 28 | foreign = reply_target is not None 29 | reply_target = reply_target or submission 30 | author = submission.author.name if submission.author else "[deleted]" 31 | external = reply_target.subreddit.display_name != home_subreddit 32 | 33 | if result == True and (comment_on_valid or foreign): 34 | comment = reply_target.reply(cssirlbot.formatting.format_title_success_string(config, foreign, author, external)) 35 | elif result == False and (comment_on_invalid or foreign): 36 | comment = reply_target.reply(cssirlbot.formatting.format_title_error_string(errors, config, foreign, author, external)) 37 | 38 | # distinguish comment 39 | if distinguish_comments: 40 | try: 41 | comment.mod.distinguish(how="yes", sticky=sticky_comments) 42 | except: 43 | pass 44 | 45 | # mark submission as processed 46 | cssirlbot.submissionhistory.mark_as_processed(submission) 47 | 48 | logging.info("Processed!") 49 | return True 50 | except praw.exceptions.APIException as e: 51 | return handle_error(submission, e) 52 | 53 | def handle_error(submission, e): 54 | if e.error_type == "RATELIMIT": 55 | # rate limit reached, stop processing and wait for next batch 56 | logging.warning("Rate limit reached") 57 | return False 58 | elif e.error_type in ["TOO_OLD", "THREAD_LOCKED"]: 59 | # prevent bot from processing this submission again 60 | cssirlbot.submissionhistory.mark_as_processed(submission) 61 | 62 | logging.info("Post cannot be replied to") 63 | return True 64 | else: 65 | # other error 66 | logging.warning("Error processing submission") 67 | logging.info(traceback.format_exc()) 68 | return True 69 | 70 | def process_comment(comment, config, reddit): 71 | # get config 72 | home_subreddit = config["behavior"]["subreddit"] 73 | process_external = config["behavior"]["process_external_mentions"] 74 | distinguish_comments = config["behavior"]["distinguish_comments"] 75 | 76 | # exclude external mentions if configured as such 77 | if comment.subreddit.display_name != home_subreddit and not process_external: 78 | logging.info("Ignoring external mention") 79 | cssirlbot.submissionhistory.mark_as_processed(comment) # prevent reprocessing 80 | return True 81 | 82 | # get command used in mention 83 | command = get_command(comment.body, config, reddit.user.me().name) 84 | 85 | # handle invalid commands 86 | if not command: 87 | logging.info("Mention doesn't contain a valid command") 88 | # mark comment as handled 89 | cssirlbot.submissionhistory.mark_as_processed(comment) 90 | return True 91 | 92 | # handle parsing the op 93 | if command == "parse_op" or (command == "parse_parent" and comment.parent_id.startswith("t3_")): 94 | # obtain op submission, trim prefix 95 | op = reddit.submission(id=comment.parent_id[3:]) 96 | 97 | # don't parse op if already parsed 98 | if cssirlbot.submissionhistory.is_processed(op): 99 | logging.info("Mention points to already parsed post") 100 | # mark comment as handled 101 | cssirlbot.submissionhistory.mark_as_processed(comment) 102 | return True 103 | 104 | # delegate submission handling to standard function 105 | result = process_submission(op, config, comment) 106 | if result: 107 | # mark comment as handled on success 108 | cssirlbot.submissionhistory.mark_as_processed(comment) 109 | 110 | return result 111 | 112 | # find css and validate it 113 | css_origin = comment if command == "parse_this" else reddit.comment(id=comment.parent_id[3:]) 114 | css, css_source = find_css(css_origin.body) 115 | result, errors = cssirlbot.validation.validate_text(css) 116 | 117 | # signal failure if needed 118 | if result == None: 119 | logging.error("Error while validating") 120 | return False 121 | 122 | try: 123 | # reply to comment 124 | home_subreddit = config["behavior"]["subreddit"] 125 | foreign = command == "parse_parent" 126 | external = comment.subreddit.display_name != home_subreddit 127 | 128 | # get comment author, account for deleted users 129 | if css_origin.author: 130 | author = css_origin.author.name 131 | else: 132 | author = "[deleted]" 133 | 134 | if result == True: 135 | new_comment = comment.reply(cssirlbot.formatting.format_comment_success_string(css_source, css, config, foreign, author, external)) 136 | else: 137 | new_comment = comment.reply(cssirlbot.formatting.format_comment_error_string(css_source, css, errors, config, foreign, author, external)) 138 | 139 | # distinguish comment 140 | if distinguish_comments: 141 | try: 142 | new_comment.mod.distinguish(how="yes") 143 | except: 144 | pass 145 | 146 | # mark comments as processed 147 | cssirlbot.submissionhistory.mark_as_processed(comment) 148 | if foreign: 149 | cssirlbot.submissionhistory.mark_as_processed(css_origin) 150 | 151 | logging.info("Processed!") 152 | return True 153 | except praw.exceptions.APIException as e: 154 | return handle_error(comment, e) 155 | 156 | def get_command(body, config, username): 157 | # find valid command calls 158 | expression = re.compile("^/?u/" + username + "/?\s*(\S*)\s*$", re.MULTILINE | re.IGNORECASE) 159 | matches = re.findall(expression, body) 160 | 161 | # find first valid command 162 | for match in matches: 163 | for command, keywords in config["commands"].items(): 164 | if match.lower() in keywords: 165 | return command 166 | 167 | # return none on failure 168 | return None 169 | 170 | def find_css(body): 171 | # parse markdown 172 | md = mistune.create_markdown() 173 | # alternative algorithm: disable fenced code 174 | # see https://github.com/Lachcim/css-irl-bot/issues/5 for discussion 175 | # md.block.rules.remove("fenced_code") 176 | html = md(body) 177 | 178 | # it is known that when one parses html with regex, zalgo sings the song 179 | # that ends the world. in this case, however, the html produced by mistune 180 | # can be assumed to be regular and therefore parseable using regex. 181 | 182 | # find code blocks 183 | # expression = re.compile("
(.*?)
", re.DOTALL) 184 | expression = re.compile("
]*>(.*?)
", re.DOTALL) 185 | css = "".join(re.findall(expression, html)) 186 | if css: 187 | return css, "block" 188 | 189 | # if the above failed, find inline code 190 | expression = re.compile("(.*?)", re.DOTALL) 191 | css = "\n".join(re.findall(expression, html)) 192 | if css: 193 | return css, "inline" 194 | 195 | # if all failed, parse the entire comment 196 | return body, "body" 197 | --------------------------------------------------------------------------------