├── cssirlbot
├── __init__.py
├── submissionhistory.py
├── validation.py
├── formatting.py
└── processing.py
├── .gitignore
├── requirements.txt
├── .gitattributes
├── README.md
├── config.json
└── cssirlbot.py
/cssirlbot/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | praw.ini
2 | __pycache__
3 | cssirlbot.log
4 | processedsubmissions.txt
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | praw
3 | git+https://github.com/lepture/mistune
4 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/cssirlbot/submissionhistory.py:
--------------------------------------------------------------------------------
1 | import os.path
2 |
def load_processed_ids(path="processedsubmissions.txt"):
    """Return the IDs stored in the history file at *path*, one per line.

    A missing file yields an empty list. Only the line terminator is removed
    from each line, so a final line without a trailing newline keeps its last
    character. Blank lines are skipped.
    """
    if not os.path.isfile(path):
        return []

    with open(path, "r") as file:
        # strip the terminator only; slicing off the last character would
        # truncate an ID whenever the file does not end with a newline
        stripped = (line.rstrip("\n") for line in file)
        return [entry for entry in stripped if entry]


# get processed submissions (loaded once, when the module is imported)
processed_submissions = load_processed_ids()
9 |
def mark_as_processed(submission):
    """Record *submission* as handled, both in memory and in the history file.

    The object only needs an ``id`` attribute. IDs that are already known are
    ignored, so neither the list nor the file receives duplicates.
    """
    submission_id = submission.id

    if submission_id not in processed_submissions:
        # remember for the rest of this run
        processed_submissions.append(submission_id)

        # persist for later runs, one ID per line
        with open("processedsubmissions.txt", "a") as history:
            history.write(submission_id + "\n")
22 |
def is_processed(submission):
    """Tell whether the ID of *submission* is already in the processed list."""
    known_ids = processed_submissions
    return submission.id in known_ids
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # css-irl-bot
2 | The CSS_IRL bot is a reddit bot that fetches new submissions from a subreddit and parses their titles to check if they are valid CSS. It can post a comment detailing the found errors and/or confirm the validity of the title. It can also parse comments when summoned.
3 |
4 |
5 |
6 | ## Configuration
7 | The bot can be configured through a file named **config.json** as well as **praw.ini**.
8 |
9 | In config.json, you can configure the following settings:
10 |
11 | * `subreddit`: The subreddit where the bot operates.
12 | * `process_submissions`: Process posts.
13 | * `comment_on_invalid_css`: Automatically comment on invalid titles.
14 | * `comment_on_valid_css`: Automatically comment on valid titles.
15 | * `distinguish_comments`: Mark comments as mod comments (requires the "posts" mod permission).
16 | * `sticky_comments`: Sticky comments when processing posts (only takes effect when `distinguish_comments` is enabled).
17 | * `process_mentions`: Process username mentions.
18 | * `process_external_mentions`: Process mentions outside the home subreddit.
19 | * `feed_check_interval`: How often, in seconds, the bot should check for new posts, mentions and comment replies.
20 | * `logging_level`: How detailed the logs should be.
21 |
22 | You can also manage the strings used by the bot and create or remove aliases for its commands.
23 |
24 | The praw.ini file describes the credentials used by the bot. It has the following format:
25 | ```ini
26 | [cssirlbot]
27 | client_id=XXX
28 | client_secret=XXX
29 | password=XXX
30 | username=XXX
31 | ```
32 | The client ID and secret can be obtained by [registering a reddit app](https://reddit.com/prefs/apps).
33 |
34 | ## Running the bot
35 |
36 | Download the files and prepare your configuration. Before running the bot, you might want to run `pip install -r requirements.txt` to obtain the dependencies. To start the script, run:
37 |
38 | ```
39 | python3 cssirlbot.py
40 | ```
41 |
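42 | The bot will try to fetch the newest 100 submissions, mentions and comment replies and parse all of them. To mark everything that is currently available as processed without parsing or replying to it, start the bot with the `--dry-run` flag.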
43 |
--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "behavior": {
3 | "subreddit": "css_irl_bot",
4 | "process_submissions": true,
5 | "comment_on_invalid_css": true,
6 | "comment_on_valid_css": true,
7 | "distinguish_comments": false,
8 | "sticky_comments": false,
9 | "process_mentions": true,
10 | "process_external_mentions": false
11 | },
12 | "internal": {
13 | "feed_check_interval": 10,
14 | "logging_level": "INFO"
15 | },
16 | "strings": {
17 | "VALID_TITLE_MESSAGE": "Congratulations! Your title contains valid CSS!",
18 | "VALID_TITLE_MESSAGE_FOREIGN": "Hurray! The title of this submission by /u/{author} contains valid CSS!",
19 | "INVALID_TITLE_MESSAGE_HEAD": "Your title contains the following CSS errors:\n\n",
20 | "INVALID_TITLE_MESSAGE_HEAD_FOREIGN": "The title of this submission by /u/{author} contains the following CSS errors:\n\n",
21 |
22 | "CSS_SOURCE_BLOCK": "I detected the following code in the preformatted text:\n\n",
23 | "CSS_SOURCE_INLINE": "I detected the following code in the inline code tags:\n\n",
24 | "CSS_SOURCE_BODY": "I didn't see any code, so I parsed the entire comment.\n\n",
25 |
26 | "VALID_COMMENT_MESSAGE": "Congratulations! Your comment contains valid CSS!",
27 | "VALID_COMMENT_MESSAGE_FOREIGN": "Hurray! /u/{author}'s comment contains valid CSS!",
28 | "INVALID_COMMENT_MESSAGE_HEAD": "Your comment contains the following CSS errors:\n\n",
29 | "INVALID_COMMENT_MESSAGE_HEAD_FOREIGN": "/u/{author}'s comment contains the following CSS errors:\n\n",
30 |
31 | "INVALID_CSS_ERROR": "* Line {lastLine}, column {lastColumn}, **{type}**: `{message}`\n",
32 | "INVALID_CSS_TAIL": "\nAll I know is linting rules, sorry if I missed the joke!",
33 |
34 | "FOOTNOTE": "\n\n*****\n\n^^I'm ^^a ^^bot ^^who ^^validates ^^your ^^titles. ^^[author](https://reddit.com/message/compose/?to=Lachcim) ^^[about](http://redd.it/bpvzqj/) ^^[summon](https://redd.it/e1p0sv/) ^^[source](https://github.com/Lachcim/css-irl-bot)",
35 | "POSTCARD": " ^^Come ^^visit ^^us ^^at ^^/r/css_irl!"
36 | },
37 | "commands": {
38 | "parse_this": ["this", "me", "myself"],
39 | "parse_parent": ["", "parent"],
40 | "parse_op": ["op"]
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/cssirlbot/validation.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 |
def validate_query(css, timeout=30):
    """Send *css* to the W3C Nu checker and report the outcome.

    Returns a ``(result, messages)`` pair:

    * ``(True, [])`` when the checker reported no messages,
    * ``(False, messages)`` when it reported at least one message,
    * ``(None, None)`` when the request failed, the status code was not 200
      or the response could not be decoded.

    The failure case is a pair (rather than a bare ``None``) because callers
    unpack the return value into two names.

    *timeout* is the number of seconds to wait for the checker before the
    request is treated as failed; without it a stalled connection would block
    the bot indefinitely.
    """
    # send query to w3c for direct validation
    try:
        response = requests.post(
            "https://validator.w3.org/nu/",
            headers={"User-Agent": "cssirlbot"},
            files={
                "css": (None, "yes"),
                "out": (None, "json"),
                "content": (None, css),
                "useragent": (None, "cssirlbot"),
                "showsource": (None, "yes"),
            },
            timeout=timeout,
        )
    except requests.RequestException:
        return None, None

    # anything but 200 means the checker did not produce a verdict
    if response.status_code != 200:
        return None, None

    # a malformed or unexpected body is treated like a network error
    try:
        messages = json.loads(response.text)["messages"]
    except (ValueError, KeyError, TypeError):
        return None, None

    # valid if and only if there are no messages
    return len(messages) == 0, messages
28 |
def validate_text(title):
    """Validate *title* as CSS, retrying inside a dummy rule on a parse error.

    The text is first validated as it is. If that yields nothing but parse
    errors, it is validated again wrapped in ``*{ ... }`` so that bare
    declarations such as ``color: red`` are accepted.

    Returns a ``(result, errors)`` pair: ``result`` is True for valid CSS,
    False for invalid CSS and None when the validator could not be reached
    (``errors`` is then None as well).
    """
    def run(query):
        # validate_query signals failure with None (or a pair led by None);
        # normalize both to a pair so unpacking never raises
        outcome = validate_query(query)
        if outcome is None or outcome[0] is None:
            return None, None
        return outcome

    # check title as it is
    result, errors = run(title)

    # finish validation on success or network error
    if result is not False:
        return result, errors

    # finish validation if there was no parse error
    if not is_parse_error(errors):
        return result, errors

    # if there was a parse error, retry validation with dummy selector wrapped around
    new_result, new_errors = run("*{" + title + "}")

    # the retry itself failed: report failure instead of a possibly wrong
    # "invalid" verdict, so the caller can try again later
    if new_result is None:
        return None, None

    # if the new query resulted in parse errors only, return the old errors
    if is_parse_error(new_errors):
        return result, errors

    # otherwise return the new result
    return new_result, new_errors
50 |
def is_parse_error(errors):
    """Return True when *errors* is non-empty and holds only parse errors.

    An entry counts as a parse error when its ``"message"`` contains the text
    ``"Parse Error."``. Any number of such entries is accepted; a single entry
    of another kind makes the result False. An empty or missing (None) list,
    as well as entries without a ``"message"`` key, never count as parse
    errors.
    """
    if not errors:
        return False

    return all("Parse Error." in error.get("message", "") for error in errors)
64 |
--------------------------------------------------------------------------------
/cssirlbot/formatting.py:
--------------------------------------------------------------------------------
def format_title_success_string(config, foreign, author, external):
    """Build the reply for a submission whose title is valid CSS.

    *foreign* selects the wording that names *author* instead of addressing
    the reader directly; *external* appends the postcard after the footnote.
    All texts come from ``config["strings"]``.
    """
    strings = config["strings"]

    # address the op directly unless the message talks about someone else
    if foreign:
        opening = strings["VALID_TITLE_MESSAGE_FOREIGN"].format(author=author)
    else:
        opening = strings["VALID_TITLE_MESSAGE"]

    # the footnote is universal, the postcard is for external replies only
    parts = [opening, strings["FOOTNOTE"]]
    if external:
        parts.append(strings["POSTCARD"])

    return "".join(parts)
18 |
def format_title_error_string(errors, config, foreign, author, external):
    """Build the reply listing the CSS errors found in a submission title.

    *errors* is the list of message dicts returned by the validator; the dicts
    are not modified. *foreign* selects the wording that names *author*;
    *external* appends the postcard after the footnote. All texts come from
    ``config["strings"]``.
    """
    strings = config["strings"]
    message = ""

    # add message addressed to op or not
    if not foreign:
        message += strings["INVALID_TITLE_MESSAGE_HEAD"]
    else:
        message += strings["INVALID_TITLE_MESSAGE_HEAD_FOREIGN"].format(author=author)

    # list errors
    for error in errors:
        # work on a copy so the caller's dicts stay untouched; the position
        # fields are optional in the validator output, so give them defaults
        fields = {
            "firstLine": "?",
            "firstColumn": "?",
            "lastLine": "?",
            "lastColumn": "?",
            "type": "?",
            "message": "",
        }
        fields.update(error)

        # protection against markdown injection, no way to escape the grave accent
        fields["message"] = str(fields["message"]).replace("`", "'")

        message += strings["INVALID_CSS_ERROR"].format(**fields)

    # add universal error tail and footnote
    message += strings["INVALID_CSS_TAIL"]
    message += strings["FOOTNOTE"]

    # if external, add postcard
    if external:
        message += strings["POSTCARD"]

    return message
44 |
def format_comment_success_string(source, css, config, foreign, author, external):
    """Build the reply for a comment whose CSS is valid.

    *source* is ``"block"``, ``"inline"`` or ``"body"`` and selects the
    ``CSS_SOURCE_*`` string that states where the code was found. Unless the
    whole body was parsed, *css* is quoted back as a Markdown code block.
    *foreign* selects the wording that names *author*; *external* appends the
    postcard after the footnote.
    """
    strings = config["strings"]
    message = ""

    # state the source
    message += strings["CSS_SOURCE_" + source.upper()]

    # print the source; Markdown only renders a line as preformatted code
    # when it is indented by four spaces
    if source != "body":
        message += ("    " + css).replace("\n", "\n    ") + "\n"

    # add message addressed to op or not
    if not foreign:
        message += strings["VALID_COMMENT_MESSAGE"]
    else:
        message += strings["VALID_COMMENT_MESSAGE_FOREIGN"].format(author=author)

    # add universal footnote
    message += strings["FOOTNOTE"]

    # if external, add postcard
    if external:
        message += strings["POSTCARD"]

    return message
69 |
def format_comment_error_string(source, css, errors, config, foreign, author, external):
    """Build the reply listing the CSS errors found in a comment.

    *source* is ``"block"``, ``"inline"`` or ``"body"`` and selects the
    ``CSS_SOURCE_*`` string that states where the code was found. Unless the
    whole body was parsed, *css* is quoted back as a Markdown code block.
    *errors* is the list of message dicts returned by the validator; the dicts
    are not modified. *foreign* selects the wording that names *author*;
    *external* appends the postcard after the footnote.
    """
    strings = config["strings"]
    message = ""

    # state the source
    message += strings["CSS_SOURCE_" + source.upper()]

    # print the source; Markdown only renders a line as preformatted code
    # when it is indented by four spaces
    if source != "body":
        message += ("    " + css).replace("\n", "\n    ") + "\n"

    # add message addressed to op or not
    if not foreign:
        message += strings["INVALID_COMMENT_MESSAGE_HEAD"]
    else:
        message += strings["INVALID_COMMENT_MESSAGE_HEAD_FOREIGN"].format(author=author)

    # list errors
    for error in errors:
        # work on a copy so the caller's dicts stay untouched; the position
        # fields are optional in the validator output, so give them defaults
        fields = {
            "firstLine": "?",
            "firstColumn": "?",
            "lastLine": "?",
            "lastColumn": "?",
            "type": "?",
            "message": "",
        }
        fields.update(error)

        # protection against markdown injection, no way to escape the grave accent
        fields["message"] = str(fields["message"]).replace("`", "'")

        message += strings["INVALID_CSS_ERROR"].format(**fields)

    # add universal error tail and footnote
    message += strings["INVALID_CSS_TAIL"]
    message += strings["FOOTNOTE"]

    # if external, add postcard
    if external:
        message += strings["POSTCARD"]

    return message
102 |
--------------------------------------------------------------------------------
/cssirlbot.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import sys
5 | import threading
6 | import traceback
7 | import praw
8 | import cssirlbot.processing
9 | import cssirlbot.submissionhistory
10 |
# load the bot configuration
with open("config.json") as config_file:
    config = json.load(config_file)

# both log targets share one line format and one level
LOG_FORMAT = "%(asctime)s %(levelname)s %(message)s"
log_level = config["internal"]["logging_level"]

# log to a file, appending to the log of earlier runs
logging.basicConfig(filename="cssirlbot.log", filemode="a", format=LOG_FORMAT)
root_logger = logging.getLogger()
root_logger.setLevel(log_level)

# mirror the log on stdout
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setLevel(log_level)
stdout_handler.setFormatter(logging.Formatter(LOG_FORMAT))
root_logger.addHandler(stdout_handler)

# read the command line
argParser = argparse.ArgumentParser()
argParser.add_argument(
    "--dry-run",
    dest="dry_run",
    action="store_true",
    help="collect available posts but don't process them",
)
args = argParser.parse_args()
34 |
def _process_feed(items, describe, handler):
    """Run *handler* on every item of *items* that has not been processed yet.

    *describe* turns an item into the log line announcing it. In dry-run mode
    items are only marked as processed. The feed is abandoned as soon as
    *handler* returns a false value.
    """
    for item in items:
        # ignore items handled earlier
        if cssirlbot.submissionhistory.is_processed(item):
            continue

        logging.info(describe(item))

        if args.dry_run:
            cssirlbot.submissionhistory.mark_as_processed(item)
            logging.info("Collected.")
            continue

        # if error occurred during processing, abandon processing session
        if not handler(item):
            break


def work():
    """Check all enabled feeds once, then schedule the next check.

    New submissions of the target subreddit are processed when
    ``process_submissions`` is set; username mentions and comment replies are
    processed when ``process_mentions`` is set. Errors are logged together
    with their traceback and do not stop the bot; the next run is scheduled
    after ``feed_check_interval`` seconds either way.
    """
    try:
        logging.info("Checking for new submissions")

        # check target subreddit for new submissions
        if config["behavior"]["process_submissions"]:
            _process_feed(
                subreddit.new(),
                lambda submission: "New submission found: http://redd.it/" + submission.id,
                lambda submission: cssirlbot.processing.process_submission(submission, config),
            )

        if config["behavior"]["process_mentions"]:
            def handle_comment(mention):
                return cssirlbot.processing.process_comment(mention, config, reddit)

            # check username mentions
            _process_feed(
                reddit.inbox.mentions(),
                lambda mention: "New mention found: https://reddit.com/r/all/comments/"
                + mention.submission.id + "/" + mention.id,
                handle_comment,
            )

            # comment replies have to be handled separately
            _process_feed(
                reddit.inbox.comment_replies(),
                lambda mention: "New comment reply found: https://reddit.com/r/all/comments/"
                + mention.submission.id + "/" + mention.id,
                handle_comment,
            )
    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # stop the bot; logging.exception keeps the traceback at ERROR level
        # so it is not filtered out together with INFO messages
        logging.exception("Error in main loop: ")

    # restart this function after the configured interval
    threading.Timer(config["internal"]["feed_check_interval"], work).start()
97 |
logging.info("Bot starting")

# create the reddit client from the "cssirlbot" site configuration
user_agent = "linux:cssirlbot:1 (by /u/Lachcim)"
reddit = praw.Reddit("cssirlbot", user_agent=user_agent)

# the subreddit whose new submissions are watched
target_subreddit = config["behavior"]["subreddit"]
subreddit = reddit.subreddit(target_subreddit)

logging.info("Bot online")

# first pass; work() schedules every later pass itself
work()
106 |
--------------------------------------------------------------------------------
/cssirlbot/processing.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import re
3 | import traceback
4 | import mistune
5 | import praw
6 | import cssirlbot.submissionhistory
7 | import cssirlbot.validation
8 | import cssirlbot.formatting
9 |
def process_submission(submission, config, reply_target=None):
    """Validate the title of *submission* and reply with the verdict.

    Without *reply_target* the reply goes to the submission itself and is only
    posted when the matching ``comment_on_valid_css`` /
    ``comment_on_invalid_css`` setting is enabled. With *reply_target* (the
    comment that requested the check) the reply always goes there and names
    the submission's author.

    Returns False when the processing session should be abandoned (validation
    could not be performed, or the rate limit was hit), True otherwise.
    """
    # get config
    behavior = config["behavior"]
    comment_on_valid = behavior["comment_on_valid_css"]
    comment_on_invalid = behavior["comment_on_invalid_css"]
    distinguish_comments = behavior["distinguish_comments"]
    sticky_comments = behavior["sticky_comments"]

    # validate submission
    result, errors = cssirlbot.validation.validate_text(submission.title)

    # signal failure if needed
    if result is None:
        logging.error("Error while validating")
        return False

    try:
        # reply to submission
        home_subreddit = behavior["subreddit"]
        foreign = reply_target is not None
        reply_target = reply_target or submission
        author = submission.author.name if submission.author else "[deleted]"
        external = reply_target.subreddit.display_name != home_subreddit

        # stays None when the configuration says not to comment
        comment = None
        if result is True and (comment_on_valid or foreign):
            comment = reply_target.reply(
                cssirlbot.formatting.format_title_success_string(config, foreign, author, external))
        elif result is False and (comment_on_invalid or foreign):
            comment = reply_target.reply(
                cssirlbot.formatting.format_title_error_string(errors, config, foreign, author, external))

        # distinguish comment, but only if one was actually posted
        if comment is not None and distinguish_comments:
            try:
                comment.mod.distinguish(how="yes", sticky=sticky_comments)
            except Exception:
                # best effort: a failure to distinguish must not undo the reply
                logging.debug("Could not distinguish comment", exc_info=True)

        # mark submission as processed
        cssirlbot.submissionhistory.mark_as_processed(submission)

        logging.info("Processed!")
        return True
    except praw.exceptions.APIException as e:
        return handle_error(submission, e)
52 |
def handle_error(submission, e):
    """Decide how to go on after the API error *e* raised for *submission*.

    Returns False when the rate limit was reached, so that the caller stops
    the current batch. Returns True otherwise; posts that can never be
    replied to (too old or locked) are marked as processed first. Meant to be
    called while the exception is being handled, since the traceback of the
    active exception is logged for unknown error types.
    """
    kind = e.error_type

    # rate limit reached, stop processing and wait for next batch
    if kind == "RATELIMIT":
        logging.warning("Rate limit reached")
        return False

    # prevent bot from processing this submission again
    if kind in ("TOO_OLD", "THREAD_LOCKED"):
        cssirlbot.submissionhistory.mark_as_processed(submission)
        logging.info("Post cannot be replied to")
        return True

    # other error
    logging.warning("Error processing submission")
    logging.info(traceback.format_exc())
    return True
69 |
def process_comment(comment, config, reddit):
    """Handle a mention or comment reply that may summon the bot.

    The command found in the comment decides what gets validated: the
    submission the comment belongs to (``parse_op``, or ``parse_parent`` when
    the parent is the submission), the comment itself (``parse_this``) or its
    parent comment (``parse_parent``). The verdict is posted as a reply to
    *comment*.

    Returns False when the processing session should be abandoned (validation
    could not be performed, or the rate limit was hit), True otherwise.
    """
    # get config
    behavior = config["behavior"]
    home_subreddit = behavior["subreddit"]
    process_external = behavior["process_external_mentions"]
    distinguish_comments = behavior["distinguish_comments"]
    external = comment.subreddit.display_name != home_subreddit

    # exclude external mentions if configured as such
    if external and not process_external:
        logging.info("Ignoring external mention")
        cssirlbot.submissionhistory.mark_as_processed(comment)  # prevent reprocessing
        return True

    # get command used in mention
    command = get_command(comment.body, config, reddit.user.me().name)

    # handle invalid commands
    if not command:
        logging.info("Mention doesn't contain a valid command")
        # mark comment as handled
        cssirlbot.submissionhistory.mark_as_processed(comment)
        return True

    # handle parsing the op
    if command == "parse_op" or (command == "parse_parent" and comment.parent_id.startswith("t3_")):
        # take the submission from the comment itself: parent_id only names
        # the submission for top-level comments, for nested replies it is the
        # ID of another comment
        op = comment.submission

        # don't parse op if already parsed
        if cssirlbot.submissionhistory.is_processed(op):
            logging.info("Mention points to already parsed post")
            # mark comment as handled
            cssirlbot.submissionhistory.mark_as_processed(comment)
            return True

        # delegate submission handling to standard function
        result = process_submission(op, config, comment)
        if result:
            # mark comment as handled on success
            cssirlbot.submissionhistory.mark_as_processed(comment)

        return result

    # find css and validate it; the parent is a comment here, so trim its prefix
    css_origin = comment if command == "parse_this" else reddit.comment(id=comment.parent_id[3:])
    css, css_source = find_css(css_origin.body)
    result, errors = cssirlbot.validation.validate_text(css)

    # signal failure if needed
    if result is None:
        logging.error("Error while validating")
        return False

    try:
        # reply to comment
        foreign = command == "parse_parent"

        # get comment author, account for deleted users
        author = css_origin.author.name if css_origin.author else "[deleted]"

        if result is True:
            new_comment = comment.reply(
                cssirlbot.formatting.format_comment_success_string(
                    css_source, css, config, foreign, author, external))
        else:
            new_comment = comment.reply(
                cssirlbot.formatting.format_comment_error_string(
                    css_source, css, errors, config, foreign, author, external))

        # distinguish comment
        if new_comment is not None and distinguish_comments:
            try:
                new_comment.mod.distinguish(how="yes")
            except Exception:
                # best effort: a failure to distinguish must not undo the reply
                logging.debug("Could not distinguish comment", exc_info=True)

        # mark comments as processed
        cssirlbot.submissionhistory.mark_as_processed(comment)
        if foreign:
            cssirlbot.submissionhistory.mark_as_processed(css_origin)

        logging.info("Processed!")
        return True
    except praw.exceptions.APIException as e:
        return handle_error(comment, e)
155 |
def get_command(body, config, username):
    """Return the command requested in *body*, or None if there is none.

    A command call is a line that consists of a mention of *username*
    (``u/name`` or ``/u/name``, case-insensitive, optional trailing slash)
    followed by at most one keyword. The keyword is looked up, lowercased, in
    the alias lists of ``config["commands"]``; the name of the first command
    that lists it is returned.
    """
    # find valid command calls; the username is escaped so that it is matched
    # literally rather than interpreted as a pattern
    expression = re.compile(
        r"^/?u/" + re.escape(username) + r"/?\s*(\S*)\s*$",
        re.MULTILINE | re.IGNORECASE,
    )

    # find first valid command
    for match in expression.findall(body):
        keyword = match.lower()
        for command, keywords in config["commands"].items():
            if keyword in keywords:
                return command

    # return none on failure
    return None
169 |
def find_css(body):
    """Extract the CSS to validate from the Markdown text *body*.

    Returns a ``(css, source)`` pair. Code blocks take precedence and are
    concatenated (source ``"block"``); failing that, inline code spans are
    joined with newlines (source ``"inline"``); failing that, the whole body
    is returned unchanged (source ``"body"``).
    """
    # imported locally because the name "html" is not imported at file level
    import html as html_lib

    # parse markdown
    md = mistune.create_markdown()
    # alternative algorithm: disable fenced code
    # see https://github.com/Lachcim/css-irl-bot/issues/5 for discussion
    # md.block.rules.remove("fenced_code")
    rendered = md(body)

    # it is known that when one parses html with regex, zalgo sings the song
    # that ends the world. in this case, however, the html produced by mistune
    # can be assumed to be regular and therefore parseable using regex.

    # find code blocks; the opening tag may carry attributes such as a
    # language class
    expression = re.compile(r"<pre><code[^>]*>(.*?)</code></pre>", re.DOTALL)
    # the renderer escapes the code it emits, so undo that to get back
    # characters such as ">" and "&"
    css = html_lib.unescape("".join(expression.findall(rendered)))
    if css:
        return css, "block"

    # if the above failed, find inline code
    expression = re.compile(r"<code>(.*?)</code>", re.DOTALL)
    css = html_lib.unescape("\n".join(expression.findall(rendered)))
    if css:
        return css, "inline"

    # if all failed, parse the entire comment
    return body, "body"
197 |
--------------------------------------------------------------------------------