├── requirements.txt
├── util.py
├── .gitignore
├── LICENCE
├── README.md
├── autowikibot-remover.py
└── autowikibot-commenter.py
/requirements.txt:
--------------------------------------------------------------------------------
1 | praw
2 | pyimgur
3 | beautifulsoup4
4 | wikipedia
5 | 
--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
1 | ### File borrowed from Zack Maril @ https://github.com/zmaril
2 | import re, time
3 | 
4 | def formatted(*args):
5 |     now = time.strftime("%Y-%m-%d %H:%M:%S")
6 |     return "["+now+"] "+" ".join(map(str,args))
7 | 
8 | 
9 | def log(*args):
10 |     print formatted(*args)
11 | 
12 | def fail(*args):
13 |     print '\033[91m'+formatted(*args)+'\033[0m'
14 | 
15 | def warn(*args):
16 |     print '\033[93m'+formatted(*args)+'\033[0m'
17 | 
18 | def success(*args):
19 |     print '\033[92m'+formatted(*args)+'\033[0m'
20 | 
21 | def special(*args):
22 |     print '\033[95m'+formatted(*args)+'\033[0m'
23 | 
24 | def bluelog(*args):
25 |     print '\033[94m'+formatted(*args)+'\033[0m'
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # C extensions
6 | *.so
7 | 
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | eggs/
15 | lib/
16 | lib64/
17 | parts/
18 | sdist/
19 | var/
20 | *.egg-info/
21 | .installed.cfg
22 | *.egg
23 | 
24 | # PyInstaller
25 | # Usually these files are written by a python script from a template
26 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
27 | *.manifest
28 | *.spec
29 | 
30 | # Installer logs
31 | pip-log.txt
32 | pip-delete-this-directory.txt
33 | 
34 | # Unit test / coverage reports
35 | htmlcov/
36 | .tox/
37 | .coverage
38 | .cache
39 | nosetests.xml
40 | coverage.xml
41 | 
42 | # Translations
43 | *.mo
44 | *.pot
45 | 
46 | # Django stuff:
47 | *.log
48 | 
49 | # Sphinx documentation
50 | docs/_build/
51 | 
52 | # PyBuilder
53 | target/
54 | 
55 | # PyCharm
56 | .idea/
57 | 
--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
1 | Copyright (C) 2014 Acini (alias)
2 | 
3 | Definitions
4 | 
5 | 1. A "Creator Instance" is a Reddit user account which is operated and
6 | maintained by the original author of this source code and is used by the
7 | Program for processing and replying to comments.
8 | 2. A subreddit where a Creator Instance has the ability to submit comments
9 | is referred to as a "Covered Subreddit".
10 | 
11 | Permission is hereby granted, free of charge, to any person
12 | obtaining a copy of this software and associated documentation
13 | files (the "Software"), to deal in the Software without restriction,
14 | including without limitation the rights to use, copy, modify,
15 | merge, publish, distribute, sublicense, and/or sell copies
16 | of the Software, and to permit persons to whom the Software
17 | is furnished to do so, subject to the following conditions:
18 | 
19 | 1. The above copyright notice and this permission notice shall
20 | be included in all copies or substantial portions of the Software.
21 | 
22 | 2. The source code (modified or unmodified) cannot be used for submitting comments
23 | in Covered Subreddits when such comments serve the same purpose. The source code
24 | (modified or unmodified) cannot be used for the purpose of harassing the operator(s)
25 | of a Creator Instance. This clause overrides any other clause in this licence
26 | that conflicts with it.
27 | 
28 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
30 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
31 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
32 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
33 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | AutoWikibot
2 | ===========
3 | 
4 | A Reddit bot that replies to comments with an excerpt from the linked Wikipedia article or section.
5 | 
6 | Current instance:
7 | [In action](http://www.reddit.com/u/autowikibot) |
8 | [Subreddit](http://www.reddit.com/r/autowikibot/)
9 | 
10 | Features
11 | ========
12 | 
13 | * Responds to comments like "wikibot, what is dancing?" and "wikibot, tell me about enigma machine"
14 | * In-post summoning via keywords, e.g. "I guess OP should add some more ?- Liverwurst -? to the recipe"
15 | * Suggests up to 4 related interesting articles
16 | * Deletes its reply on the parent commenter's command
17 | * Deletes its reply if the comment score falls below a threshold
18 | * User blacklist
19 | * Automated subreddit blacklisting on the first HTTP 403 encountered
20 | 
21 | Requirements
22 | ============
23 | 
24 | Tested in Python 2.7.6
25 | 
26 | To install the required dependencies from PyPI, run `pip install -r requirements.txt`
27 | from the command line. This will install the `praw`, `pyimgur`, `beautifulsoup4` and
28 | `wikipedia` modules.
29 | 
30 | 
31 | Configuration
32 | =============
33 | 
34 | First, with your bot account, create four comments in places where they will not be removed by someone else:
35 | 
36 | 1. a comment with the banned users list, one username per line
37 | 2. a comment with the excluded subreddits list (without /r/), one subreddit per line
38 | 3. a comment with the list of subreddits where the bot will only reply to top-level (root) comments
39 | 4. a comment with a number indicating the total number of posts made by the bot. Set it to 0 at first setup.
40 | 
41 | Second, you need to create a file called datafile.inf with the following data on separate lines (the scripts read this file purely by line position; see the sketches under Implementation notes below):
42 | 
43 | * reddit bot username
44 | * reddit bot account password
45 | * imgur client id
46 | * ID of the comment with the banned users list
47 | * ID of the comment with the excluded subreddits
48 | * ID of the comment with the root-only subreddits
49 | * ID of the comment with the total number of posts
50 | 
51 | The file will look something like this:
52 | 
53 | ````
54 | wikipedia_robot
55 | botspassword
56 | rt23rnsr2453fop
57 | cetagti
58 | cefsfs4
59 | cef43fs
60 | ce5gd56
61 | ````
62 | 
63 | License
64 | =========
65 | 
66 | This source code is available under a custom licence. See the accompanying file LICENCE.
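
Implementation notes
====================

Both bot scripts read `datafile.inf` purely by line position. Below is a minimal sketch of that reading code, adapted from the login block at the top of `autowikibot-remover.py` and `autowikibot-commenter.py`; the `IMGUR_CLIENT_ID` name is illustrative (the commenter script itself uses `imgur_client_id`):

````
# Values in datafile.inf are identified only by their line number.
with open('datafile.inf', 'r') as myfile:
    datafile_lines = myfile.readlines()

USERNAME = datafile_lines[0].strip()         # reddit bot username
PASSWORD = datafile_lines[1].strip()         # reddit bot account password
IMGUR_CLIENT_ID = datafile_lines[2].strip()  # imgur client id
# The remaining lines hold the comment IDs listed under Configuration.
````

The summoning phrases listed under Features are matched by the regular expressions used in `filterpass()` in `autowikibot-commenter.py`. A rough, self-contained Python 2 sketch of those triggers follows; the `is_summon` helper is illustrative and not part of the scripts, and `body` is assumed to be the lower-cased comment text:

````
import re

def is_summon(body):
    # "wikibot, what is X" / "wikibot, what's X" style questions
    question = re.search(r'wikibot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$', body)
    # "wikibot, tell me about X" style requests
    tell_me = re.search(r'wikibot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$', body)
    # in-post "?- keyword -?" summoning
    inline = re.search(r'\?\-.*\-\?', body)
    return bool(question or tell_me or inline)

print is_summon("wikibot, what is dancing?")              # True
print is_summon("add some more ?- liverwurst -? please")  # True
print is_summon("nothing to summon here")                 # False
````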
67 | -------------------------------------------------------------------------------- /autowikibot-remover.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import praw, time, re, pickle, traceback, os 3 | from util import success, warn, log, fail 4 | 5 | ### Uncomment to debug 6 | #import logging 7 | #logging.basicConfig(level=logging.DEBUG) 8 | 9 | ### Set root directory to script directory 10 | abspath = os.path.abspath(__file__) 11 | dname = os.path.dirname(abspath) 12 | os.chdir(dname) 13 | 14 | r = praw.Reddit("autowikibot by /u/acini at /r/autowikibot") 15 | excludekeyword = "leave me alone" 16 | includekeyword = "follow me again" 17 | global banned_users 18 | 19 | ### Login 20 | with open ('datafile.inf', 'r') as myfile: 21 | datafile_lines=myfile.readlines() 22 | USERNAME = datafile_lines[0].strip() 23 | PASSWORD = datafile_lines[1].strip() 24 | 25 | Trying = True 26 | while Trying: 27 | try: 28 | r.login(USERNAME, PASSWORD) 29 | success("LOGGED IN") 30 | Trying = False 31 | except praw.errors.InvalidUserPass: 32 | fail("WRONG USERNAME OR PASSWORD") 33 | exit() 34 | except Exception as e: 35 | fail(e) 36 | time.sleep(5) 37 | 38 | ### Load saved data 39 | try: 40 | banned_users_page = r.get_wiki_page('autowikibot','userblacklist') 41 | banned_users = banned_users_page.content_md.strip().split() 42 | deleted = 0 43 | success("DATA LOADED") 44 | except Exception as e: 45 | fail(e) 46 | #traceback.print_exc() 47 | exit() 48 | 49 | 50 | 51 | while True: 52 | try: 53 | 54 | ### Check inbox few times 55 | log("AUTODELETE CYCLES STARTED") 56 | for x in range(1, 11): 57 | log("CYCLE %s"%x) 58 | try: 59 | unread = r.get_unread(limit=None) 60 | for msg in unread: 61 | 62 | if re.search(r'\+delete\s', msg.body.lower()): 63 | try: 64 | id = re.findall(r'\+delete\s(.*?)$',msg.body.lower())[0] 65 | id = 't1_'+id 66 | comment = r.get_info(thing_id=id) 67 | comment_parent = r.get_info(thing_id=comment.parent_id) 68 | 69 | 70 | if msg.author.name == comment_parent.author.name or msg.author.name == 'acini': 71 | comment.delete() 72 | deleted+=1 73 | #msg.reply('I have deleted [my comment]('+comment.permalink+'), which was reply to your [this comment]('+comment_parent.permalink+').\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 74 | success("DELETION AT %s"%comment_parent.id) 75 | msg.mark_as_read() 76 | else: 77 | #msg.reply('Oops, only /u/'+str(comment_parent.author.name)+' can delete that [comment]('+comment.permalink+'). Downvote the comment if you think it is not helping.\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 78 | fail("BAD DELETE REQUEST BY /u/%s"%str(msg.author.name)) 79 | msg.mark_as_read() 80 | continue 81 | except Exception as e: 82 | if (str(e)=="'NoneType' object has no attribute 'name'"): 83 | comment.delete() 84 | deleted+=1 85 | #msg.reply('[My comment]('+comment.permalink+') which was reply to [this comment]('+comment_parent.permalink+') is also found orphan. 
I have deleted it as requested.\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 86 | success("DELETION (ORPHAN) AT %s"%comment_parent.id) 87 | else: 88 | fail("%s\033[1;m"%e) 89 | msg.mark_as_read() 90 | continue 91 | 92 | if re.search(r'\+toggle-nsfw\s', msg.body.lower()): 93 | try: 94 | id = re.findall(r'\+toggle-nsfw\s(.*?)$',msg.body.lower())[0] 95 | id = 't1_'+id 96 | comment = r.get_info(thing_id=id) 97 | comment_parent = r.get_info(thing_id=comment.parent_id) 98 | 99 | 100 | if msg.author.name == comment_parent.author.name or msg.author.name == 'acini': 101 | if '[](#nsfw-toggled)' in comment.body.lower(): 102 | #msg.reply('Sorry, NSFW can be toggled only once for a particular comment.') 103 | msg.mark_as_read() 104 | continue 105 | 106 | elif '[](#nsfw-start)' in comment.body.lower(): 107 | nsfwstate = 'OFF' 108 | nsfwurl = "http://www.reddit.com/message/compose?to=%28This%20is%20a%20placeholder%29&subject=NSFW%20toggled:&message=NSFW%20was%20toggled%20"+nsfwstate+"%20by%20parent%20commenter%20for%20this%20comment." 109 | nsfwtag = " [](#sfw)[](#nsfw-toggled)" 110 | replacedb = re.sub(r'\[\]\(\#nsfw-start\).*?\[\]\(\#nsfw-end\)',nsfwtag,comment.body).replace('&','&').replace('>','>').replace('^toggle ^NSFW','').replace('^or[](#or)','') 111 | 112 | elif '[](#sfw)' in comment.body.lower(): 113 | nsfwstate = 'ON' 114 | nsfwurl = "http://www.reddit.com/message/compose?to=%28This%20is%20a%20placeholder%29&subject=NSFW%20toggled:&message=NSFW%20was%20toggled%20"+nsfwstate+"%20by%20parent%20commenter%20for%20this%20comment." 115 | nsfwtag = " [](#nsfw-start)**^NSFW** [^^(?)]("+nsfwurl+")[](#nsfw-end)[](#nsfw-toggled)" 116 | replacedb = comment.body.replace('[](#sfw)',nsfwtag).replace('&','&').replace('>','>').replace('^toggle ^NSFW','').replace('^or[](#or)','') 117 | 118 | comment.edit(replacedb) 119 | ##msg.reply('NSFW was toggled **'+nsfwstate+'** for [this comment]('+comment.permalink+').\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 120 | success("NSFW TOGGLE AT %s"%comment_parent.id) 121 | msg.mark_as_read() 122 | else: 123 | ##msg.reply('Oops, only /u/'+str(comment_parent.author.name)+' can toggle NSFW for that [comment]('+comment.permalink+'). \n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 124 | fail("BAD NSFW TOGGLE REQUEST BY /u/%s"%str(msg.author.name)) 125 | msg.mark_as_read() 126 | continue 127 | except Exception as e: 128 | if (str(e)=="'NoneType' object has no attribute 'name'"): 129 | comment.delete() 130 | deleted+=1 131 | ##msg.reply('[My comment]('+comment.permalink+') which was reply to [this comment]('+comment_parent.permalink+') is also found orphan. I have deleted it as requested.\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 132 | success("DELETION (ORPHAN) AT %s"%comment_parent.id) 133 | else: 134 | fail("%s\033[1;m"%e) 135 | msg.mark_as_read() 136 | continue 137 | ### Add user to exclude list 138 | if re.search(excludekeyword, msg.body.lower()): 139 | banned_users = banned_users_page.content_md.strip().split() 140 | banned_users.append(msg.author.name) 141 | banned_users.sort() 142 | banned_users = list(set(banned_users)) 143 | banned_users.sort(reverse=True) 144 | c_banned_users = "" 145 | for item in banned_users: 146 | c_banned_users = " "+item+'\n'+c_banned_users 147 | editsummary = 'added '+str(msg.author.name) 148 | r.edit_wiki_page('autowikibot','userblacklist',c_banned_users,editsummary) 149 | time.sleep(1) 150 | msg.mark_as_read() 151 | #msg.reply("*Done! 
I won't reply to your comments now. Allow me 15 minutes to put this in effect.*\n\n*Have a nice day!*") 152 | 153 | success("BANNED /u/%s AT %s"%(msg.author.name,msg.id)) 154 | 155 | if re.search(includekeyword, msg.body.lower()): 156 | msg.mark_as_read() 157 | banned_users = banned_users_page.content_md.strip().split() 158 | if msg.author.name in banned_users: 159 | banned_users.remove(str(msg.author.name)) 160 | banned_users = list(set(banned_users)) 161 | banned_users.sort(reverse=True) 162 | c_banned_users = "" 163 | for item in banned_users: 164 | c_banned_users = " "+item+'\n'+c_banned_users 165 | editsummary = 'removed '+str(msg.author.name) 166 | r.edit_wiki_page('autowikibot','userblacklist',c_banned_users,editsummary) 167 | #msg.reply("*OK! I removed you from the blacklist. I will resume replying to your comments now.*") 168 | success("UNBANNED /u/%s AT %s"%(msg.author.name,msg.id)) 169 | else: 170 | #msg.reply("*Dear, you are not in the blacklist.*") 171 | warn("BAD UNBAN REQUEST BY /u/%s AT %s"%(msg.author.name,msg.id)) 172 | 173 | log('Sleeping') 174 | time.sleep(60) 175 | except Exception as e: 176 | traceback.print_exc() 177 | fail(e) 178 | time.sleep(60) 179 | continue 180 | log("AUTODELETE CYCLES COMPLETED") 181 | 182 | log("COMMENT SCORE CHECK CYCLE STARTED") 183 | user = r.get_redditor(USERNAME) 184 | total = 0 185 | upvoted = 0 186 | unvoted = 0 187 | downvoted = 0 188 | for c in user.get_comments(limit=None): 189 | 190 | if len(str(c.score)) == 4: 191 | spaces = "" 192 | if len(str(c.score)) == 3: 193 | spaces = " " 194 | if len(str(c.score)) == 2: 195 | spaces = " " 196 | if len(str(c.score)) == 1: 197 | spaces = " " 198 | 199 | total = total + 1 200 | if c.score < 1 or '#placeholder-awb' in c.body.lower: 201 | c.delete() 202 | print "\033[1;41m%s%s\033[1;m"%(spaces,c.score), 203 | deleted = deleted + 1 204 | downvoted = downvoted + 1 205 | elif c.score > 10: 206 | print "\033[1;32m%s%s\033[1;m"%(spaces,c.score), 207 | upvoted = upvoted + 1 208 | elif c.score > 1: 209 | print "\033[1;34m%s%s\033[1;m"%(spaces,c.score), 210 | upvoted = upvoted + 1 211 | elif c.score > 0: 212 | print "\033[1;30m%s%s\033[1;m"%(spaces,c.score), 213 | unvoted = unvoted + 1 214 | 215 | print ("") 216 | log("COMMENT SCORE CHECK CYCLE COMPLETED") 217 | urate = round(upvoted / float(total) * 100) 218 | nrate = round(unvoted / float(total) * 100) 219 | drate = round(downvoted / float(total) * 100) 220 | warn("Upvoted: %s\t%s\b\b %%"%(upvoted,urate)) 221 | warn("Unvoted %s\t%s\b\b %%"%(unvoted,nrate)) 222 | warn("Downvoted: %s\t%s\b\b %%"%(downvoted,drate)) 223 | warn("Total: %s"%total) 224 | 225 | except KeyboardInterrupt: 226 | log("EXITING") 227 | break 228 | except Exception as e: 229 | #traceback.print_exc() 230 | fail(e) 231 | time.sleep(3) 232 | continue 233 | 234 | -------------------------------------------------------------------------------- /autowikibot-commenter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import praw, time, datetime, re, urllib, urllib2, pickle, pyimgur, os, traceback, wikipedia, string, socket, sys, collections 4 | from nsfw import getnsfw 5 | from util import success, warn, log, fail, special, bluelog 6 | from bs4 import BeautifulSoup 7 | from HTMLParser import HTMLParser 8 | 9 | ### Uncomment to debug 10 | #import logging 11 | #logging.basicConfig(level=logging.DEBUG) 12 | 13 | ### Set root directory to script directory 14 | abspath = os.path.abspath(__file__) 15 | dname = 
os.path.dirname(abspath) 16 | os.chdir(dname) 17 | 18 | ###Load data 19 | def load_data(): 20 | global banned_users 21 | global badsubs 22 | global root_only_subs 23 | global summon_only_subs 24 | global imgur_client_id 25 | global banned_users_page 26 | global badsubs_page 27 | global root_only_subs_page 28 | global summon_only_subs_page 29 | imgur_client_id = datafile_lines[2].strip() 30 | banned_users_page = r.get_wiki_page('autowikibot','userblacklist') 31 | badsubs_page = r.get_wiki_page('autowikibot','excludedsubs') 32 | root_only_subs_page = r.get_wiki_page('autowikibot','rootonlysubs') 33 | summon_only_subs_page = r.get_wiki_page('autowikibot','summononlysubs') 34 | try: 35 | banned_users = banned_users_page.content_md.strip().split() 36 | badsubs = badsubs_page.content_md.strip().split() 37 | root_only_subs = root_only_subs_page.content_md.strip().split() 38 | summon_only_subs = summon_only_subs_page.content_md.strip().split() 39 | success("DATA LOADED") 40 | except Exception as e: 41 | #traceback.print_exc() 42 | fail("DATA LOAD FAILED: %s"%e) 43 | exit() 44 | 45 | def save_changing_variables(editsummary): 46 | ##Save badsubs 47 | global badsubs 48 | badsubs = list(set(badsubs)) 49 | badsubs.sort(reverse=True) 50 | c_badsubs = "" 51 | for item in badsubs: 52 | c_badsubs = " "+item+'\n'+c_badsubs 53 | r.edit_wiki_page('autowikibot','excludedsubs',c_badsubs,editsummary) 54 | ##Save root_only_subs 55 | global root_only_subs 56 | root_only_subs = list(set(root_only_subs)) 57 | root_only_subs.sort(reverse=True) 58 | c_root_only_subs = "" 59 | for item in root_only_subs: 60 | c_root_only_subs = " "+item+'\n'+c_root_only_subs 61 | r.edit_wiki_page('autowikibot','rootonlysubs',c_root_only_subs,editsummary) 62 | ##Save summon_only_subs 63 | global summon_only_subs 64 | summon_only_subs = list(set(summon_only_subs)) 65 | summon_only_subs.sort(reverse=True) 66 | c_summon_only_subs = "" 67 | for item in summon_only_subs: 68 | c_summon_only_subs = " "+item+'\n'+c_summon_only_subs 69 | r.edit_wiki_page('autowikibot','summononlysubs',c_summon_only_subs,editsummary) 70 | 71 | 72 | success("DATA SAVED") 73 | 74 | with open ('datafile.inf', 'r') as myfile: 75 | datafile_lines=myfile.readlines() 76 | 77 | ### Login 78 | r = praw.Reddit("autowikibot by /u/acini at /r/autowikibot") 79 | USERNAME = datafile_lines[0].strip() 80 | PASSWORD = datafile_lines[1].strip() 81 | Trying = True 82 | while Trying: 83 | try: 84 | r.login(USERNAME, PASSWORD) 85 | success("LOGGED IN") 86 | Trying = False 87 | except praw.errors.InvalidUserPass: 88 | fail("WRONG USERNAME OR PASSWORD") 89 | exit() 90 | except Exception as e: 91 | fail("%s"%e) 92 | time.sleep(5) 93 | 94 | def is_summon_chain(post): 95 | if not post.is_root: 96 | parent_comment_id = post.parent_id 97 | parent_comment = r.get_info(thing_id=parent_comment_id) 98 | if parent_comment.author != None and str(parent_comment.author.name) == 'autowikibot': 99 | return True 100 | else: 101 | return False 102 | else: 103 | return False 104 | 105 | def comment_limit_reached(post): 106 | global submissioncount 107 | count_of_this = int(float(submissioncount[str(post.submission.id)])) 108 | if count_of_this > 4 and not (str(post.subreddit) == 'autowikibotdelreq' or str(post.subreddit) == 'autowikibot' or str(post.subreddit) == 'todayilearned'): 109 | return True 110 | else: 111 | return False 112 | 113 | def is_already_done(post): 114 | done = False 115 | numofr = 0 116 | try: 117 | repliesarray = post.replies 118 | numofr = len(list(repliesarray)) 119 | except: 120 
| pass 121 | if numofr != 0: 122 | for repl in post.replies: 123 | if repl.author != None and (repl.author.name == 'autowikibot' or repl.author.name == 'Text_Reader_Bot'): 124 | warn("%s IS ALREADY DONE"%post.id) 125 | done = True 126 | continue 127 | if done: 128 | return True 129 | else: 130 | return False 131 | 132 | def post_reply(reply,post): 133 | global badsubs 134 | global submissioncount 135 | global totalposted 136 | try: 137 | reply = "##### \n\n###### \n\n#### \n"+reply+"^Parent ^commenter ^can [^toggle ^NSFW](/message/compose?to=autowikibot&subject=AutoWikibot NSFW toggle&message=%2Btoggle-nsfw+____id____) ^or[](#or) [^delete](/message/compose?to=autowikibot&subject=AutoWikibot Deletion&message=%2Bdelete+____id____)^. ^Will ^also ^delete ^on ^comment ^score ^of ^-1 ^or ^less. ^| [^(FAQs)](http://www.np.reddit.com/r/autowikibot/wiki/index) ^| [^Mods](http://www.np.reddit.com/r/autowikibot/comments/1x013o/for_moderators_switches_commands_and_css/) ^| [^Magic ^Words](http://www.np.reddit.com/r/autowikibot/comments/1ux484/ask_wikibot/)" 138 | a = post.reply('[#placeholder-awb]Comment is being processed... It will be automatically replaced by new text within a minute or will be deleted if that fails.') 139 | postsuccess = r.get_info(thing_id='t1_'+str(a.id)).edit(reply.replace('____id____',str(a.id))) 140 | if not postsuccess: 141 | raise Exception ('reply unsuccessful') 142 | totalposted = totalposted + 1 143 | submissioncount[str(post.submission.id)]+=1 144 | success("[OK] #%s "%totalposted) 145 | return True 146 | except Exception as e: 147 | warn("REPLY FAILED: %s @ %s"%(e,post.subreddit)) 148 | if str(e).find('TOO_LONG') > -1: 149 | a.delete() 150 | elif str(e) == '403 Client Error: Forbidden' and str(post.subreddit) not in badsubs: 151 | badsubs = badsubs_page.content_md.strip().split() 152 | badsubs.append(str(post.subreddit)) 153 | editsummary = 'added '+str(post.subreddit) 154 | save_changing_variables(editsummary) 155 | else: 156 | fail(e) 157 | a.delete() 158 | return False 159 | 160 | def filterpass(post): 161 | global summary_call 162 | global has_link 163 | global mod_switch 164 | global badsubs 165 | global r 166 | if (post.author.name == USERNAME) or post.author.name in banned_users: 167 | return False 168 | summary_call = re.search(r'wikibot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$',post.body.lower()) or re.search(r'wikibot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$',post.body.lower()) or re.search("\?\-.*\-\?",post.body.lower()) 169 | has_link = any(string in post.body for string in ['en.wikipedia.org/wiki/', 'en.m.wikipedia.org/wiki/']) 170 | mod_switch = re.search(r'wikibot moderator switch: summon only: on',post.body.lower()) or re.search(r'wikibot moderator switch: summon only: off',post.body.lower()) or re.search(r'wikibot moderator switch: root only: on',post.body.lower()) or re.search(r'wikibot moderator switch: root only: off',post.body.lower()) 171 | if has_link or summary_call or mod_switch: 172 | if re.search(r">", post.body) and not summary_call and not re.search(r"autowikibot-welcome-token", post.body.lower()): 173 | return False 174 | elif re.search(r"wikipedia.org/wiki/.*wikipedia.org/wiki/", post.body, re.DOTALL): 175 | return False 176 | elif str(post.subreddit) in badsubs and not mod_switch: 177 | return False 178 | elif any(string in post.body for string in ['/wiki/File:', '/wiki/List_of', '/wiki/User:', '/wiki/Template:', '/wiki/Category:', '/wiki/Wikipedia:', '/wiki/Talk:']): 179 | return False 180 | elif 
str(post.subreddit) in root_only_subs and not post.is_root and not mod_switch: 181 | return False 182 | elif str(post.subreddit) in summon_only_subs and not summary_call and not mod_switch: 183 | return False 184 | if is_summon_chain(post): 185 | warn('SKIPPED CHAINED REPLY') 186 | return False 187 | elif is_already_done(post): 188 | return False 189 | elif comment_limit_reached(post): 190 | try: 191 | title = "COMMENT LIMIT " + "/r/"+str(post.subreddit) 192 | suburl = str(post.submission.short_link) 193 | r.submit('acini',title,url=suburl) 194 | except: 195 | pass 196 | return False 197 | else: 198 | return True 199 | 200 | def get_url_string(post): 201 | try: 202 | after_split = post.body.split("wikipedia.org/wiki/")[1] 203 | for e in ['\n', ' ']: 204 | after_split = after_split.split(e)[0] 205 | if after_split.endswith(')') and not re.search(r'\(',after_split): 206 | after_split = after_split.split(')')[0] 207 | if re.search(r'\)',after_split) and not re.search(r'\(',after_split): 208 | after_split = after_split.split(')')[0] 209 | return after_split 210 | except: 211 | pass 212 | 213 | def process_summary_call(post): 214 | #special("__________________________________________________") 215 | #special("SUMMARY CALL: %s"%post.id) 216 | replacedbody = post.body.lower().replace('wikibot','___uawb___wikibot') 217 | if re.search(r'wikibot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$',replacedbody): 218 | post_body = re.sub(r'wikibot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$',r'\2',replacedbody).split('___uawb___')[1].split('.')[0].split('?')[0] 219 | term = post_body.strip() 220 | elif re.search(r'wikibot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$',replacedbody): 221 | post_body = re.sub(r'wikibot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$',r'\3',replacedbody).split('___uawb___')[1].split('.')[0].split('?')[0] 222 | term = post_body.strip() 223 | elif re.search("\?\-.*\-\?",replacedbody): 224 | term = re.search("\?\-.*\-\?",post.body.lower()).group(0).strip('?').strip('-').strip() 225 | 226 | special("SUMMARY CALL: %s @ %s"%(filter(lambda x: x in string.printable, term),post.id)) 227 | if term.lower().strip() == 'love': 228 | #post_reply('*Baby don\'t hurt me! Now seriously, stop asking me about love so many times! O.o What were we discussing about in this thread again?*',post) 229 | return(False,False) 230 | #if term.lower().strip() == 'wikibot': 231 | #post_reply('*Me! I know me.*',post) 232 | return(False,False) 233 | if term.lower().strip() == 'reddit': 234 | #post_reply('*This place. 
It feels like home.*',post) 235 | return(False,False) 236 | if term.strip().__len__() < 2 or term == None: 237 | #log("EMPTY TERM") 238 | return(False,False) 239 | try: 240 | title = wikipedia.page(term,auto_suggest=False).title 241 | if title.lower() == term: 242 | bit_comment_start = "" 243 | elif title.lower() != term: 244 | try: 245 | discard = wikipedia.page(term,auto_suggest=False,redirect=False).title 246 | except Exception as e: 247 | if re.search('resulted in a redirect',str(e)): 248 | bit_comment_start = "*\"" + term.strip() + "\" redirects to* " 249 | else: 250 | bit_comment_start = "*Nearest match for* ***" + term.strip() + "*** *is* " 251 | if re.search(r'#',title): 252 | url = wikipedia.page(title.split('#')[0],auto_suggest=False).url 253 | sectionurl = url + "#" + title.split('#')[1] 254 | comment = "*Nearest match for* ***" + term.strip() + "*** *is the section ["+title.split('#')[1]+"]("+sectionurl.replace(')','\)')+") in article ["+title.split('#')[0]+"]("+url+").*\n\n---\n\n" 255 | post_reply(comment,post) 256 | #log("RELEVANT SECTION SUGGESTED: %s"%filter(lambda x: x in string.printable, title)) 257 | return (False,False) 258 | url_string = title 259 | #log("INTERPRETATION: %s"%filter(lambda x: x in string.printable, title)) 260 | return (url_string,bit_comment_start) 261 | except Exception as e: 262 | if bool(re.search('.*may refer to:.*',filter(lambda x: x in string.printable, str(e)))): 263 | deflist = ">Definitions for few of those terms:" 264 | for idx, val in enumerate(filter(lambda x: x in string.printable, str(e)).split('may refer to: \n')[1].split('\n')): 265 | deflist = deflist + "\n\n>1. **"+val.strip()+"**: "+ wikipedia.summary(val,auto_suggest=False,sentences=1) 266 | if idx > 3: 267 | break 268 | summary = "*Oops,* ***"+term.strip()+"*** *landed me on a disambiguation page.*\n\n---\n\n"+deflist+"\n\n---\n\n" 269 | #log("ASKING FOR DISAMBIGUATION") 270 | else: 271 | #log("INTERPRETATION FAIL: %s"%filter(lambda x: x in string.printable, term)) 272 | try: 273 | terms = "\""+term+"\"" 274 | suggesttitle = str(wikipedia.search(terms,results=1)[0]) 275 | #log("SUGGESTING: %s"%filter(lambda x: x in string.printable, suggesttitle)) 276 | if suggesttitle.lower() == term: 277 | bit_comment_start = "" 278 | else: 279 | bit_comment_start = "*Nearest match for* ***" + term.strip() + "*** *is* " 280 | if str(suggesttitle).endswith(')') and not re.search('\(',str(suggesttitle)): 281 | suggesttitle = suggesttitle[0:--(suggesttitle.__len__()-1)] 282 | return (str(suggesttitle),bit_comment_start) 283 | except: 284 | trialtitle = wikipedia.page(term,auto_suggest=True).title 285 | if trialtitle.lower() == term: 286 | bit_comment_start = "" 287 | else: 288 | bit_comment_start = "*Nearest match for* ***" + term.strip() + "*** *is* " 289 | #log("TRIAL SUGGESTION: %s"%filter(lambda x: x in string.printable, trialtitle)) 290 | if str(trialtitle).endswith(')') and not re.search('\(',str(trialtitle)): 291 | trialtitle = trialtitle[0:--(trialtitle.__len__()-1)] 292 | return (str(trialtitle),bit_comment_start) 293 | post_reply(summary,post) 294 | return (False,False) 295 | 296 | def clean_soup(soup): 297 | while soup.table: 298 | discard = soup.table.extract() 299 | while soup.find(id='coordinates'): 300 | discard = soup.find(id='coordinates').extract() 301 | while soup.find("strong", { "class" : "error mw-ext-cite-error" }): 302 | discard = soup.find("strong", { "class" : "error mw-ext-cite-error" }).extract() 303 | while soup.find("sup", { "class" : "reference" }): 304 | discard = 
soup.find("sup", { "class" : "reference" }).extract() 305 | while soup.find("span", { "class" : "t_nihongo_help noprint" }): 306 | discard = soup.find("span", { "class" : "t_nihongo_help noprint" }).extract() 307 | while soup.find("span", { "class" : "sortkey" }): 308 | discard = soup.find("span", { "class" : "sortkey" }).extract() 309 | 310 | for tag in soup: 311 | if tag.name == 'a' and tag.has_attr('href'): 312 | rep = "["+tag.text+"]("+tag['href']+")" 313 | discard = tag.replace_with(rep) 314 | return soup 315 | 316 | def reddify(html): 317 | global has_list 318 | if re.search('<li>',html): 319 | has_list = True 320 | else: 321 | has_list = False 322 | html = html.replace('<b>', '__') 323 | html = html.replace('</b>', '__') 324 | html = html.replace('<i>', '*') 325 | html = html.replace('</i>', '*') 326 | if '__*' in html and '*__' in html: 327 | html = html.replace('__*', '___') 328 | html = html.replace('*__', '___') 329 | html = re.sub('<sup>','^',html) 330 | html = re.sub('<sup.*?>',' ',html) 331 | html = html.replace('</sup>','') 332 | html = html.replace('<dt>','<p>') 333 | html = html.replace('</dt>','</p>') 334 | html = html.replace('<ul>','<p>') 335 | html = html.replace('</ul>','</p>') 336 | html = html.replace('<ol>','<p>') 337 | html = html.replace('</ol>','</p>') 338 | html = html.replace('<dd>','<p>>') 339 | html = html.replace('</dd>','</p> ') 340 | html = html.replace('<li>','<p>* ') 341 | html = html.replace('</li>','</p>') 342 | html = html.replace('<blockquote>','<p>>') 343 | html = html.replace('</blockquote>','</p> ') 344 | return html 345 | 346 | def strip_wiki(wiki): 347 | wiki = re.sub('\[[0-9]\][^(]','',wiki) 348 | wiki = re.sub('\[[0-9][0-9]\][^(]','',wiki) 349 | wiki = re.sub('\[[0-9][0-9][0-9]\][^(]','',wiki) 350 | wiki = re.sub("\( listen\)", '', wiki) 351 | return wiki 352 | 353 | def truncate(data, length): 354 | if data.__len__() > length: 355 | log("TEXT CUT AT %s CHARACTERS"%length) 356 | data = data[0:length]+" ... 
\n`(Truncated at "+str(length)+" characters)`" 357 | return data 358 | else: 359 | return data 360 | 361 | def process_brackets_links(string): 362 | string = ("%s)"%string) 363 | string = string.replace("\\", "") 364 | return string 365 | 366 | def process_brackets_syntax(string): 367 | string = string.replace("\\", "") 368 | string = ("%s\)"%string) 369 | return string 370 | 371 | ### declare variables 372 | load_data() 373 | im = pyimgur.Imgur(imgur_client_id) 374 | global pagepropsdata 375 | submissioncount = collections.Counter() 376 | lastload = int(float(time.strftime("%s"))) 377 | has_list = False 378 | totalposted = 0 379 | 380 | while True: 381 | try: 382 | #comments = r.get_comments("all",limit = 1000) 383 | #for post in comments: 384 | for post in praw.helpers.comment_stream(r,str(sys.argv[1]), limit = None, verbosity=0): 385 | 386 | ### Dirty timer hack 387 | now = int(float(time.strftime("%s"))) 388 | diff = now - lastload 389 | if diff > 899: 390 | banned_users = banned_users_page.content_md.strip().split() 391 | bluelog("BANNED USER LIST RENEWED") 392 | save_changing_variables('scheduled dump') 393 | lastload = now 394 | 395 | if filterpass(post): 396 | if mod_switch: 397 | try: 398 | mod_switch_summon_on = re.search(r'wikibot moderator switch: summon only: on',post.body.lower()) 399 | mod_switch_summon_off = re.search(r'wikibot moderator switch: summon only: off',post.body.lower()) 400 | mod_switch_root_on = re.search(r'wikibot moderator switch: root only: on',post.body.lower()) 401 | mod_switch_root_off = re.search(r'wikibot moderator switch: root only: off',post.body.lower()) 402 | 403 | mods = r.get_moderators(str(post.subreddit)) 404 | is_mod = False 405 | for idx in range(0,len(mods)): 406 | if mods[idx].name == post.author.name: 407 | is_mod = True 408 | break 409 | if is_mod: 410 | if mod_switch_summon_on: 411 | if str(post.subreddit) in summon_only_subs: 412 | comment = "*Summon only feature is already* ***ON*** *in /r/"+str(post.subreddit)+"*\n\n---\n\n" 413 | else: 414 | summon_only_subs.append(str(post.subreddit)) 415 | if str(post.subreddit) in badsubs: 416 | badsubs.remove(str(post.subreddit)) 417 | editsummary = 'added '+str(post.subreddit)+', reason:mod_switch_summon_on' 418 | save_changing_variables(editsummary) 419 | comment = "*Summon only feature switched* ***ON*** *for /r/"+str(post.subreddit)+"*\n\n---\n\n" 420 | elif mod_switch_summon_off: 421 | if str(post.subreddit) not in summon_only_subs: 422 | comment = "*Summon only feature is already* ***OFF*** *in /r/"+str(post.subreddit)+"*\n\n---\n\n" 423 | else: 424 | badsubs = badsubs_page.content_md.strip().split() 425 | summon_only_subs.remove(str(post.subreddit)) 426 | if str(post.subreddit) in badsubs: 427 | badsubs.remove(str(post.subreddit)) 428 | editsummary = 'removed '+str(post.subreddit)+', reason:mod_switch_summon_off' 429 | save_changing_variables(editsummary) 430 | comment = "*Summon only feature switched* ***OFF*** *for /r/"+str(post.subreddit)+"*\n\n---\n\n" 431 | elif mod_switch_root_on: 432 | if str(post.subreddit) in root_only_subs: 433 | comment = "*Root only feature is already* ***ON*** *in /r/"+str(post.subreddit)+"*\n\n---\n\n" 434 | else: 435 | root_only_subs.append(str(post.subreddit)) 436 | if str(post.subreddit) in badsubs: 437 | badsubs.remove(str(post.subreddit)) 438 | editsummary = 'added '+str(post.subreddit)+', reason:mod_switch_root_on' 439 | save_changing_variables(editsummary) 440 | comment = "*Root only feature switched* ***ON*** *for 
/r/"+str(post.subreddit)+"*\n\n---\n\n" 441 | elif mod_switch_root_off: 442 | if str(post.subreddit) not in root_only_subs: 443 | comment = "*Root only feature is already* ***OFF*** *in /r/"+str(post.subreddit)+"*\n\n---\n\n" 444 | else: 445 | badsubs = badsubs_page.content_md.strip().split() 446 | root_only_subs.remove(str(post.subreddit)) 447 | if str(post.subreddit) in badsubs: 448 | badsubs.remove(str(post.subreddit)) 449 | editsummary = 'removed '+str(post.subreddit)+', reason:mod_switch_root_off' 450 | save_changing_variables(editsummary) 451 | comment = "*Root only feature switched* ***OFF*** *for /r/"+str(post.subreddit)+"*\n\n---\n\n" 452 | else: 453 | comment = False 454 | 455 | if comment: 456 | a = post_reply(comment,post) 457 | title = "MODSWITCH: %s"%str(post.subreddit) 458 | subtext = "/u/"+str(post.author.name)+": @ [comment]("+post.permalink+")\n\n"+str(post.body)+"\n\n---\n\n"+comment 459 | r.submit('acini',title,text=subtext) 460 | if a: 461 | special("MODSWITCH: %s @ %s"%(comment.replace('*',''),post.id)) 462 | else: 463 | fail("MODSWITCH REPLY FAILED: %s @ %s"%(comment,post.id)) 464 | title = "MODSWITCH REPLY FAILED: %s"%str(post.subreddit) 465 | subtext = "/u/"+str(post.author.name)+": @ [comment]("+post.permalink+")\n\n"+str(post.body)+"\n\n---\n\n"+comment 466 | r.submit('acini',title,text=subtext) 467 | else: 468 | if post.subreddit not in badsubs: 469 | comment = "*Moderator switches can only be switched ON and OFF by moderators of this subreddit.*\n\n*If you want specific feature turned ON or OFF, [ask the moderators](/message/compose?to=%2Fr%2F"+str(post.subreddit)+") and provide them with [this link](http://www.np.reddit.com/r/autowikibot/wiki/modfaqs).*\n\n---\n\n" 470 | post_reply(comment,post) 471 | except Exception as e: 472 | title = "MODSWITCH FAILURE !!: %s"%str(post.subreddit) 473 | traceback.print_exc() 474 | subtext = "/u/"+str(post.author.name)+": @ [comment]("+post.permalink+")\n\n"+str(post.body)+"\n\n---\n\n"+str(e) 475 | r.submit('acini',title,text=subtext) 476 | continue 477 | elif has_link: 478 | url_string = get_url_string(post) 479 | #log("__________________________________________________") 480 | #log("LINK TRIGGER: %s"%post.id) 481 | bit_comment_start = "" 482 | else: 483 | try: 484 | url_string = "" 485 | url_string, bit_comment_start = process_summary_call(post) 486 | if url_string == False: 487 | continue 488 | url_string = str(url_string) 489 | except Exception as e: 490 | if bool(re.search('.*may refer to:.*',filter(lambda x: x in string.printable, str(e)))): 491 | deflist = ">Definitions for few of those terms:" 492 | for idx, val in enumerate(filter(lambda x: x in string.printable, str(e)).split('may refer to: \n')[1].split('\n')): 493 | deflist = deflist + "\n\n>1. 
**"+val.strip()+"**: "+ wikipedia.summary(val,auto_suggest=False,sentences=1) 494 | if idx > 3: 495 | break 496 | summary = "*Oops,* ***"+url_string.strip()+"*** *landed me on a disambiguation page.*\n\n---\n\n"+deflist+"\n\n---\n\n" 497 | #log("ASKING FOR DISAMBIGUATION") 498 | post_reply(summary,post) 499 | continue 500 | if not url_string: 501 | continue 502 | 503 | article_name_terminal = None 504 | 505 | is_section = False 506 | ### check for subheading in url string, process if present 507 | if re.search(r"#",url_string) and not summary_call: 508 | pagenameraw = url_string.split('#')[0] 509 | pagename = pagenameraw.replace(')','\)') 510 | pagename = pagename.replace('(','\(') 511 | pagename = pagename.strip().replace('.','%') 512 | pagename = urllib.unquote(str(pagename)) 513 | sectionnameraw = url_string.split('#')[1] 514 | sectionname = sectionnameraw.replace('(','\(') 515 | sectionname = sectionname.replace(')','\)') 516 | sectionname = sectionname.strip().replace('.','%') 517 | sectionname = urllib.unquote(str(sectionname)) 518 | try: 519 | url = ("https://en.wikipedia.org/w/api.php?action=parse&page="+pagename.encode('utf-8','ignore')+"&format=xml&prop=sections") 520 | socket.setdefaulttimeout(30) 521 | slsoup = BeautifulSoup(urllib2.urlopen(url).read()) 522 | if slsoup.find_all('s').__len__() == 0: 523 | raise Exception("no sections found") 524 | for s in slsoup.find_all('s'): 525 | if s['anchor'] == sectionnameraw: 526 | section = str(s['index']) 527 | bit_comment_start = "Section "+section+". [**"+sectionname.decode('utf-8','ignore').replace('_',' ')+"**](https://en.wikipedia.org/wiki/"+url_string+") of article " 528 | url_string = pagenameraw 529 | url = ("https://en.wikipedia.org/w/api.php?action=parse&page="+pagename.encode('utf-8','ignore')+"&format=xml&prop=images§ion="+section) 530 | sisoup = BeautifulSoup(urllib2.urlopen(url).read()) 531 | try: 532 | page_image = sisoup.img.text 533 | except: 534 | page_image = "" 535 | pic_markdown = "Image from section" 536 | 537 | while url_string.endswith('))'): 538 | url_string = url_string.replace('))',')') 539 | 540 | url_string_for_fetch = url_string.replace('_', '%20').replace("\\", "") 541 | url_string_for_fetch = url_string_for_fetch.replace(' ', '%20').replace("\\", "") 542 | article_name = url_string.replace('_', ' ') 543 | article_name_terminal = article_name.decode('utf-8','ignore') 544 | ### In case user comments like "/wiki/Article.", remove last 1 letter 545 | if url_string_for_fetch.endswith(".") or url_string_for_fetch.endswith("]"): 546 | url_string_for_fetch = url_string_for_fetch[0:--(url_string_for_fetch.__len__()-1)] 547 | is_section = True 548 | break 549 | except Exception as e: 550 | #traceback.print_exc() 551 | fail(e) 552 | continue 553 | 554 | if article_name_terminal == None and not summary_call: 555 | #log("MALFORMATTED LINK") 556 | #notify = '*Hey '+post.author.name+', that Wikipedia link is probably malformatted.*\n\n---\n\n' 557 | #post_reply(notify,post) 558 | continue 559 | log("ARTICLE: %s / SECTION #%s @ %s"%(filter(lambda x: x in string.printable, article_name_terminal),section,post.id)) 560 | else: 561 | section = 0 562 | pic_markdown = "Image" 563 | while url_string.endswith('))'): 564 | url_string = url_string.replace('))',')') 565 | 566 | url_string_for_fetch = url_string.replace('_', '%20').replace("\\", "") 567 | url_string_for_fetch = url_string_for_fetch.replace(' ', '%20').replace("\\", "") 568 | article_name = url_string.replace('_', ' ') 569 | while 
url_string_for_fetch.endswith('))'): 570 | url_string_for_fetch = url_string_for_fetch.replace('))',')') 571 | 572 | 573 | ### In case user comments like "/wiki/Article.", remove last 1 letter 574 | if url_string_for_fetch.endswith(".") or url_string_for_fetch.endswith("]"): 575 | url_string_for_fetch = url_string_for_fetch[0:--(url_string_for_fetch.__len__()-1)] 576 | url = ("https://en.wikipedia.org/w/api.php?action=query&titles="+url_string_for_fetch+"&prop=pageprops&format=xml") 577 | try: 578 | socket.setdefaulttimeout(30) 579 | pagepropsdata = urllib2.urlopen(url).read() 580 | pagepropsdata = pagepropsdata.decode('utf-8','ignore') 581 | ppsoup = BeautifulSoup(pagepropsdata) 582 | article_name_terminal = ppsoup.page['title'] 583 | except: 584 | try: 585 | article_name_terminal = article_name.replace('\\', '') 586 | except: 587 | article_name_terminal = article_name.replace('\\', '').decode('utf-8','ignore') 588 | 589 | article_name_terminal = urllib.unquote(article_name_terminal) 590 | while article_name_terminal.endswith('))'): 591 | article_name_terminal = article_name_terminal.replace('))',')') 592 | log("ARTICLE: %s @ %s"%(filter(lambda x: x in string.printable, article_name_terminal),post.id)) 593 | 594 | try: 595 | page_image = ppsoup.pageprops["page_image"] 596 | except: 597 | page_image = "" 598 | 599 | if article_name_terminal == None and not summary_call: 600 | #log("MALFORMATTED LINK") 601 | #notify = '*Hey '+post.author.name+', that Wikipedia link is probably malformatted.*' 602 | #post_reply(notify,post) 603 | continue 604 | 605 | 606 | ### fetch data from wikipedia 607 | url = ("https://en.wikipedia.org/w/api.php?action=parse&page="+url_string_for_fetch+"&format=xml&prop=text§ion="+str(section)+"&redirects") 608 | try: 609 | socket.setdefaulttimeout(30) 610 | sectiondata = urllib2.urlopen(url).read() 611 | sectiondata = sectiondata.decode('utf-8','ignore') 612 | sectiondata = reddify(sectiondata) 613 | soup = BeautifulSoup(sectiondata) 614 | soup = BeautifulSoup(soup.text) 615 | sectionnsoup = soup 616 | except Exception as e: 617 | #fail("FETCH: %s"%e) 618 | continue 619 | 620 | soup = clean_soup(soup) 621 | 622 | ### extract paragraph 623 | try: 624 | if soup.p.text.__len__() < 500: 625 | all_p = soup.find_all('p') 626 | wt = "" 627 | for idx, val in enumerate(all_p): 628 | s = all_p[idx] 629 | for tag in s: 630 | if tag.name == 'a' and tag.has_attr('href'): 631 | urlstart = "" 632 | if re.search('#cite',tag['href']): 633 | tag.replace_with('') 634 | continue 635 | elif re.search('/wiki/',tag['href']): 636 | urlstart = "https://en.wikipedia.org" 637 | elif re.search('#',tag['href']): 638 | tag.unwrap() 639 | continue 640 | elif not re.search(r'^http://',tag['href']): 641 | tag.replace_with(tag.text) 642 | continue 643 | rep = "["+tag.text+"]("+urlstart+tag['href'].replace(')','\)')+")" 644 | discard = tag.replace_with(rep) 645 | wt = (wt+"\n\n>"+s.text) # Post 3 paragraphs 646 | data = wt 647 | if has_list: 648 | para = 100 649 | else: 650 | para = 1 651 | if idx > para: 652 | break 653 | else: 654 | s = soup.p 655 | for tag in s: 656 | if tag.name == 'a' and tag.has_attr('href'): 657 | urlstart = "" 658 | if re.search('#cite',tag['href']): 659 | tag.replace_with('') 660 | continue 661 | elif re.search('/wiki/',tag['href']): 662 | urlstart = "https://en.wikipedia.org" 663 | elif re.search('#',tag['href']): 664 | tag.unwrap() 665 | continue 666 | elif not re.search(r'^http://',tag['href']): 667 | tag.replace_with(tag.text) 668 | continue 669 | rep = 
"["+tag.text+"]("+urlstart+tag['href'].replace(')','\)')+")" 670 | discard = tag.replace_with(rep) 671 | data = s.text #Post only first paragraph 672 | except Exception as e: 673 | #fail("TEXT PACKAGE FAIL: %s"%e) 674 | if summary_call: 675 | try: 676 | term = url_string 677 | tell_me_text = wikipedia.summary(term,auto_suggest=False,redirect=True) 678 | tell_me_link = wikipedia.page(term,auto_suggest=False).url 679 | title = wikipedia.page(term,auto_suggest=False).title 680 | if bool(re.search(title,tell_me_text)): 681 | summary = re.sub(title,"[**"+title+"**]("+tell_me_link+")",tell_me_text) 682 | else: 683 | summary = "[**"+title+"**](" + tell_me_link + "): " + tell_me_text 684 | #log("INTERPRETATION: %s"%filter(lambda x: x in string.printable, title)) 685 | if re.search(r'#',title): 686 | summary = wikipedia.page(title.split('#')[0]).section(title.split('#')[1]) 687 | if summary == None or str(filter(lambda x: x in string.printable, summary)).strip() == "": 688 | page_url = wikipedia.page(title.split('#')[0]).url 689 | summary = "Sorry, I failed to fetch the section, but here's the link: "+page_url+"#"+title.split('#')[1] 690 | if re.search(r'(',page_url): 691 | page_url = process_brackets_links(page_url) 692 | comment = "*Here you go:*\n\n---\n\n>\n"+summary+"\n\n---\n\n" 693 | post_reply(comment,post) 694 | continue 695 | except Exception as e: 696 | if bool(re.search('.*may refer to:.*',filter(lambda x: x in string.printable, str(e)))): 697 | deflist = ">Definitions for few of those terms:" 698 | for idx, val in enumerate(filter(lambda x: x in string.printable, str(e)).split('may refer to: \n')[1].split('\n')): 699 | deflist = deflist + "\n\n>1. **"+val.strip()+"**: "+ wikipedia.summary(val,auto_suggest=False,sentences=1) 700 | if idx > 3: 701 | break 702 | #comment = "*Oops,* ***"+process_brackets_syntax(url_string).strip()+"*** *landed me on a disambiguation page.*\n\n---"+deflist+"\n\n---\n\nAnd the remaining list:\n\n"+str(e).replace('\n','\n\n>')+"\n\n---\n\n" 703 | summary = "*Oops,* ***"+process_brackets_syntax(url_string).strip()+"*** *landed me on a disambiguation page.*\n\n---\n\n"+deflist+"\n\n---\n\n" 704 | #log("ASKING FOR DISAMBIGUATION") 705 | else: 706 | #log("INTERPRETATION FAIL: %s"%term) 707 | try: 708 | terms = "\""+term+"\"" 709 | suggest = wikipedia.search(terms,results=1)[0] 710 | trialsummary = wikipedia.summary(suggest,auto_suggest=True) 711 | comment = "*Nearest match for* ***"+term.trim()+"*** *is* ***"+suggest+"*** :\n\n---\n\n>"+trialsummary+"\n\n---\n\n" 712 | #log("SUGGESTING %s"%suggest) 713 | except: 714 | comment = "*Sorry, couldn't find a wikipedia article about that or maybe I couldn't process that due to Wikipedia server errors.*\n\n---\n\n" 715 | #log("COULD NOT SUGGEST FOR %s"%term) 716 | post_reply(comment,post) 717 | continue 718 | continue 719 | data = strip_wiki(data) 720 | data = re.sub("Cite error: There are ref tags on this page, but the references will not show without a \{\{reflist\}\} template \(see the help page\)\.", '', data) 721 | #truncateddata = truncate(data,1000) 722 | if data.__len__() < 50: 723 | #log("TOO SMALL INTRODUCTION PARAGRAPH") 724 | continue 725 | #success("TEXT PACKAGED") 726 | 727 | ### Fetch page image from wikipedia 728 | try: 729 | ### Extract image url 730 | try: 731 | page_image = urllib.unquote(page_image.decode('utf-8','ignore')) 732 | except: 733 | raise Exception("no page image") 734 | if page_image.endswith("ogg") or page_image == "": 735 | raise Exception("no image") 736 | url = 
("https://en.wikipedia.org/w/api.php?action=query&titles=File:"+page_image+"&prop=imageinfo&iiprop=url|mediatype&iiurlwidth=640&format=xml") 737 | socket.setdefaulttimeout(30) 738 | wi_api_data = urllib2.urlopen(url).read() 739 | wisoup = BeautifulSoup(wi_api_data) 740 | image_url = wisoup.ii['thumburl'] 741 | image_source_url = wisoup.ii['descriptionurl'] 742 | image_source_url = re.sub(r'\)','\)',image_source_url) 743 | image_source_url = re.sub(r'\(','\(',image_source_url) 744 | global image_source_markdown 745 | image_source_markdown = ("[^(i)]("+image_source_url+")") 746 | 747 | ### Upload to imgur 748 | uploaded_image = im.upload_image(url=image_url, title=page_image) 749 | 750 | ### Extract caption from already fetched sectiondata 751 | try: 752 | caption_div = sectionnsoup.find("div", { "class" : "thumbcaption" }) 753 | if caption_div is None: 754 | raise Exception("caption not packaged: no caption found in section 0") 755 | if page_image not in str(caption_div.find("div", { "class" : "magnify" })): 756 | raise Exception("caption not packaged: page image not in section 0") 757 | discard = caption_div.find("div", { "class" : "magnify" }).extract() 758 | caption = caption_div.text.strip() 759 | caption = strip_wiki(caption) 760 | caption = re.sub(r'\)','\)',caption) 761 | caption = re.sub(r'\(','\(',caption) 762 | caption = re.sub(r'\*','',caption) 763 | caption = re.sub(r'\n',' ',caption) 764 | if caption != "": 765 | caption_markdown = (" - *"+caption+"*") 766 | caption_div = None 767 | #success("CAPTION PACKAGED") 768 | else: 769 | raise Exception("caption not packaged: no caption found in section 0") 770 | except Exception as e: 771 | if str(e) == "caption not packaged: page image has no caption": 772 | pic_markdown = "Image" 773 | elif str(e) == "caption not packaged: page image not in section 0": 774 | pic_markdown = "Image from article" 775 | caption_markdown = "" 776 | #log(e) 777 | image_markdown = ("====\n\n>[**"+pic_markdown+"**]("+uploaded_image.link.replace('http://','https://')+") "+image_source_markdown+caption_markdown) 778 | #success("IMAGE PACKAGED VIA %s"%uploaded_image.link) 779 | except Exception as e: 780 | image_markdown = "" 781 | #traceback.print_exc() 782 | #log("IMAGE: %s"%str(e).strip().replace('\n','')) 783 | 784 | ###Interesting articles 785 | try: 786 | intlist = wikipedia.search(article_name_terminal,results=5) 787 | if intlist.__len__() > 1: 788 | if article_name_terminal in intlist: 789 | intlist.remove(article_name_terminal) 790 | interesting_list = "" 791 | for topic in intlist: 792 | try: 793 | topicurl = wikipedia.page(topic,auto_suggest=False).url.replace('(','\(').replace(')','\)') 794 | except: 795 | continue 796 | topic = topic.replace(' ',' ^').replace(' ^(',' ^\(') 797 | interesting_list = interesting_list + " [^" + topic + "]" + "(" +topicurl.replace('http://','https://')+ ") ^|" 798 | interesting_markdown = "^Interesting:"+interesting_list.strip('^|') 799 | #success("%s INTERESTING ARTICLE LINKS PACKAGED"%intlist.__len__()) 800 | else: 801 | raise Exception("no suggestions") 802 | except Exception as e: 803 | interesting_markdown = "" 804 | #traceback.print_exc() 805 | #log("INTERESTING ARTICLE LINKS NOT PACKAGED: %s"%str(e).strip().replace('\n','')) 806 | 807 | ###NSFW tagging 808 | #badwords = getnsfw(data) 809 | badwords = None #mark all articles as sfw for now 810 | if badwords: 811 | badlist = '' 812 | for word in badwords: 813 | badlist = badlist + word + ',%20' 814 | nsfwurl = 
"/message/compose?to=%28This%20is%20a%20placeholder%29/r/autowikibot&subject="+str(len(badwords))+"%20NSFW%20words%20are%20present%20in%20this%20comment:&message="+badlist.strip(',%20')+"%0a%0aIf%20you%20think%20any%20of%20word/s%20above%20is%20SFW,%20forward%20this%20message%20to%20/r/autowikibot%20%28keep%20the%20subject%20unchanged%29%0a%0acontext:"+str(post.permalink) 815 | nsfwtag = " [](#nsfw-start)**^NSFW** [^^(?)]("+nsfwurl+")[](#nsfw-end)" 816 | #success("FOUND %s NSFW WORDS"%str(len(badwords))) 817 | else: 818 | nsfwtag = " [](#sfw)" 819 | 820 | post_markdown = bit_comment_start+" [**"+article_name_terminal+"**](https://en.wikipedia.org/wiki/"+url_string_for_fetch.replace(')','\)')+"):"+nsfwtag+" \n\n---\n\n>"+data+"\n\n>"+image_markdown+"\n\n---\n\n"+interesting_markdown+"\n\n" 821 | a = post_reply(post_markdown,post) 822 | image_markdown = "" 823 | if not a: 824 | continue 825 | 826 | except KeyboardInterrupt: 827 | save_changing_variables('exit dump') 828 | warn("EXITING") 829 | break 830 | except Exception as e: 831 | traceback.print_exc() 832 | warn("GLOBAL: %s"%e) 833 | time.sleep(3) 834 | continue 835 | 836 | --------------------------------------------------------------------------------