├── requirements.txt
├── util.py
├── .gitignore
├── LICENCE
├── README.md
├── autowikibot-remover.py
└── autowikibot-commenter.py
/requirements.txt:
--------------------------------------------------------------------------------
1 | praw
2 | pyimgur
3 | beautifulsoup4
4 | wikipedia
5 | 
--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
1 | ### File borrowed from Zack Maril @ https://github.com/zmaril
2 | import re, time
3 | 
4 | def formatted(*args):
5 |     now = time.strftime("%Y-%m-%d %H:%M:%S")
6 |     return "["+now+"] "+" ".join(map(str,args))
7 | 
8 | 
9 | def log(*args):
10 |     print formatted(*args)
11 | 
12 | def fail(*args):
13 |     print '\033[91m'+formatted(*args)+'\033[0m'
14 | 
15 | def warn(*args):
16 |     print '\033[93m'+formatted(*args)+'\033[0m'
17 | 
18 | def success(*args):
19 |     print '\033[92m'+formatted(*args)+'\033[0m'
20 | 
21 | def special(*args):
22 |     print '\033[95m'+formatted(*args)+'\033[0m'
23 | 
24 | def bluelog(*args):
25 |     print '\033[94m'+formatted(*args)+'\033[0m'
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # C extensions
6 | *.so
7 | 
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | eggs/
15 | lib/
16 | lib64/
17 | parts/
18 | sdist/
19 | var/
20 | *.egg-info/
21 | .installed.cfg
22 | *.egg
23 | 
24 | # PyInstaller
25 | # Usually these files are written by a python script from a template
26 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
27 | *.manifest
28 | *.spec
29 | 
30 | # Installer logs
31 | pip-log.txt
32 | pip-delete-this-directory.txt
33 | 
34 | # Unit test / coverage reports
35 | htmlcov/
36 | .tox/
37 | .coverage
38 | .cache
39 | nosetests.xml
40 | coverage.xml
41 | 
42 | # Translations
43 | *.mo
44 | *.pot
45 | 
46 | # Django stuff:
47 | *.log
48 | 
49 | # Sphinx documentation
50 | docs/_build/
51 | 
52 | # PyBuilder
53 | target/
54 | 
55 | # PyCharm
56 | .idea/
57 | 
--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
1 | Copyright (C) 2014 Acini (alias)
2 | 
3 | Definitions
4 | 
5 | 1. A "Creator Instance" is a Reddit user account which is operated and
6 | maintained by the original author of this source code and is used by the
7 | Program for processing and replying to comments.
8 | 2. A subreddit where a Creator Instance has the ability to submit comments
9 | is referred to as a "Covered Subreddit".
10 | 
11 | Permission is hereby granted, free of charge, to any person
12 | obtaining a copy of this software and associated documentation
13 | files (the "Software"), to deal in the Software without restriction,
14 | including without limitation the rights to use, copy, modify,
15 | merge, publish, distribute, sublicense, and/or sell copies
16 | of the Software, and to permit persons to whom the Software
17 | is furnished to do so, subject to the following conditions:
18 | 
19 | 1. The above copyright notice and this permission notice shall
20 | be included in all copies or substantial portions of the Software.
21 | 
22 | 2. The source code (modified or unmodified) cannot be used for submitting comments
23 | in Covered Subreddits when such comments serve the same purpose. The source code
24 | (modified or unmodified) cannot be used for the purpose of harassing the operator(s)
25 | of a Creator Instance. This clause overrides any other clause in this licence
26 | that conflicts with it.
27 | 
28 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
30 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
31 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
32 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
33 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | AutoWikibot
2 | ===========
3 | 
4 | A Reddit bot that replies to comments with an excerpt from the linked Wikipedia article or section.
5 | 
6 | Current instance:
7 | [In action](http://www.reddit.com/u/autowikibot) |
8 | [Subreddit](http://www.reddit.com/r/autowikibot/)
9 | 
10 | Features
11 | ========
12 | 
13 | * Responds to comments like "wikibot, what is dancing?" and "wikibot, tell me about enigma machine"
14 | * In-post summoning via keywords, e.g. "I guess OP should add some more ?- Liverwurst -? to the recipe"
15 | * Suggests up to 4 related interesting articles
16 | * Deletes its reply on the parent commenter's command
17 | * Deletes its reply if the comment score falls below a threshold
18 | * User blacklist
19 | * Automated subreddit blacklisting on the first HTTP 403 encountered
20 | 
21 | Requirements
22 | ============
23 | 
24 | Tested in Python 2.7.6
25 | 
26 | To install the required dependencies from PyPI, run `pip install -r requirements.txt`
27 | from the command line. This will install the `praw`, `pyimgur`, `beautifulsoup4` and
28 | `wikipedia` modules.
29 | 
30 | 
31 | Configuration
32 | =============
33 | 
34 | First, with your bot account, create four comments in places where they will not be removed by someone else:
35 | 
36 | 1. a comment with the banned users list, one username per line
37 | 2. a comment with the excluded subreddits list (without /r/), one subreddit per line
38 | 3. a comment with the list of subreddits where the bot will only reply to top-level (root) comments
39 | 4. a comment with a number indicating the total number of posts made by the bot. Set it to 0 at first setup.
40 | 
41 | Second, you need to create a file called datafile.inf with the following data on separate lines (the scripts read this file purely by line position; see the sketches under Implementation notes below):
42 | 
43 | * reddit bot username
44 | * reddit bot account password
45 | * imgur client id
46 | * ID of the comment with the banned users list
47 | * ID of the comment with the excluded subreddits
48 | * ID of the comment with the root-only subreddits
49 | * ID of the comment with the total number of posts
50 | 
51 | The file will look something like this:
52 | 
53 | ````
54 | wikipedia_robot
55 | botspassword
56 | rt23rnsr2453fop
57 | cetagti
58 | cefsfs4
59 | cef43fs
60 | ce5gd56
61 | ````
62 | 
63 | License
64 | =========
65 | 
66 | This source code is available under a custom licence. See the accompanying file LICENCE.
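
Implementation notes
====================

Both bot scripts read `datafile.inf` purely by line position. Below is a minimal sketch of that reading code, adapted from the login block at the top of `autowikibot-remover.py` and `autowikibot-commenter.py`; the `IMGUR_CLIENT_ID` name is illustrative (the commenter script itself uses `imgur_client_id`):

````
# Values in datafile.inf are identified only by their line number.
with open('datafile.inf', 'r') as myfile:
    datafile_lines = myfile.readlines()

USERNAME = datafile_lines[0].strip()         # reddit bot username
PASSWORD = datafile_lines[1].strip()         # reddit bot account password
IMGUR_CLIENT_ID = datafile_lines[2].strip()  # imgur client id
# The remaining lines hold the comment IDs listed under Configuration.
````

The summoning phrases listed under Features are matched by the regular expressions used in `filterpass()` in `autowikibot-commenter.py`. A rough, self-contained Python 2 sketch of those triggers follows; the `is_summon` helper is illustrative and not part of the scripts, and `body` is assumed to be the lower-cased comment text:

````
import re

def is_summon(body):
    # "wikibot, what is X" / "wikibot, what's X" style questions
    question = re.search(r'wikibot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$', body)
    # "wikibot, tell me about X" style requests
    tell_me = re.search(r'wikibot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$', body)
    # in-post "?- keyword -?" summoning
    inline = re.search(r'\?\-.*\-\?', body)
    return bool(question or tell_me or inline)

print is_summon("wikibot, what is dancing?")              # True
print is_summon("add some more ?- liverwurst -? please")  # True
print is_summon("nothing to summon here")                 # False
````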
67 | -------------------------------------------------------------------------------- /autowikibot-remover.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import praw, time, re, pickle, traceback, os 3 | from util import success, warn, log, fail 4 | 5 | ### Uncomment to debug 6 | #import logging 7 | #logging.basicConfig(level=logging.DEBUG) 8 | 9 | ### Set root directory to script directory 10 | abspath = os.path.abspath(__file__) 11 | dname = os.path.dirname(abspath) 12 | os.chdir(dname) 13 | 14 | r = praw.Reddit("autowikibot by /u/acini at /r/autowikibot") 15 | excludekeyword = "leave me alone" 16 | includekeyword = "follow me again" 17 | global banned_users 18 | 19 | ### Login 20 | with open ('datafile.inf', 'r') as myfile: 21 | datafile_lines=myfile.readlines() 22 | USERNAME = datafile_lines[0].strip() 23 | PASSWORD = datafile_lines[1].strip() 24 | 25 | Trying = True 26 | while Trying: 27 | try: 28 | r.login(USERNAME, PASSWORD) 29 | success("LOGGED IN") 30 | Trying = False 31 | except praw.errors.InvalidUserPass: 32 | fail("WRONG USERNAME OR PASSWORD") 33 | exit() 34 | except Exception as e: 35 | fail(e) 36 | time.sleep(5) 37 | 38 | ### Load saved data 39 | try: 40 | banned_users_page = r.get_wiki_page('autowikibot','userblacklist') 41 | banned_users = banned_users_page.content_md.strip().split() 42 | deleted = 0 43 | success("DATA LOADED") 44 | except Exception as e: 45 | fail(e) 46 | #traceback.print_exc() 47 | exit() 48 | 49 | 50 | 51 | while True: 52 | try: 53 | 54 | ### Check inbox few times 55 | log("AUTODELETE CYCLES STARTED") 56 | for x in range(1, 11): 57 | log("CYCLE %s"%x) 58 | try: 59 | unread = r.get_unread(limit=None) 60 | for msg in unread: 61 | 62 | if re.search(r'\+delete\s', msg.body.lower()): 63 | try: 64 | id = re.findall(r'\+delete\s(.*?)$',msg.body.lower())[0] 65 | id = 't1_'+id 66 | comment = r.get_info(thing_id=id) 67 | comment_parent = r.get_info(thing_id=comment.parent_id) 68 | 69 | 70 | if msg.author.name == comment_parent.author.name or msg.author.name == 'acini': 71 | comment.delete() 72 | deleted+=1 73 | #msg.reply('I have deleted [my comment]('+comment.permalink+'), which was reply to your [this comment]('+comment_parent.permalink+').\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 74 | success("DELETION AT %s"%comment_parent.id) 75 | msg.mark_as_read() 76 | else: 77 | #msg.reply('Oops, only /u/'+str(comment_parent.author.name)+' can delete that [comment]('+comment.permalink+'). Downvote the comment if you think it is not helping.\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 78 | fail("BAD DELETE REQUEST BY /u/%s"%str(msg.author.name)) 79 | msg.mark_as_read() 80 | continue 81 | except Exception as e: 82 | if (str(e)=="'NoneType' object has no attribute 'name'"): 83 | comment.delete() 84 | deleted+=1 85 | #msg.reply('[My comment]('+comment.permalink+') which was reply to [this comment]('+comment_parent.permalink+') is also found orphan. 
I have deleted it as requested.\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 86 | success("DELETION (ORPHAN) AT %s"%comment_parent.id) 87 | else: 88 | fail("%s\033[1;m"%e) 89 | msg.mark_as_read() 90 | continue 91 | 92 | if re.search(r'\+toggle-nsfw\s', msg.body.lower()): 93 | try: 94 | id = re.findall(r'\+toggle-nsfw\s(.*?)$',msg.body.lower())[0] 95 | id = 't1_'+id 96 | comment = r.get_info(thing_id=id) 97 | comment_parent = r.get_info(thing_id=comment.parent_id) 98 | 99 | 100 | if msg.author.name == comment_parent.author.name or msg.author.name == 'acini': 101 | if '[](#nsfw-toggled)' in comment.body.lower(): 102 | #msg.reply('Sorry, NSFW can be toggled only once for a particular comment.') 103 | msg.mark_as_read() 104 | continue 105 | 106 | elif '[](#nsfw-start)' in comment.body.lower(): 107 | nsfwstate = 'OFF' 108 | nsfwurl = "http://www.reddit.com/message/compose?to=%28This%20is%20a%20placeholder%29&subject=NSFW%20toggled:&message=NSFW%20was%20toggled%20"+nsfwstate+"%20by%20parent%20commenter%20for%20this%20comment." 109 | nsfwtag = " [](#sfw)[](#nsfw-toggled)" 110 | replacedb = re.sub(r'\[\]\(\#nsfw-start\).*?\[\]\(\#nsfw-end\)',nsfwtag,comment.body).replace('&','&').replace('>','>').replace('^toggle ^NSFW','').replace('^or[](#or)','') 111 | 112 | elif '[](#sfw)' in comment.body.lower(): 113 | nsfwstate = 'ON' 114 | nsfwurl = "http://www.reddit.com/message/compose?to=%28This%20is%20a%20placeholder%29&subject=NSFW%20toggled:&message=NSFW%20was%20toggled%20"+nsfwstate+"%20by%20parent%20commenter%20for%20this%20comment." 115 | nsfwtag = " [](#nsfw-start)**^NSFW** [^^(?)]("+nsfwurl+")[](#nsfw-end)[](#nsfw-toggled)" 116 | replacedb = comment.body.replace('[](#sfw)',nsfwtag).replace('&','&').replace('>','>').replace('^toggle ^NSFW','').replace('^or[](#or)','') 117 | 118 | comment.edit(replacedb) 119 | ##msg.reply('NSFW was toggled **'+nsfwstate+'** for [this comment]('+comment.permalink+').\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 120 | success("NSFW TOGGLE AT %s"%comment_parent.id) 121 | msg.mark_as_read() 122 | else: 123 | ##msg.reply('Oops, only /u/'+str(comment_parent.author.name)+' can toggle NSFW for that [comment]('+comment.permalink+'). \n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 124 | fail("BAD NSFW TOGGLE REQUEST BY /u/%s"%str(msg.author.name)) 125 | msg.mark_as_read() 126 | continue 127 | except Exception as e: 128 | if (str(e)=="'NoneType' object has no attribute 'name'"): 129 | comment.delete() 130 | deleted+=1 131 | ##msg.reply('[My comment]('+comment.permalink+') which was reply to [this comment]('+comment_parent.permalink+') is also found orphan. I have deleted it as requested.\n\nHave an amazing day, '+str(msg.author.name)+'!\n\n-AutoWikibot') 132 | success("DELETION (ORPHAN) AT %s"%comment_parent.id) 133 | else: 134 | fail("%s\033[1;m"%e) 135 | msg.mark_as_read() 136 | continue 137 | ### Add user to exclude list 138 | if re.search(excludekeyword, msg.body.lower()): 139 | banned_users = banned_users_page.content_md.strip().split() 140 | banned_users.append(msg.author.name) 141 | banned_users.sort() 142 | banned_users = list(set(banned_users)) 143 | banned_users.sort(reverse=True) 144 | c_banned_users = "" 145 | for item in banned_users: 146 | c_banned_users = " "+item+'\n'+c_banned_users 147 | editsummary = 'added '+str(msg.author.name) 148 | r.edit_wiki_page('autowikibot','userblacklist',c_banned_users,editsummary) 149 | time.sleep(1) 150 | msg.mark_as_read() 151 | #msg.reply("*Done! 
I won't reply to your comments now. Allow me 15 minutes to put this in effect.*\n\n*Have a nice day!*") 152 | 153 | success("BANNED /u/%s AT %s"%(msg.author.name,msg.id)) 154 | 155 | if re.search(includekeyword, msg.body.lower()): 156 | msg.mark_as_read() 157 | banned_users = banned_users_page.content_md.strip().split() 158 | if msg.author.name in banned_users: 159 | banned_users.remove(str(msg.author.name)) 160 | banned_users = list(set(banned_users)) 161 | banned_users.sort(reverse=True) 162 | c_banned_users = "" 163 | for item in banned_users: 164 | c_banned_users = " "+item+'\n'+c_banned_users 165 | editsummary = 'removed '+str(msg.author.name) 166 | r.edit_wiki_page('autowikibot','userblacklist',c_banned_users,editsummary) 167 | #msg.reply("*OK! I removed you from the blacklist. I will resume replying to your comments now.*") 168 | success("UNBANNED /u/%s AT %s"%(msg.author.name,msg.id)) 169 | else: 170 | #msg.reply("*Dear, you are not in the blacklist.*") 171 | warn("BAD UNBAN REQUEST BY /u/%s AT %s"%(msg.author.name,msg.id)) 172 | 173 | log('Sleeping') 174 | time.sleep(60) 175 | except Exception as e: 176 | traceback.print_exc() 177 | fail(e) 178 | time.sleep(60) 179 | continue 180 | log("AUTODELETE CYCLES COMPLETED") 181 | 182 | log("COMMENT SCORE CHECK CYCLE STARTED") 183 | user = r.get_redditor(USERNAME) 184 | total = 0 185 | upvoted = 0 186 | unvoted = 0 187 | downvoted = 0 188 | for c in user.get_comments(limit=None): 189 | 190 | if len(str(c.score)) == 4: 191 | spaces = "" 192 | if len(str(c.score)) == 3: 193 | spaces = " " 194 | if len(str(c.score)) == 2: 195 | spaces = " " 196 | if len(str(c.score)) == 1: 197 | spaces = " " 198 | 199 | total = total + 1 200 | if c.score < 1 or '#placeholder-awb' in c.body.lower: 201 | c.delete() 202 | print "\033[1;41m%s%s\033[1;m"%(spaces,c.score), 203 | deleted = deleted + 1 204 | downvoted = downvoted + 1 205 | elif c.score > 10: 206 | print "\033[1;32m%s%s\033[1;m"%(spaces,c.score), 207 | upvoted = upvoted + 1 208 | elif c.score > 1: 209 | print "\033[1;34m%s%s\033[1;m"%(spaces,c.score), 210 | upvoted = upvoted + 1 211 | elif c.score > 0: 212 | print "\033[1;30m%s%s\033[1;m"%(spaces,c.score), 213 | unvoted = unvoted + 1 214 | 215 | print ("") 216 | log("COMMENT SCORE CHECK CYCLE COMPLETED") 217 | urate = round(upvoted / float(total) * 100) 218 | nrate = round(unvoted / float(total) * 100) 219 | drate = round(downvoted / float(total) * 100) 220 | warn("Upvoted: %s\t%s\b\b %%"%(upvoted,urate)) 221 | warn("Unvoted %s\t%s\b\b %%"%(unvoted,nrate)) 222 | warn("Downvoted: %s\t%s\b\b %%"%(downvoted,drate)) 223 | warn("Total: %s"%total) 224 | 225 | except KeyboardInterrupt: 226 | log("EXITING") 227 | break 228 | except Exception as e: 229 | #traceback.print_exc() 230 | fail(e) 231 | time.sleep(3) 232 | continue 233 | 234 | -------------------------------------------------------------------------------- /autowikibot-commenter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import praw, time, datetime, re, urllib, urllib2, pickle, pyimgur, os, traceback, wikipedia, string, socket, sys, collections 4 | from nsfw import getnsfw 5 | from util import success, warn, log, fail, special, bluelog 6 | from bs4 import BeautifulSoup 7 | from HTMLParser import HTMLParser 8 | 9 | ### Uncomment to debug 10 | #import logging 11 | #logging.basicConfig(level=logging.DEBUG) 12 | 13 | ### Set root directory to script directory 14 | abspath = os.path.abspath(__file__) 15 | dname = 
os.path.dirname(abspath) 16 | os.chdir(dname) 17 | 18 | ###Load data 19 | def load_data(): 20 | global banned_users 21 | global badsubs 22 | global root_only_subs 23 | global summon_only_subs 24 | global imgur_client_id 25 | global banned_users_page 26 | global badsubs_page 27 | global root_only_subs_page 28 | global summon_only_subs_page 29 | imgur_client_id = datafile_lines[2].strip() 30 | banned_users_page = r.get_wiki_page('autowikibot','userblacklist') 31 | badsubs_page = r.get_wiki_page('autowikibot','excludedsubs') 32 | root_only_subs_page = r.get_wiki_page('autowikibot','rootonlysubs') 33 | summon_only_subs_page = r.get_wiki_page('autowikibot','summononlysubs') 34 | try: 35 | banned_users = banned_users_page.content_md.strip().split() 36 | badsubs = badsubs_page.content_md.strip().split() 37 | root_only_subs = root_only_subs_page.content_md.strip().split() 38 | summon_only_subs = summon_only_subs_page.content_md.strip().split() 39 | success("DATA LOADED") 40 | except Exception as e: 41 | #traceback.print_exc() 42 | fail("DATA LOAD FAILED: %s"%e) 43 | exit() 44 | 45 | def save_changing_variables(editsummary): 46 | ##Save badsubs 47 | global badsubs 48 | badsubs = list(set(badsubs)) 49 | badsubs.sort(reverse=True) 50 | c_badsubs = "" 51 | for item in badsubs: 52 | c_badsubs = " "+item+'\n'+c_badsubs 53 | r.edit_wiki_page('autowikibot','excludedsubs',c_badsubs,editsummary) 54 | ##Save root_only_subs 55 | global root_only_subs 56 | root_only_subs = list(set(root_only_subs)) 57 | root_only_subs.sort(reverse=True) 58 | c_root_only_subs = "" 59 | for item in root_only_subs: 60 | c_root_only_subs = " "+item+'\n'+c_root_only_subs 61 | r.edit_wiki_page('autowikibot','rootonlysubs',c_root_only_subs,editsummary) 62 | ##Save summon_only_subs 63 | global summon_only_subs 64 | summon_only_subs = list(set(summon_only_subs)) 65 | summon_only_subs.sort(reverse=True) 66 | c_summon_only_subs = "" 67 | for item in summon_only_subs: 68 | c_summon_only_subs = " "+item+'\n'+c_summon_only_subs 69 | r.edit_wiki_page('autowikibot','summononlysubs',c_summon_only_subs,editsummary) 70 | 71 | 72 | success("DATA SAVED") 73 | 74 | with open ('datafile.inf', 'r') as myfile: 75 | datafile_lines=myfile.readlines() 76 | 77 | ### Login 78 | r = praw.Reddit("autowikibot by /u/acini at /r/autowikibot") 79 | USERNAME = datafile_lines[0].strip() 80 | PASSWORD = datafile_lines[1].strip() 81 | Trying = True 82 | while Trying: 83 | try: 84 | r.login(USERNAME, PASSWORD) 85 | success("LOGGED IN") 86 | Trying = False 87 | except praw.errors.InvalidUserPass: 88 | fail("WRONG USERNAME OR PASSWORD") 89 | exit() 90 | except Exception as e: 91 | fail("%s"%e) 92 | time.sleep(5) 93 | 94 | def is_summon_chain(post): 95 | if not post.is_root: 96 | parent_comment_id = post.parent_id 97 | parent_comment = r.get_info(thing_id=parent_comment_id) 98 | if parent_comment.author != None and str(parent_comment.author.name) == 'autowikibot': 99 | return True 100 | else: 101 | return False 102 | else: 103 | return False 104 | 105 | def comment_limit_reached(post): 106 | global submissioncount 107 | count_of_this = int(float(submissioncount[str(post.submission.id)])) 108 | if count_of_this > 4 and not (str(post.subreddit) == 'autowikibotdelreq' or str(post.subreddit) == 'autowikibot' or str(post.subreddit) == 'todayilearned'): 109 | return True 110 | else: 111 | return False 112 | 113 | def is_already_done(post): 114 | done = False 115 | numofr = 0 116 | try: 117 | repliesarray = post.replies 118 | numofr = len(list(repliesarray)) 119 | except: 120 
| pass 121 | if numofr != 0: 122 | for repl in post.replies: 123 | if repl.author != None and (repl.author.name == 'autowikibot' or repl.author.name == 'Text_Reader_Bot'): 124 | warn("%s IS ALREADY DONE"%post.id) 125 | done = True 126 | continue 127 | if done: 128 | return True 129 | else: 130 | return False 131 | 132 | def post_reply(reply,post): 133 | global badsubs 134 | global submissioncount 135 | global totalposted 136 | try: 137 | reply = "##### \n\n###### \n\n#### \n"+reply+"^Parent ^commenter ^can [^toggle ^NSFW](/message/compose?to=autowikibot&subject=AutoWikibot NSFW toggle&message=%2Btoggle-nsfw+____id____) ^or[](#or) [^delete](/message/compose?to=autowikibot&subject=AutoWikibot Deletion&message=%2Bdelete+____id____)^. ^Will ^also ^delete ^on ^comment ^score ^of ^-1 ^or ^less. ^| [^(FAQs)](http://www.np.reddit.com/r/autowikibot/wiki/index) ^| [^Mods](http://www.np.reddit.com/r/autowikibot/comments/1x013o/for_moderators_switches_commands_and_css/) ^| [^Magic ^Words](http://www.np.reddit.com/r/autowikibot/comments/1ux484/ask_wikibot/)" 138 | a = post.reply('[#placeholder-awb]Comment is being processed... It will be automatically replaced by new text within a minute or will be deleted if that fails.') 139 | postsuccess = r.get_info(thing_id='t1_'+str(a.id)).edit(reply.replace('____id____',str(a.id))) 140 | if not postsuccess: 141 | raise Exception ('reply unsuccessful') 142 | totalposted = totalposted + 1 143 | submissioncount[str(post.submission.id)]+=1 144 | success("[OK] #%s "%totalposted) 145 | return True 146 | except Exception as e: 147 | warn("REPLY FAILED: %s @ %s"%(e,post.subreddit)) 148 | if str(e).find('TOO_LONG') > -1: 149 | a.delete() 150 | elif str(e) == '403 Client Error: Forbidden' and str(post.subreddit) not in badsubs: 151 | badsubs = badsubs_page.content_md.strip().split() 152 | badsubs.append(str(post.subreddit)) 153 | editsummary = 'added '+str(post.subreddit) 154 | save_changing_variables(editsummary) 155 | else: 156 | fail(e) 157 | a.delete() 158 | return False 159 | 160 | def filterpass(post): 161 | global summary_call 162 | global has_link 163 | global mod_switch 164 | global badsubs 165 | global r 166 | if (post.author.name == USERNAME) or post.author.name in banned_users: 167 | return False 168 | summary_call = re.search(r'wikibot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$',post.body.lower()) or re.search(r'wikibot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$',post.body.lower()) or re.search("\?\-.*\-\?",post.body.lower()) 169 | has_link = any(string in post.body for string in ['en.wikipedia.org/wiki/', 'en.m.wikipedia.org/wiki/']) 170 | mod_switch = re.search(r'wikibot moderator switch: summon only: on',post.body.lower()) or re.search(r'wikibot moderator switch: summon only: off',post.body.lower()) or re.search(r'wikibot moderator switch: root only: on',post.body.lower()) or re.search(r'wikibot moderator switch: root only: off',post.body.lower()) 171 | if has_link or summary_call or mod_switch: 172 | if re.search(r">", post.body) and not summary_call and not re.search(r"autowikibot-welcome-token", post.body.lower()): 173 | return False 174 | elif re.search(r"wikipedia.org/wiki/.*wikipedia.org/wiki/", post.body, re.DOTALL): 175 | return False 176 | elif str(post.subreddit) in badsubs and not mod_switch: 177 | return False 178 | elif any(string in post.body for string in ['/wiki/File:', '/wiki/List_of', '/wiki/User:', '/wiki/Template:', '/wiki/Category:', '/wiki/Wikipedia:', '/wiki/Talk:']): 179 | return False 180 | elif 
str(post.subreddit) in root_only_subs and not post.is_root and not mod_switch: 181 | return False 182 | elif str(post.subreddit) in summon_only_subs and not summary_call and not mod_switch: 183 | return False 184 | if is_summon_chain(post): 185 | warn('SKIPPED CHAINED REPLY') 186 | return False 187 | elif is_already_done(post): 188 | return False 189 | elif comment_limit_reached(post): 190 | try: 191 | title = "COMMENT LIMIT " + "/r/"+str(post.subreddit) 192 | suburl = str(post.submission.short_link) 193 | r.submit('acini',title,url=suburl) 194 | except: 195 | pass 196 | return False 197 | else: 198 | return True 199 | 200 | def get_url_string(post): 201 | try: 202 | after_split = post.body.split("wikipedia.org/wiki/")[1] 203 | for e in ['\n', ' ']: 204 | after_split = after_split.split(e)[0] 205 | if after_split.endswith(')') and not re.search(r'\(',after_split): 206 | after_split = after_split.split(')')[0] 207 | if re.search(r'\)',after_split) and not re.search(r'\(',after_split): 208 | after_split = after_split.split(')')[0] 209 | return after_split 210 | except: 211 | pass 212 | 213 | def process_summary_call(post): 214 | #special("__________________________________________________") 215 | #special("SUMMARY CALL: %s"%post.id) 216 | replacedbody = post.body.lower().replace('wikibot','___uawb___wikibot') 217 | if re.search(r'wikibot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$',replacedbody): 218 | post_body = re.sub(r'wikibot.\s*tell\s.{1,23}\sabout\s+(an\s+|a\s+|the\s+|)(.*?)$',r'\2',replacedbody).split('___uawb___')[1].split('.')[0].split('?')[0] 219 | term = post_body.strip() 220 | elif re.search(r'wikibot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$',replacedbody): 221 | post_body = re.sub(r'wikibot.\s*wh.{1,3}(\'s|\s+is|\s+are|\s+was)\s+(an\s+|a\s+|the\s+|)(.*?)$',r'\3',replacedbody).split('___uawb___')[1].split('.')[0].split('?')[0] 222 | term = post_body.strip() 223 | elif re.search("\?\-.*\-\?",replacedbody): 224 | term = re.search("\?\-.*\-\?",post.body.lower()).group(0).strip('?').strip('-').strip() 225 | 226 | special("SUMMARY CALL: %s @ %s"%(filter(lambda x: x in string.printable, term),post.id)) 227 | if term.lower().strip() == 'love': 228 | #post_reply('*Baby don\'t hurt me! Now seriously, stop asking me about love so many times! O.o What were we discussing about in this thread again?*',post) 229 | return(False,False) 230 | #if term.lower().strip() == 'wikibot': 231 | #post_reply('*Me! I know me.*',post) 232 | return(False,False) 233 | if term.lower().strip() == 'reddit': 234 | #post_reply('*This place. 
It feels like home.*',post) 235 | return(False,False) 236 | if term.strip().__len__() < 2 or term == None: 237 | #log("EMPTY TERM") 238 | return(False,False) 239 | try: 240 | title = wikipedia.page(term,auto_suggest=False).title 241 | if title.lower() == term: 242 | bit_comment_start = "" 243 | elif title.lower() != term: 244 | try: 245 | discard = wikipedia.page(term,auto_suggest=False,redirect=False).title 246 | except Exception as e: 247 | if re.search('resulted in a redirect',str(e)): 248 | bit_comment_start = "*\"" + term.strip() + "\" redirects to* " 249 | else: 250 | bit_comment_start = "*Nearest match for* ***" + term.strip() + "*** *is* " 251 | if re.search(r'#',title): 252 | url = wikipedia.page(title.split('#')[0],auto_suggest=False).url 253 | sectionurl = url + "#" + title.split('#')[1] 254 | comment = "*Nearest match for* ***" + term.strip() + "*** *is the section ["+title.split('#')[1]+"]("+sectionurl.replace(')','\)')+") in article ["+title.split('#')[0]+"]("+url+").*\n\n---\n\n" 255 | post_reply(comment,post) 256 | #log("RELEVANT SECTION SUGGESTED: %s"%filter(lambda x: x in string.printable, title)) 257 | return (False,False) 258 | url_string = title 259 | #log("INTERPRETATION: %s"%filter(lambda x: x in string.printable, title)) 260 | return (url_string,bit_comment_start) 261 | except Exception as e: 262 | if bool(re.search('.*may refer to:.*',filter(lambda x: x in string.printable, str(e)))): 263 | deflist = ">Definitions for few of those terms:" 264 | for idx, val in enumerate(filter(lambda x: x in string.printable, str(e)).split('may refer to: \n')[1].split('\n')): 265 | deflist = deflist + "\n\n>1. **"+val.strip()+"**: "+ wikipedia.summary(val,auto_suggest=False,sentences=1) 266 | if idx > 3: 267 | break 268 | summary = "*Oops,* ***"+term.strip()+"*** *landed me on a disambiguation page.*\n\n---\n\n"+deflist+"\n\n---\n\n" 269 | #log("ASKING FOR DISAMBIGUATION") 270 | else: 271 | #log("INTERPRETATION FAIL: %s"%filter(lambda x: x in string.printable, term)) 272 | try: 273 | terms = "\""+term+"\"" 274 | suggesttitle = str(wikipedia.search(terms,results=1)[0]) 275 | #log("SUGGESTING: %s"%filter(lambda x: x in string.printable, suggesttitle)) 276 | if suggesttitle.lower() == term: 277 | bit_comment_start = "" 278 | else: 279 | bit_comment_start = "*Nearest match for* ***" + term.strip() + "*** *is* " 280 | if str(suggesttitle).endswith(')') and not re.search('\(',str(suggesttitle)): 281 | suggesttitle = suggesttitle[0:--(suggesttitle.__len__()-1)] 282 | return (str(suggesttitle),bit_comment_start) 283 | except: 284 | trialtitle = wikipedia.page(term,auto_suggest=True).title 285 | if trialtitle.lower() == term: 286 | bit_comment_start = "" 287 | else: 288 | bit_comment_start = "*Nearest match for* ***" + term.strip() + "*** *is* " 289 | #log("TRIAL SUGGESTION: %s"%filter(lambda x: x in string.printable, trialtitle)) 290 | if str(trialtitle).endswith(')') and not re.search('\(',str(trialtitle)): 291 | trialtitle = trialtitle[0:--(trialtitle.__len__()-1)] 292 | return (str(trialtitle),bit_comment_start) 293 | post_reply(summary,post) 294 | return (False,False) 295 | 296 | def clean_soup(soup): 297 | while soup.table: 298 | discard = soup.table.extract() 299 | while soup.find(id='coordinates'): 300 | discard = soup.find(id='coordinates').extract() 301 | while soup.find("strong", { "class" : "error mw-ext-cite-error" }): 302 | discard = soup.find("strong", { "class" : "error mw-ext-cite-error" }).extract() 303 | while soup.find("sup", { "class" : "reference" }): 304 | discard = 
soup.find("sup", { "class" : "reference" }).extract() 305 | while soup.find("span", { "class" : "t_nihongo_help noprint" }): 306 | discard = soup.find("span", { "class" : "t_nihongo_help noprint" }).extract() 307 | while soup.find("span", { "class" : "sortkey" }): 308 | discard = soup.find("span", { "class" : "sortkey" }).extract() 309 | 310 | for tag in soup: 311 | if tag.name == 'a' and tag.has_attr('href'): 312 | rep = "["+tag.text+"]("+tag['href']+")" 313 | discard = tag.replace_with(rep) 314 | return soup 315 | 316 | def reddify(html): 317 | global has_list 318 | if re.search('<li>',html): 319 | has_list = True 320 | else: 321 | has_list = False 322 | html = html.replace('<b>', '__') 323 | html = html.replace('</b>', '__') 324 | html = html.replace('<i>', '*') 325 | html = html.replace('</i>', '*') 326 | if '__*' in html and '*__' in html: 327 | html = html.replace('__*', '___') 328 | html = html.replace('*__', '___') 329 | html = re.sub('<sup>','^',html) 330 | html = re.sub('<sup.*?>',' ',html) 331 | html = html.replace('</sup>','') 332 | html = html.replace('<dt>','<p>') 333 | html = html.replace('</dt>','</p>') 334 | html = html.replace('<ul>','<p>') 335 | html = html.replace('</ul>','</p>') 336 | html = html.replace('<ol>','<p>') 337 | html = html.replace('</ol>','</p>') 338 | html = html.replace('<dd>','<p>>') 339 | html = html.replace('</dd>','</p> ') 340 | html = html.replace('<li>','<p>* ') 341 | html = html.replace('</li>','</p>') 342 | html = html.replace('<blockquote>','<p>>') 343 | html = html.replace('</blockquote>','</p> ') 344 | return html 345 | 346 | def strip_wiki(wiki): 347 | wiki = re.sub('\[[0-9]\][^(]','',wiki) 348 | wiki = re.sub('\[[0-9][0-9]\][^(]','',wiki) 349 | wiki = re.sub('\[[0-9][0-9][0-9]\][^(]','',wiki) 350 | wiki = re.sub("\( listen\)", '', wiki) 351 | return wiki 352 | 353 | def truncate(data, length): 354 | if data.__len__() > length: 355 | log("TEXT CUT AT %s CHARACTERS"%length) 356 | data = data[0:length]+" ... 
\n`(Truncated at "+str(length)+" characters)`" 357 | return data 358 | else: 359 | return data 360 | 361 | def process_brackets_links(string): 362 | string = ("%s)"%string) 363 | string = string.replace("\\", "") 364 | return string 365 | 366 | def process_brackets_syntax(string): 367 | string = string.replace("\\", "") 368 | string = ("%s\)"%string) 369 | return string 370 | 371 | ### declare variables 372 | load_data() 373 | im = pyimgur.Imgur(imgur_client_id) 374 | global pagepropsdata 375 | submissioncount = collections.Counter() 376 | lastload = int(float(time.strftime("%s"))) 377 | has_list = False 378 | totalposted = 0 379 | 380 | while True: 381 | try: 382 | #comments = r.get_comments("all",limit = 1000) 383 | #for post in comments: 384 | for post in praw.helpers.comment_stream(r,str(sys.argv[1]), limit = None, verbosity=0): 385 | 386 | ### Dirty timer hack 387 | now = int(float(time.strftime("%s"))) 388 | diff = now - lastload 389 | if diff > 899: 390 | banned_users = banned_users_page.content_md.strip().split() 391 | bluelog("BANNED USER LIST RENEWED") 392 | save_changing_variables('scheduled dump') 393 | lastload = now 394 | 395 | if filterpass(post): 396 | if mod_switch: 397 | try: 398 | mod_switch_summon_on = re.search(r'wikibot moderator switch: summon only: on',post.body.lower()) 399 | mod_switch_summon_off = re.search(r'wikibot moderator switch: summon only: off',post.body.lower()) 400 | mod_switch_root_on = re.search(r'wikibot moderator switch: root only: on',post.body.lower()) 401 | mod_switch_root_off = re.search(r'wikibot moderator switch: root only: off',post.body.lower()) 402 | 403 | mods = r.get_moderators(str(post.subreddit)) 404 | is_mod = False 405 | for idx in range(0,len(mods)): 406 | if mods[idx].name == post.author.name: 407 | is_mod = True 408 | break 409 | if is_mod: 410 | if mod_switch_summon_on: 411 | if str(post.subreddit) in summon_only_subs: 412 | comment = "*Summon only feature is already* ***ON*** *in /r/"+str(post.subreddit)+"*\n\n---\n\n" 413 | else: 414 | summon_only_subs.append(str(post.subreddit)) 415 | if str(post.subreddit) in badsubs: 416 | badsubs.remove(str(post.subreddit)) 417 | editsummary = 'added '+str(post.subreddit)+', reason:mod_switch_summon_on' 418 | save_changing_variables(editsummary) 419 | comment = "*Summon only feature switched* ***ON*** *for /r/"+str(post.subreddit)+"*\n\n---\n\n" 420 | elif mod_switch_summon_off: 421 | if str(post.subreddit) not in summon_only_subs: 422 | comment = "*Summon only feature is already* ***OFF*** *in /r/"+str(post.subreddit)+"*\n\n---\n\n" 423 | else: 424 | badsubs = badsubs_page.content_md.strip().split() 425 | summon_only_subs.remove(str(post.subreddit)) 426 | if str(post.subreddit) in badsubs: 427 | badsubs.remove(str(post.subreddit)) 428 | editsummary = 'removed '+str(post.subreddit)+', reason:mod_switch_summon_off' 429 | save_changing_variables(editsummary) 430 | comment = "*Summon only feature switched* ***OFF*** *for /r/"+str(post.subreddit)+"*\n\n---\n\n" 431 | elif mod_switch_root_on: 432 | if str(post.subreddit) in root_only_subs: 433 | comment = "*Root only feature is already* ***ON*** *in /r/"+str(post.subreddit)+"*\n\n---\n\n" 434 | else: 435 | root_only_subs.append(str(post.subreddit)) 436 | if str(post.subreddit) in badsubs: 437 | badsubs.remove(str(post.subreddit)) 438 | editsummary = 'added '+str(post.subreddit)+', reason:mod_switch_root_on' 439 | save_changing_variables(editsummary) 440 | comment = "*Root only feature switched* ***ON*** *for 
/r/"+str(post.subreddit)+"*\n\n---\n\n" 441 | elif mod_switch_root_off: 442 | if str(post.subreddit) not in root_only_subs: 443 | comment = "*Root only feature is already* ***OFF*** *in /r/"+str(post.subreddit)+"*\n\n---\n\n" 444 | else: 445 | badsubs = badsubs_page.content_md.strip().split() 446 | root_only_subs.remove(str(post.subreddit)) 447 | if str(post.subreddit) in badsubs: 448 | badsubs.remove(str(post.subreddit)) 449 | editsummary = 'removed '+str(post.subreddit)+', reason:mod_switch_root_off' 450 | save_changing_variables(editsummary) 451 | comment = "*Root only feature switched* ***OFF*** *for /r/"+str(post.subreddit)+"*\n\n---\n\n" 452 | else: 453 | comment = False 454 | 455 | if comment: 456 | a = post_reply(comment,post) 457 | title = "MODSWITCH: %s"%str(post.subreddit) 458 | subtext = "/u/"+str(post.author.name)+": @ [comment]("+post.permalink+")\n\n"+str(post.body)+"\n\n---\n\n"+comment 459 | r.submit('acini',title,text=subtext) 460 | if a: 461 | special("MODSWITCH: %s @ %s"%(comment.replace('*',''),post.id)) 462 | else: 463 | fail("MODSWITCH REPLY FAILED: %s @ %s"%(comment,post.id)) 464 | title = "MODSWITCH REPLY FAILED: %s"%str(post.subreddit) 465 | subtext = "/u/"+str(post.author.name)+": @ [comment]("+post.permalink+")\n\n"+str(post.body)+"\n\n---\n\n"+comment 466 | r.submit('acini',title,text=subtext) 467 | else: 468 | if post.subreddit not in badsubs: 469 | comment = "*Moderator switches can only be switched ON and OFF by moderators of this subreddit.*\n\n*If you want specific feature turned ON or OFF, [ask the moderators](/message/compose?to=%2Fr%2F"+str(post.subreddit)+") and provide them with [this link](http://www.np.reddit.com/r/autowikibot/wiki/modfaqs).*\n\n---\n\n" 470 | post_reply(comment,post) 471 | except Exception as e: 472 | title = "MODSWITCH FAILURE !!: %s"%str(post.subreddit) 473 | traceback.print_exc() 474 | subtext = "/u/"+str(post.author.name)+": @ [comment]("+post.permalink+")\n\n"+str(post.body)+"\n\n---\n\n"+str(e) 475 | r.submit('acini',title,text=subtext) 476 | continue 477 | elif has_link: 478 | url_string = get_url_string(post) 479 | #log("__________________________________________________") 480 | #log("LINK TRIGGER: %s"%post.id) 481 | bit_comment_start = "" 482 | else: 483 | try: 484 | url_string = "" 485 | url_string, bit_comment_start = process_summary_call(post) 486 | if url_string == False: 487 | continue 488 | url_string = str(url_string) 489 | except Exception as e: 490 | if bool(re.search('.*may refer to:.*',filter(lambda x: x in string.printable, str(e)))): 491 | deflist = ">Definitions for few of those terms:" 492 | for idx, val in enumerate(filter(lambda x: x in string.printable, str(e)).split('may refer to: \n')[1].split('\n')): 493 | deflist = deflist + "\n\n>1. 
**"+val.strip()+"**: "+ wikipedia.summary(val,auto_suggest=False,sentences=1) 494 | if idx > 3: 495 | break 496 | summary = "*Oops,* ***"+url_string.strip()+"*** *landed me on a disambiguation page.*\n\n---\n\n"+deflist+"\n\n---\n\n" 497 | #log("ASKING FOR DISAMBIGUATION") 498 | post_reply(summary,post) 499 | continue 500 | if not url_string: 501 | continue 502 | 503 | article_name_terminal = None 504 | 505 | is_section = False 506 | ### check for subheading in url string, process if present 507 | if re.search(r"#",url_string) and not summary_call: 508 | pagenameraw = url_string.split('#')[0] 509 | pagename = pagenameraw.replace(')','\)') 510 | pagename = pagename.replace('(','\(') 511 | pagename = pagename.strip().replace('.','%') 512 | pagename = urllib.unquote(str(pagename)) 513 | sectionnameraw = url_string.split('#')[1] 514 | sectionname = sectionnameraw.replace('(','\(') 515 | sectionname = sectionname.replace(')','\)') 516 | sectionname = sectionname.strip().replace('.','%') 517 | sectionname = urllib.unquote(str(sectionname)) 518 | try: 519 | url = ("https://en.wikipedia.org/w/api.php?action=parse&page="+pagename.encode('utf-8','ignore')+"&format=xml&prop=sections") 520 | socket.setdefaulttimeout(30) 521 | slsoup = BeautifulSoup(urllib2.urlopen(url).read()) 522 | if slsoup.find_all('s').__len__() == 0: 523 | raise Exception("no sections found") 524 | for s in slsoup.find_all('s'): 525 | if s['anchor'] == sectionnameraw: 526 | section = str(s['index']) 527 | bit_comment_start = "Section "+section+". [**"+sectionname.decode('utf-8','ignore').replace('_',' ')+"**](https://en.wikipedia.org/wiki/"+url_string+") of article " 528 | url_string = pagenameraw 529 | url = ("https://en.wikipedia.org/w/api.php?action=parse&page="+pagename.encode('utf-8','ignore')+"&format=xml&prop=images§ion="+section) 530 | sisoup = BeautifulSoup(urllib2.urlopen(url).read()) 531 | try: 532 | page_image = sisoup.img.text 533 | except: 534 | page_image = "" 535 | pic_markdown = "Image from section" 536 | 537 | while url_string.endswith('))'): 538 | url_string = url_string.replace('))',')') 539 | 540 | url_string_for_fetch = url_string.replace('_', '%20').replace("\\", "") 541 | url_string_for_fetch = url_string_for_fetch.replace(' ', '%20').replace("\\", "") 542 | article_name = url_string.replace('_', ' ') 543 | article_name_terminal = article_name.decode('utf-8','ignore') 544 | ### In case user comments like "/wiki/Article.", remove last 1 letter 545 | if url_string_for_fetch.endswith(".") or url_string_for_fetch.endswith("]"): 546 | url_string_for_fetch = url_string_for_fetch[0:--(url_string_for_fetch.__len__()-1)] 547 | is_section = True 548 | break 549 | except Exception as e: 550 | #traceback.print_exc() 551 | fail(e) 552 | continue 553 | 554 | if article_name_terminal == None and not summary_call: 555 | #log("MALFORMATTED LINK") 556 | #notify = '*Hey '+post.author.name+', that Wikipedia link is probably malformatted.*\n\n---\n\n' 557 | #post_reply(notify,post) 558 | continue 559 | log("ARTICLE: %s / SECTION #%s @ %s"%(filter(lambda x: x in string.printable, article_name_terminal),section,post.id)) 560 | else: 561 | section = 0 562 | pic_markdown = "Image" 563 | while url_string.endswith('))'): 564 | url_string = url_string.replace('))',')') 565 | 566 | url_string_for_fetch = url_string.replace('_', '%20').replace("\\", "") 567 | url_string_for_fetch = url_string_for_fetch.replace(' ', '%20').replace("\\", "") 568 | article_name = url_string.replace('_', ' ') 569 | while 
url_string_for_fetch.endswith('))'): 570 | url_string_for_fetch = url_string_for_fetch.replace('))',')') 571 | 572 | 573 | ### In case user comments like "/wiki/Article.", remove last 1 letter 574 | if url_string_for_fetch.endswith(".") or url_string_for_fetch.endswith("]"): 575 | url_string_for_fetch = url_string_for_fetch[0:--(url_string_for_fetch.__len__()-1)] 576 | url = ("https://en.wikipedia.org/w/api.php?action=query&titles="+url_string_for_fetch+"&prop=pageprops&format=xml") 577 | try: 578 | socket.setdefaulttimeout(30) 579 | pagepropsdata = urllib2.urlopen(url).read() 580 | pagepropsdata = pagepropsdata.decode('utf-8','ignore') 581 | ppsoup = BeautifulSoup(pagepropsdata) 582 | article_name_terminal = ppsoup.page['title'] 583 | except: 584 | try: 585 | article_name_terminal = article_name.replace('\\', '') 586 | except: 587 | article_name_terminal = article_name.replace('\\', '').decode('utf-8','ignore') 588 | 589 | article_name_terminal = urllib.unquote(article_name_terminal) 590 | while article_name_terminal.endswith('))'): 591 | article_name_terminal = article_name_terminal.replace('))',')') 592 | log("ARTICLE: %s @ %s"%(filter(lambda x: x in string.printable, article_name_terminal),post.id)) 593 | 594 | try: 595 | page_image = ppsoup.pageprops["page_image"] 596 | except: 597 | page_image = "" 598 | 599 | if article_name_terminal == None and not summary_call: 600 | #log("MALFORMATTED LINK") 601 | #notify = '*Hey '+post.author.name+', that Wikipedia link is probably malformatted.*' 602 | #post_reply(notify,post) 603 | continue 604 | 605 | 606 | ### fetch data from wikipedia 607 | url = ("https://en.wikipedia.org/w/api.php?action=parse&page="+url_string_for_fetch+"&format=xml&prop=text§ion="+str(section)+"&redirects") 608 | try: 609 | socket.setdefaulttimeout(30) 610 | sectiondata = urllib2.urlopen(url).read() 611 | sectiondata = sectiondata.decode('utf-8','ignore') 612 | sectiondata = reddify(sectiondata) 613 | soup = BeautifulSoup(sectiondata) 614 | soup = BeautifulSoup(soup.text) 615 | sectionnsoup = soup 616 | except Exception as e: 617 | #fail("FETCH: %s"%e) 618 | continue 619 | 620 | soup = clean_soup(soup) 621 | 622 | ### extract paragraph 623 | try: 624 | if soup.p.text.__len__() < 500: 625 | all_p = soup.find_all('p') 626 | wt = "" 627 | for idx, val in enumerate(all_p): 628 | s = all_p[idx] 629 | for tag in s: 630 | if tag.name == 'a' and tag.has_attr('href'): 631 | urlstart = "" 632 | if re.search('#cite',tag['href']): 633 | tag.replace_with('') 634 | continue 635 | elif re.search('/wiki/',tag['href']): 636 | urlstart = "https://en.wikipedia.org" 637 | elif re.search('#',tag['href']): 638 | tag.unwrap() 639 | continue 640 | elif not re.search(r'^http://',tag['href']): 641 | tag.replace_with(tag.text) 642 | continue 643 | rep = "["+tag.text+"]("+urlstart+tag['href'].replace(')','\)')+")" 644 | discard = tag.replace_with(rep) 645 | wt = (wt+"\n\n>"+s.text) # Post 3 paragraphs 646 | data = wt 647 | if has_list: 648 | para = 100 649 | else: 650 | para = 1 651 | if idx > para: 652 | break 653 | else: 654 | s = soup.p 655 | for tag in s: 656 | if tag.name == 'a' and tag.has_attr('href'): 657 | urlstart = "" 658 | if re.search('#cite',tag['href']): 659 | tag.replace_with('') 660 | continue 661 | elif re.search('/wiki/',tag['href']): 662 | urlstart = "https://en.wikipedia.org" 663 | elif re.search('#',tag['href']): 664 | tag.unwrap() 665 | continue 666 | elif not re.search(r'^http://',tag['href']): 667 | tag.replace_with(tag.text) 668 | continue 669 | rep = 
"["+tag.text+"]("+urlstart+tag['href'].replace(')','\)')+")" 670 | discard = tag.replace_with(rep) 671 | data = s.text #Post only first paragraph 672 | except Exception as e: 673 | #fail("TEXT PACKAGE FAIL: %s"%e) 674 | if summary_call: 675 | try: 676 | term = url_string 677 | tell_me_text = wikipedia.summary(term,auto_suggest=False,redirect=True) 678 | tell_me_link = wikipedia.page(term,auto_suggest=False).url 679 | title = wikipedia.page(term,auto_suggest=False).title 680 | if bool(re.search(title,tell_me_text)): 681 | summary = re.sub(title,"[**"+title+"**]("+tell_me_link+")",tell_me_text) 682 | else: 683 | summary = "[**"+title+"**](" + tell_me_link + "): " + tell_me_text 684 | #log("INTERPRETATION: %s"%filter(lambda x: x in string.printable, title)) 685 | if re.search(r'#',title): 686 | summary = wikipedia.page(title.split('#')[0]).section(title.split('#')[1]) 687 | if summary == None or str(filter(lambda x: x in string.printable, summary)).strip() == "": 688 | page_url = wikipedia.page(title.split('#')[0]).url 689 | summary = "Sorry, I failed to fetch the section, but here's the link: "+page_url+"#"+title.split('#')[1] 690 | if re.search(r'(',page_url): 691 | page_url = process_brackets_links(page_url) 692 | comment = "*Here you go:*\n\n---\n\n>\n"+summary+"\n\n---\n\n" 693 | post_reply(comment,post) 694 | continue 695 | except Exception as e: 696 | if bool(re.search('.*may refer to:.*',filter(lambda x: x in string.printable, str(e)))): 697 | deflist = ">Definitions for few of those terms:" 698 | for idx, val in enumerate(filter(lambda x: x in string.printable, str(e)).split('may refer to: \n')[1].split('\n')): 699 | deflist = deflist + "\n\n>1. **"+val.strip()+"**: "+ wikipedia.summary(val,auto_suggest=False,sentences=1) 700 | if idx > 3: 701 | break 702 | #comment = "*Oops,* ***"+process_brackets_syntax(url_string).strip()+"*** *landed me on a disambiguation page.*\n\n---"+deflist+"\n\n---\n\nAnd the remaining list:\n\n"+str(e).replace('\n','\n\n>')+"\n\n---\n\n" 703 | summary = "*Oops,* ***"+process_brackets_syntax(url_string).strip()+"*** *landed me on a disambiguation page.*\n\n---\n\n"+deflist+"\n\n---\n\n" 704 | #log("ASKING FOR DISAMBIGUATION") 705 | else: 706 | #log("INTERPRETATION FAIL: %s"%term) 707 | try: 708 | terms = "\""+term+"\"" 709 | suggest = wikipedia.search(terms,results=1)[0] 710 | trialsummary = wikipedia.summary(suggest,auto_suggest=True) 711 | comment = "*Nearest match for* ***"+term.trim()+"*** *is* ***"+suggest+"*** :\n\n---\n\n>"+trialsummary+"\n\n---\n\n" 712 | #log("SUGGESTING %s"%suggest) 713 | except: 714 | comment = "*Sorry, couldn't find a wikipedia article about that or maybe I couldn't process that due to Wikipedia server errors.*\n\n---\n\n" 715 | #log("COULD NOT SUGGEST FOR %s"%term) 716 | post_reply(comment,post) 717 | continue 718 | continue 719 | data = strip_wiki(data) 720 | data = re.sub("Cite error: There are ref tags on this page, but the references will not show without a \{\{reflist\}\} template \(see the help page\)\.", '', data) 721 | #truncateddata = truncate(data,1000) 722 | if data.__len__() < 50: 723 | #log("TOO SMALL INTRODUCTION PARAGRAPH") 724 | continue 725 | #success("TEXT PACKAGED") 726 | 727 | ### Fetch page image from wikipedia 728 | try: 729 | ### Extract image url 730 | try: 731 | page_image = urllib.unquote(page_image.decode('utf-8','ignore')) 732 | except: 733 | raise Exception("no page image") 734 | if page_image.endswith("ogg") or page_image == "": 735 | raise Exception("no image") 736 | url = 
("https://en.wikipedia.org/w/api.php?action=query&titles=File:"+page_image+"&prop=imageinfo&iiprop=url|mediatype&iiurlwidth=640&format=xml") 737 | socket.setdefaulttimeout(30) 738 | wi_api_data = urllib2.urlopen(url).read() 739 | wisoup = BeautifulSoup(wi_api_data) 740 | image_url = wisoup.ii['thumburl'] 741 | image_source_url = wisoup.ii['descriptionurl'] 742 | image_source_url = re.sub(r'\)','\)',image_source_url) 743 | image_source_url = re.sub(r'\(','\(',image_source_url) 744 | global image_source_markdown 745 | image_source_markdown = ("[^(i)]("+image_source_url+")") 746 | 747 | ### Upload to imgur 748 | uploaded_image = im.upload_image(url=image_url, title=page_image) 749 | 750 | ### Extract caption from already fetched sectiondata 751 | try: 752 | caption_div = sectionnsoup.find("div", { "class" : "thumbcaption" }) 753 | if caption_div is None: 754 | raise Exception("caption not packaged: no caption found in section 0") 755 | if page_image not in str(caption_div.find("div", { "class" : "magnify" })): 756 | raise Exception("caption not packaged: page image not in section 0") 757 | discard = caption_div.find("div", { "class" : "magnify" }).extract() 758 | caption = caption_div.text.strip() 759 | caption = strip_wiki(caption) 760 | caption = re.sub(r'\)','\)',caption) 761 | caption = re.sub(r'\(','\(',caption) 762 | caption = re.sub(r'\*','',caption) 763 | caption = re.sub(r'\n',' ',caption) 764 | if caption != "": 765 | caption_markdown = (" - *"+caption+"*") 766 | caption_div = None 767 | #success("CAPTION PACKAGED") 768 | else: 769 | raise Exception("caption not packaged: no caption found in section 0") 770 | except Exception as e: 771 | if str(e) == "caption not packaged: page image has no caption": 772 | pic_markdown = "Image" 773 | elif str(e) == "caption not packaged: page image not in section 0": 774 | pic_markdown = "Image from article" 775 | caption_markdown = "" 776 | #log(e) 777 | image_markdown = ("====\n\n>[**"+pic_markdown+"**]("+uploaded_image.link.replace('http://','https://')+") "+image_source_markdown+caption_markdown) 778 | #success("IMAGE PACKAGED VIA %s"%uploaded_image.link) 779 | except Exception as e: 780 | image_markdown = "" 781 | #traceback.print_exc() 782 | #log("IMAGE: %s"%str(e).strip().replace('\n','')) 783 | 784 | ###Interesting articles 785 | try: 786 | intlist = wikipedia.search(article_name_terminal,results=5) 787 | if intlist.__len__() > 1: 788 | if article_name_terminal in intlist: 789 | intlist.remove(article_name_terminal) 790 | interesting_list = "" 791 | for topic in intlist: 792 | try: 793 | topicurl = wikipedia.page(topic,auto_suggest=False).url.replace('(','\(').replace(')','\)') 794 | except: 795 | continue 796 | topic = topic.replace(' ',' ^').replace(' ^(',' ^\(') 797 | interesting_list = interesting_list + " [^" + topic + "]" + "(" +topicurl.replace('http://','https://')+ ") ^|" 798 | interesting_markdown = "^Interesting:"+interesting_list.strip('^|') 799 | #success("%s INTERESTING ARTICLE LINKS PACKAGED"%intlist.__len__()) 800 | else: 801 | raise Exception("no suggestions") 802 | except Exception as e: 803 | interesting_markdown = "" 804 | #traceback.print_exc() 805 | #log("INTERESTING ARTICLE LINKS NOT PACKAGED: %s"%str(e).strip().replace('\n','')) 806 | 807 | ###NSFW tagging 808 | #badwords = getnsfw(data) 809 | badwords = None #mark all articles as sfw for now 810 | if badwords: 811 | badlist = '' 812 | for word in badwords: 813 | badlist = badlist + word + ',%20' 814 | nsfwurl = 
"/message/compose?to=%28This%20is%20a%20placeholder%29/r/autowikibot&subject="+str(len(badwords))+"%20NSFW%20words%20are%20present%20in%20this%20comment:&message="+badlist.strip(',%20')+"%0a%0aIf%20you%20think%20any%20of%20word/s%20above%20is%20SFW,%20forward%20this%20message%20to%20/r/autowikibot%20%28keep%20the%20subject%20unchanged%29%0a%0acontext:"+str(post.permalink) 815 | nsfwtag = " [](#nsfw-start)**^NSFW** [^^(?)]("+nsfwurl+")[](#nsfw-end)" 816 | #success("FOUND %s NSFW WORDS"%str(len(badwords))) 817 | else: 818 | nsfwtag = " [](#sfw)" 819 | 820 | post_markdown = bit_comment_start+" [**"+article_name_terminal+"**](https://en.wikipedia.org/wiki/"+url_string_for_fetch.replace(')','\)')+"):"+nsfwtag+" \n\n---\n\n>"+data+"\n\n>"+image_markdown+"\n\n---\n\n"+interesting_markdown+"\n\n" 821 | a = post_reply(post_markdown,post) 822 | image_markdown = "" 823 | if not a: 824 | continue 825 | 826 | except KeyboardInterrupt: 827 | save_changing_variables('exit dump') 828 | warn("EXITING") 829 | break 830 | except Exception as e: 831 | traceback.print_exc() 832 | warn("GLOBAL: %s"%e) 833 | time.sleep(3) 834 | continue 835 | 836 | --------------------------------------------------------------------------------