├── .gitignore
├── requirements.txt
├── aaa.py
├── setup.py
├── acrobot
    ├── acrobot.py
    └── __init__.py
├── Makefile
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | *.db
2 | alpha.txt
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | twitter_bot_utils>=0.10.0
2 | requests


--------------------------------------------------------------------------------
/aaa.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from acrobot import Acrobot
 3 | 
 4 | def main(db):
 5 |     '''Init the acrobot by downloading AAA'''
 6 |     A = Acrobot(db)
 7 |     curs = A.conn.execute("SELECT COUNT(*) FROM acronyms WHERE acronym='AAA'")
 8 |     result = curs.fetchone()
 9 | 
10 |     if result[0] == 0:
11 |         print("fetching AAA")
12 |         A.get_acronyms('AAA')
13 |     else:
14 |         print("not fetching AAA")
15 | 
16 | if __name__ == '__main__':
17 |     main(sys.argv[1])
18 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | with open('requirements.txt') as f:
 4 |     requirements = [i.strip() for i in f.readlines()]
 5 | 
 6 | setup(
 7 |     name='acrobot',
 8 | 
 9 |     version='0.2',
10 | 
11 |     description='acronym bot',
12 | 
13 |     url='http://twitter.com/acrobot',
14 | 
15 |     author='Neil Freeman',
16 | 
17 |     author_email='contact@fakeisthenewreal.org',
18 | 
19 |     license='All rights reserved',
20 | 
21 |     packages=[
22 |         'acrobot',
23 |     ],
24 | 
25 |     entry_points={
26 |         'console_scripts': [
27 |             'acrobot=acrobot.acrobot:main',
28 |         ],
29 |     },
30 | 
31 |     install_requires=requirements,
32 | 
33 | )
34 | 


--------------------------------------------------------------------------------
/acrobot/acrobot.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | import twitter_bot_utils as tbu
 3 | from . import Acrobot, __version__ as version
 4 | 
 5 | def main():
 6 |     parent = tbu.args.parent(version=version)
 7 |     parser = ArgumentParser(parents=[parent])
 8 |     parser.add_argument('database')
 9 |     parser.set_defaults()
10 |     args = parser.parse_args()
11 | 
12 |     api = tbu.API(args)
13 |     bot = Acrobot(args.database, twitter=api, log=api.logger)
14 | 
15 |     try:
16 |         update = bot.compose()
17 |         if not args.dry_run:
18 |             api.update_status(**update)
19 |             bot.checkoff_page()
20 | 
21 |     except Exception as e:
22 |         api.logger.error("{}".format(e))
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     main()
27 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PIP ?= pip3.5
 2 | PYTHON ?= python3.5
 3 | 
 4 | CREATE = CREATE TABLE tmp ( \
 5 | 	name VARCHAR(3) \
 6 | 	); \
 7 | 	CREATE TABLE acronyms ( \
 8 | 		acronym VARCHAR(3), \
 9 | 		link TEXT, \
10 | 		description TEXT, \
11 | 		tweeted VARCHAR(1) \
12 | 	)
13 | 
14 | .PHONY: all develop install
15 | 
16 | all: alpha.db
17 | 
18 | install develop: %: requirements.txt alpha.db
19 | 	$(PIP) -q install $(INSTALLFLAGS) -r $<
20 | 	$(PYTHON) setup.py $(SETUPFLAGS) $* $(INSTALLFLAGS)
21 | 
22 | alpha.db: alpha.txt
23 | 	sqlite3 $@ "$(CREATE);"
24 | 	sqlite3 $@ ".import '/dev/stdin' tmp" < $<
25 | 	sqlite3 $@ "CREATE TABLE combinations AS SELECT name, 0 tweeted FROM tmp;"
26 | 	sqlite3 $@ "DROP TABLE tmp;"
27 | 	$(PYTHON) aaa.py $@
28 | 
29 | alpha.txt:
30 | 	echo {A..Z}{A..Z}{A..Z} | tr ' ' '\n' > $@
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # acronym bot
 2 | 
 3 | Controls [@acronymlister](https://twitter.com/acronymlister), a Twitter bot that posts the meanings of three letter acronyms, as found on Wikipedia.
 4 | 
 5 | The command line tool requires a `bots.yaml` file, as described in [`twitter_bot_utils`](https://github.com/fitnr/twitter_bot_utils).
 6 | 
 7 | Requires Python 3.5, sqlite3. Initialize the database with `make`, install the package with `make install`.
 8 | 
 9 | ````
10 | usage: acrobot [-h] [-c PATH] [-u SCREEN_NAME] [-n] [-v] [-q] [-V] database
11 | 
12 | positional arguments:
13 |   database
14 | 
15 | optional arguments:
16 |   -h, --help            show this help message and exit
17 |   -c PATH, --config PATH
18 |                         bots config file (json or yaml)
19 |   -u SCREEN_NAME, --user SCREEN_NAME
20 |                         Twitter screen name
21 |   -n, --dry-run         Don't actually do anything
22 |   -v, --verbose         Run talkatively
23 |   -q, --quiet           Run quietly
24 |   -V, --version         show program's version number and exit
25 | ````
26 | 


--------------------------------------------------------------------------------
/acrobot/__init__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import re
  3 | import logging
  4 | import sqlite3
  5 | import urllib
  6 | import requests
  7 | from twitter_bot_utils import helpers
  8 | 
  9 | __version__ = '0.2'
 10 | 
 11 | 
 12 | WIKI = 'en'
 13 | DISAMB_CAT = 'Category:Disambiguation pages'
 14 | WIKI_SYNTAX = r"^(=|'''|\{\{|In [^:]+:|\w+ may refer to:|$)"
 15 | WIKI_SPLIT = r'[\*\n]'
 16 | WIKI_LINK = r'\[\[([^|]+?)(?=[\]|])'
 17 | 
 18 | 
 19 | def format_line(line):
 20 |     '''
 21 |     From a line on a disambiguation page, return a link (possibly None) and a description
 22 |     Will return the first link it finds.
 23 |     '''
 24 |     match = re.search(WIKI_LINK, line)
 25 |     link = match.groups()[0] if match else ''
 26 |     desc_sans_link = re.sub(r"(?<=\[\[)[^|]+\|", '', line)
 27 |     description = re.sub(r"(\[\[|\]\]|'')", "", desc_sans_link)
 28 | 
 29 |     return link, description
 30 | 
 31 | 
 32 | def get_page_content(json):
 33 |     pages = list(json['query']['pages'].values())
 34 |     return pages[0]['revisions'][0]['*']
 35 | 
 36 | 
 37 | class Acrobot(object):
 38 | 
 39 |     link = ''
 40 |     kml = 'https://tools.wmflabs.org/kmlexport/'
 41 | 
 42 |     def __init__(self, database, log=None, twitter=None, lang=None):
 43 |         self.lang = lang or WIKI
 44 |         self.headers = {'user-agent': 'Acrobot/{}'.format(self.lang)}
 45 |         self.log = log or logging
 46 |         self.conn = sqlite3.connect(database)
 47 |         self.twitter = twitter
 48 | 
 49 |     @property
 50 |     def api(self):
 51 |         return "https://{}.wikipedia.org/w/api.php".format(self.lang)
 52 | 
 53 |     def compose(self):
 54 |         acronym, self.link, description = self.next_page()
 55 |         self.log.info('composing %s - %s', self.link, description)
 56 | 
 57 |         if acronym not in description and len(description) < 140:
 58 |             description = '{} is {}'.format(acronym, description)
 59 | 
 60 |         update = self.get_page_geo(self.link)
 61 | 
 62 |         update['status'] = helpers.shorten(description, ellipsis=True)
 63 | 
 64 |         self.log.debug('%s', update)
 65 | 
 66 |         return update
 67 | 
 68 |     def next_page(self):
 69 |         '''
 70 |         Pick the next page.
 71 |         Check off unused acronyms if need be
 72 |         '''
 73 |         c = self.conn.execute("SELECT acronym, link, description FROM acronyms WHERE tweeted != 1 LIMIT 1")
 74 |         row = c.fetchone()
 75 | 
 76 |         if row is None:
 77 |             self.log.debug("Couldn't find a row, checking off another")
 78 |             name = self.checkoff_get_next_combination()
 79 |             self.get_acronyms(name)
 80 |             self.follow(name)
 81 |             return self.next_page()
 82 | 
 83 |         return row
 84 | 
 85 |     def get_acronyms(self, combination):
 86 |         '''
 87 |         Visit wikipedia and download acronyms from a particular letter combination
 88 |         Get the acronyms for a letter combination and populate the acronyms DB
 89 |         '''
 90 |         self.log.debug('getting acronyms for %s', combination)
 91 | 
 92 |         params = {
 93 |             'format': 'json',
 94 |             'action': 'query',
 95 |             'titles': '{} (disambiguation)'.format(combination),
 96 |             'rvprop': 'content',
 97 |             'prop': 'revisions|categories',
 98 |             'clcategories': [DISAMB_CAT],
 99 |             "redirects": True
100 |         }
101 | 
102 |         r = requests.get(self.api, params=params, headers=self.headers)
103 |         json = r.json()
104 | 
105 |         try:
106 |             content = get_page_content(json)
107 | 
108 |             self.log.debug("Got %d chars of content for %s", len(content), combination)
109 | 
110 |             content = re.sub(r"\[\[Category:[^\]]+\]\]", "", content)
111 | 
112 |             rawlines = re.split(WIKI_SPLIT, content)
113 |             lines = [g.strip() for g in rawlines if not re.match(WIKI_SYNTAX, g) and '(disambiguation)' not in g]
114 | 
115 |         except KeyError:
116 |             # empty: make page as tweeted and move to the next one
117 |             self.log.info('No pages for %s' % combination)
118 |             name = self.checkoff_get_next_combination()
119 |             return self.get_acronyms(name)
120 | 
121 |         # not empty: send to database and you're done
122 | 
123 |         # values = list of (combination, page, description)
124 |         values = [format_line(x) for x in lines]
125 |         insert = "INSERT INTO acronyms VALUES ('{}', ?, ?, 0)".format(combination)
126 | 
127 |         curs = self.conn.cursor()
128 |         curs.executemany(insert, values)
129 |         self.conn.commit()
130 | 
131 |     def follow(self, screen_name):
132 |         if not self.twitter:
133 |             return
134 |         try:
135 |             self.twitter.create_friendship(screen_name=screen_name)
136 |             self.log.info('Following @%s', screen_name)
137 |         except Exception as e:
138 |             self.log.info('Error following @%s: %s', screen_name, e)
139 |             pass
140 | 
141 |     def checkoff_get_next_combination(self):
142 |         checkoff = """UPDATE combinations SET tweeted = 1 WHERE name=(
143 |             SELECT name FROM combinations WHERE tweeted != 1 LIMIT 1
144 |         )"""
145 |         curs = self.conn.cursor()
146 |         self.log.debug('checking off a row')
147 |         curs.execute(checkoff)
148 |         self.conn.commit()
149 | 
150 |         curs.execute("SELECT name FROM combinations WHERE tweeted != 1 LIMIT 1")
151 |         result = curs.fetchone()
152 |         self.log.info('Next combination: %s', result)
153 | 
154 |         return result[0]
155 | 
156 |     def checkoff_page(self):
157 |         self.conn.cursor().execute('UPDATE acronyms SET tweeted = 1 WHERE link=?', (self.link,))
158 |         self.conn.commit()
159 | 
160 |     def get_page_geo(self, page):
161 |         '''
162 |         Get the lat/lon of a Wikipedia page, if it exists.
163 |         Uses the kmlexport WMF labs utility and janky regex parsing
164 |         '''
165 |         self.log.debug('getting location of %s', page)
166 | 
167 |         r = requests.get(self.kml, params={'article': page}, headers=self.headers)
168 | 
169 |         if 'No geocoded items found' in r.text:
170 |             return {"lat": None, "long": None}
171 | 
172 |         try:
173 |             coord_pat = r'(?<=<coordinates>)(-?[\d.]+),(-?[\d.]+),?0?(?=</coordinates>)'
174 |             match = re.search(coord_pat, r.text)
175 |             x, y = match.groups()
176 |             x, y = float(x), float(y)
177 | 
178 |         except (AttributeError, KeyError, ValueError) as e:
179 |             self.log.error('Error finding geo on %s', page)
180 |             self.log.error('%s', e)
181 |             x, y = None, None
182 | 
183 |         return {"lat": y, "long": x}
184 | 


--------------------------------------------------------------------------------