import sys
from acrobot import Acrobot


def main(db):
    '''
    Seed the acronyms table of the database at `db` with the entries for AAA.

    Nothing is downloaded when at least one AAA row is already stored.
    '''
    bot = Acrobot(db)
    count_query = "SELECT COUNT(*) FROM acronyms WHERE acronym='AAA'"
    (stored,) = bot.conn.execute(count_query).fetchone()

    if stored != 0:
        print("not fetching AAA")
        return

    print("fetching AAA")
    bot.get_acronyms('AAA')


if __name__ == '__main__':
    main(sys.argv[1])
from argparse import ArgumentParser
import twitter_bot_utils as tbu
from . import Acrobot, __version__ as version


def main():
    '''
    Command-line entry point: compose the next acronym update and post it.

    With the dry-run flag set the update is composed but neither posted nor
    checked off in the database.
    '''
    cli = ArgumentParser(parents=[tbu.args.parent(version=version)])
    cli.add_argument('database')
    cli.set_defaults()
    options = cli.parse_args()

    twitter = tbu.API(options)
    acrobot = Acrobot(options.database, twitter=twitter, log=twitter.logger)

    try:
        status = acrobot.compose()
        if options.dry_run:
            return
        twitter.update_status(**status)
        acrobot.checkoff_page()

    except Exception as err:
        # Top-level boundary: report the failure through the bot's logger
        # rather than letting a traceback escape.
        twitter.logger.error("{}".format(err))


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
import re
import logging
import sqlite3
import urllib

# NOTE: the third-party modules (requests, twitter_bot_utils.helpers) are
# imported inside the methods that use them, so this module can be imported
# without them installed.

__version__ = '0.2'


WIKI = 'en'
DISAMB_CAT = 'Category:Disambiguation pages'
WIKI_SYNTAX = r"^(=|'''|\{\{|In [^:]+:|\w+ may refer to:|$)"
WIKI_SPLIT = r'[\*\n]'
WIKI_LINK = r'\[\[([^|]+?)(?=[\]|])'
# The "target|" half of a piped link ("[[target|label]]"). Excluding "]" keeps
# the match inside a single link, so text between two links is not swallowed.
WIKI_PIPE_TARGET = r"(?<=\[\[)[^|\]]+\|"


def format_line(line):
    '''
    From a line on a disambiguation page, return a (link, description) tuple.

    link is the target of the first wiki link found on the line, or '' when
    the line contains no link.
    description is the line with piped-link targets, link brackets and
    italic quote marks removed.
    '''
    match = re.search(WIKI_LINK, line)
    link = match.group(1) if match else ''
    desc_sans_link = re.sub(WIKI_PIPE_TARGET, '', line)
    description = re.sub(r"(\[\[|\]\]|'')", "", desc_sans_link)

    return link, description


def get_page_content(json):
    '''
    Return the wikitext of the first page in a decoded API query response.

    Raises KeyError or IndexError when the response lacks the expected
    query -> pages -> revisions structure.
    '''
    pages = list(json['query']['pages'].values())
    first_page = pages[0]
    return first_page['revisions'][0]['*']


def _content_lines(content):
    '''Split page wikitext into stripped, non-empty candidate entry lines.'''
    content = re.sub(r"\[\[Category:[^\]]+\]\]", "", content)
    candidates = (
        raw.strip() for raw in re.split(WIKI_SPLIT, content)
        if not re.match(WIKI_SYNTAX, raw) and '(disambiguation)' not in raw
    )
    # Whitespace-only fragments slip past WIKI_SYNTAX; drop them so they are
    # not stored as empty acronym rows.
    return [text for text in candidates if text]


class Acrobot(object):
    '''
    Picks acronym entries out of a sqlite database, filling the database from
    Wikipedia disambiguation pages when it runs dry, and composes updates.
    '''

    # Link of the entry most recently composed.
    link = ''
    kml = 'https://tools.wmflabs.org/kmlexport/'
    # Row (acronym, link, description) most recently returned by next_page().
    _current = None

    def __init__(self, database, log=None, twitter=None, lang=None):
        '''
        database: path handed to sqlite3.connect
        log: logger-like object (defaults to the logging module)
        twitter: API object used by follow(); following is skipped when falsy
        lang: Wikipedia language subdomain (defaults to WIKI)
        '''
        self.lang = lang or WIKI
        self.headers = {'user-agent': 'Acrobot/{}'.format(self.lang)}
        self.log = log or logging
        self.conn = sqlite3.connect(database)
        self.twitter = twitter

    @property
    def api(self):
        '''URL of the MediaWiki API endpoint for this bot's language.'''
        return "https://{}.wikipedia.org/w/api.php".format(self.lang)

    def compose(self):
        '''
        Build the next update.

        Returns a dict with 'status', 'lat' and 'long' keys and remembers the
        entry's link in self.link for checkoff_page().
        '''
        from twitter_bot_utils import helpers

        acronym, self.link, description = self.next_page()
        self.log.info('composing %s - %s', self.link, description)

        if acronym not in description and len(description) < 140:
            description = '{} is {}'.format(acronym, description)

        update = self.get_page_geo(self.link)

        update['status'] = helpers.shorten(description, ellipsis=True)

        self.log.debug('%s', update)

        return update

    def next_page(self):
        '''
        Pick the next page.
        Check off unused acronyms if need be

        Returns the (acronym, link, description) row and remembers it so that
        checkoff_page() can mark exactly that row.
        '''
        query = "SELECT acronym, link, description FROM acronyms WHERE tweeted != 1 LIMIT 1"

        # Loop instead of recursing: a long run of combinations that yield no
        # usable rows must not exhaust the recursion limit.
        while True:
            row = self.conn.execute(query).fetchone()

            if row is not None:
                self._current = row
                return row

            self.log.debug("Couldn't find a row, checking off another")
            name = self.checkoff_get_next_combination()
            self.get_acronyms(name)
            self.follow(name)

    def get_acronyms(self, combination):
        '''
        Visit wikipedia and download acronyms from a particular letter combination
        Get the acronyms for a letter combination and populate the acronyms DB

        When the combination has no page, it is checked off and the following
        combinations are tried until one yields content.
        '''
        while True:
            self.log.debug('getting acronyms for %s', combination)
            content = self._download_disambiguation(combination)

            if content is not None:
                break

            # empty: mark page as tweeted and move to the next one
            self.log.info('No pages for %s', combination)
            combination = self.checkoff_get_next_combination()

        self.log.debug("Got %d chars of content for %s", len(content), combination)

        # values = list of (combination, link, description)
        values = [(combination,) + format_line(x) for x in _content_lines(content)]

        # Bind the combination as a parameter instead of formatting it into
        # the SQL text.
        curs = self.conn.cursor()
        curs.executemany("INSERT INTO acronyms VALUES (?, ?, ?, 0)", values)
        self.conn.commit()

    def _download_disambiguation(self, combination):
        '''Return the wikitext of the combination's disambiguation page, or None if absent.'''
        import requests

        params = {
            'format': 'json',
            'action': 'query',
            'titles': '{} (disambiguation)'.format(combination),
            'rvprop': 'content',
            'prop': 'revisions|categories',
            'clcategories': [DISAMB_CAT],
            "redirects": True
        }

        r = requests.get(self.api, params=params, headers=self.headers)
        payload = r.json()

        try:
            return get_page_content(payload)
        except (KeyError, IndexError):
            return None

    def follow(self, screen_name):
        '''Best-effort follow of screen_name; failures are logged, never raised.'''
        if not self.twitter:
            return
        try:
            self.twitter.create_friendship(screen_name=screen_name)
            self.log.info('Following @%s', screen_name)
        except Exception as e:
            self.log.info('Error following @%s: %s', screen_name, e)

    def checkoff_get_next_combination(self):
        '''
        Mark the first unused combination as used and return the next one.

        Raises LookupError when no unused combination is left afterwards.
        '''
        checkoff = """UPDATE combinations SET tweeted = 1 WHERE name=(
            SELECT name FROM combinations WHERE tweeted != 1 LIMIT 1
        )"""
        curs = self.conn.cursor()
        self.log.debug('checking off a row')
        curs.execute(checkoff)
        self.conn.commit()

        curs.execute("SELECT name FROM combinations WHERE tweeted != 1 LIMIT 1")
        result = curs.fetchone()
        self.log.info('Next combination: %s', result)

        if result is None:
            raise LookupError('no unused combinations left')

        return result[0]

    def checkoff_page(self):
        '''
        Mark the entry that was just composed as tweeted.

        Matching on the link alone would also check off every other row that
        shares it (notably all link-less rows, whose link is ''), so the row
        remembered by next_page() is matched on all three columns. If no row
        was remembered, or self.link was changed since, fall back to matching
        on self.link only.
        '''
        current = self._current
        cursor = self.conn.cursor()

        if current is not None and current[1] == self.link:
            # IS compares like = but also matches NULL against NULL.
            cursor.execute(
                'UPDATE acronyms SET tweeted = 1 '
                'WHERE acronym IS ? AND link IS ? AND description IS ?',
                tuple(current)
            )
        else:
            cursor.execute('UPDATE acronyms SET tweeted = 1 WHERE link=?', (self.link,))

        self.conn.commit()

    def get_page_geo(self, page):
        '''
        Get the lat/lon of a Wikipedia page, if it exists.
        Uses the kmlexport WMF labs utility and janky regex parsing

        Returns a dict with 'lat' and 'long' keys; both are None when the
        page is empty or no coordinates could be found.
        '''
        if not page:
            # Entries without a link have nothing to look up.
            return {"lat": None, "long": None}

        import requests

        self.log.debug('getting location of %s', page)

        r = requests.get(self.kml, params={'article': page}, headers=self.headers)

        if 'No geocoded items found' in r.text:
            return {"lat": None, "long": None}

        try:
            # NOTE(review): both look-arounds are empty, so this matches the
            # first "number,number" pair anywhere in the response -- confirm
            # whether they were meant to anchor on a surrounding tag.
            coord_pat = r'(?<=)(-?[\d.]+),(-?[\d.]+),?0?(?=)'
            match = re.search(coord_pat, r.text)
            x, y = match.groups()
            x, y = float(x), float(y)

        except (AttributeError, KeyError, ValueError) as e:
            self.log.error('Error finding geo on %s', page)
            self.log.error('%s', e)
            x, y = None, None

        # The first captured number is reported as the longitude.
        return {"lat": y, "long": x}