├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── app.py ├── config_template.py ├── data └── handles.csv ├── db.py ├── doc └── screenshot.jpg ├── requirements.txt ├── static └── style.css ├── templates ├── index.html ├── user.html └── word.html └── util ├── import_tweets.py ├── load_users.py └── make_db.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | __pycache__ 3 | \#* 4 | *.pyc 5 | *~ 6 | config.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Paul Ford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # "May 14, 2016" Twitter search tool 2 | 3 | This was created to support [a collaborative project by Miranda July and Paul Ford with support from Starlee Kine as part of the Rhizome 7on7 project](https://vimeo.com/167171454). 4 | 5 | ![Here is a screenshot.](https://raw.githubusercontent.com/ftrain/may142016/master/doc/screenshot.jpg) 6 | 7 | This is a Flask app and some utilities that load data into SQLite. Standard caveats about speed of development and general incompetence apply, but it has the merit of working okay. 8 | 9 | It makes it possible to search and browse historical tweets across a group of individuals. For example: Load in all the people in your Slack room, search their tweets for "dream," and recite their dreams back to them. 10 | 11 | ## Up and running: 12 | 13 | - python 3 14 | - put all the twitter handles in `data/handles.csv` 15 | - `pip install -r requirements.txt` 16 | - go get a twitter API key 17 | - copy `config_template.py` to `config.py` and modify per that API key 18 | 19 | - Run these things 20 | - `python -m util.make_db` # make the database 21 | - `python -m util.load_users` # load the users from `data/handles.csv` 22 | - `python -m util.import_tweets` # import the tweets from those handles 23 | 24 | Tweet importing will take a long time, many hours or days depending on your list of people, their prolixity, etc. 25 | 26 | Run the app: 27 | 28 | - `python app.py` 29 | 30 | ## Notes 31 | 32 | - Works fine under `gunicorn`. 33 | - If you start to mess with it while the tweets are loading into SQLite it can skip tweets because SQLite wants none of that. 
# ---- /app.py ----
"""Flask front end for browsing and searching the imported tweets."""
import re

import nltk
from flask import Flask, abort, render_template, request

import db


# Start up our Flask app
app = Flask(__name__, static_url_path='/static')
# NOTE(review): DEBUG is hard-coded on, exactly as before. Confirm this is
# intended before exposing the app beyond a trusted machine.
app.config.update(dict(
    DEBUG=True,
))


# Catches twitter usernames in text
twitter_username_re = re.compile(r'@([A-Za-z0-9_]+)')


def handle_strip(text):
    """Return ``text`` with every ``@username`` mention removed."""
    return twitter_username_re.sub('', text)


def is_rt(tweet):
    """Flag a tweet as a retweet and return the same tweet object.

    ``tweet.is_rt`` is set to ``True`` when ``tweet.tweet_text`` starts with
    ``RT`` and to ``None`` otherwise. ``None`` (not ``False``) is kept on
    purpose so that anything rendering the value keeps producing the same
    text; the stylesheet only targets ``rt-True``.
    """
    tweet.is_rt = True if tweet.tweet_text.startswith("RT") else None
    return tweet


def get_words(tweets, min_length=7, top_n=100, min_count=5):
    """Given a set of tweets, return the most frequently-used words.

    Retweets (``tweet.is_rt`` truthy) are ignored and ``@handles`` are
    stripped before tokenizing. Every tweet must already have been passed
    through :func:`is_rt`.

    :param tweets: iterable of tweet objects with ``is_rt`` and
        ``tweet_text`` attributes.
    :param min_length: shortest token (in characters) worth counting.
    :param top_n: how many of the most common words to consider.
    :param min_count: minimum number of occurrences for a word to be kept.
    :returns: a list of ``[word, size]`` pairs, most frequent first, where
        ``size`` is ``6 + count // 3``. A real list is returned (rather than
        a one-shot iterator) so the result can be iterated more than once.
    """
    originals = (tweet for tweet in tweets if not tweet.is_rt)
    tokens = (
        token.lower()
        for tweet in originals
        for token in nltk.word_tokenize(handle_strip(tweet.tweet_text))
        # Length is tested before lower-casing, as it always has been.
        if len(token) >= min_length
    )
    fdist = nltk.FreqDist(tokens)
    return [[word, 6 + count // 3]
            for word, count in fdist.most_common(top_n)
            if count >= min_count]


@app.route("/")
def hello():
    """List the users. Copied straight from the Flask tutorial."""
    users = db.User.select().order_by(db.User.screen_name)
    return render_template('index.html',
                           users=users)


# The URL must carry the <screen_name> variable: without it Flask calls the
# view with no arguments and every request fails with a TypeError.
@app.route("/name/<screen_name>")
def tweets(screen_name):
    """Display information about a user, or 404 if the user is unknown."""
    try:
        user = (db.User
                .select()
                .where(db.User.screen_name == screen_name)
                .get())
    except db.User.DoesNotExist:
        # An unknown handle is a client error, not a server error.
        abort(404)

    timeline = (db.Tweet
                .select()
                .where(db.Tweet.user_screen_name == screen_name)
                .order_by(db.Tweet.tweet_timestamp))
    user_tweets = [is_rt(tweet) for tweet in timeline]
    return render_template('user.html',
                           words=get_words(user_tweets),
                           user=user,
                           tweets=user_tweets)


@app.route("/word", methods=['GET'])
def words():
    """Display search results for the ``q`` query-string parameter."""
    term = request.args.get('q', '')
    if term.strip():
        matches = (db.Tweet
                   .select()
                   .join(
                       db.FTSTweet,
                       on=(db.Tweet.id == db.FTSTweet.tweet_id))
                   .where(db.FTSTweet.match(term))
                   .order_by(db.Tweet.user_screen_name,
                             db.Tweet.tweet_timestamp))
        found = [is_rt(tweet) for tweet in matches]
    else:
        # A blank term is not a meaningful full-text query; show no results
        # instead of sending an empty MATCH expression to SQLite.
        found = []
    return render_template('word.html',
                           term=term,
                           tweets=found)


if __name__ == "__main__":
    app.run()


# ---- /config_template.py ----
# Twitter API configuration
TWITTER_CONSUMER_KEY = 'INSERT_VALUE_HERE'
TWITTER_CONSUMER_SECRET = 'INSERT_VALUE_HERE'
TWITTER_ACCESS_TOKEN = 'INSERT_VALUE_HERE'
TWITTER_ACCESS_TOKEN_SECRET = 'INSERT_VALUE_HERE'

# We add a little fuzz because we're not in that much of a hurry
TWITTER_API_FUZZ = 1.5

# Seconds to pause between timeline page requests; 7.5 with the values
# above (5 seconds per request, times the fuzz factor).
TWITTER_API_DELAY = 1.0/(180.0 * 4.0/3600.0) * TWITTER_API_FUZZ
# ---- /db.py ----
# Database setup
import os

from peewee import (CharField, DateTimeField, IntegerField, Model,
                    TextField)
from playhouse.sqlite_ext import FTSModel, SqliteExtDatabase

# Location of the SQLite file, relative to the working directory.
DB_PATH = 'db/rhizome.db'

db = SqliteExtDatabase(DB_PATH,
                       threadlocals=True)


class BaseModel(Model):
    """Common base so every regular table is bound to ``db`` exactly once."""

    class Meta:
        database = db


class User(BaseModel):
    """A Twitter account whose timeline is imported."""
    screen_name = CharField(
        unique=True,
        primary_key=True)
    real_name = CharField(
        null=True)
    avatar = CharField(
        default=None,
        null=True)
    friends = IntegerField(
        null=True)
    followers = IntegerField(
        null=True)
    background = CharField(
        default=None,
        null=True)
    bio = CharField(
        default=None,
        null=True,
        index=True)
    location = CharField(
        default=None,
        null=True)


class Tweet(BaseModel):
    """A single imported tweet, keyed by its Twitter id."""
    id = IntegerField(
        primary_key=True)
    user_screen_name = CharField(
        index=True)
    tweet_text = TextField()
    # The importer passes a full datetime here; a DateField hands back only
    # the calendar date, so the time of day never reached the templates.
    tweet_timestamp = DateTimeField(
        index=True)
    user_follower_ct = IntegerField()
    tweet_favorite_ct = IntegerField()
    tweet_retweet_ct = IntegerField()


class Friendship(BaseModel):
    """A (user, friend) pair of screen names."""
    user_screen_name = CharField(
        index=True)
    friend_screen_name = CharField(
        index=True)


class Word(BaseModel):
    """A per-user word count."""
    user_screen_name = CharField(
        index=True)
    word = CharField(
        index=True)
    count = IntegerField()


class FTSTweet(FTSModel):
    """Full-text search index over tweet text; ``tweet_id`` points at Tweet."""
    tweet_id = IntegerField()
    content = TextField()

    class Meta:
        database = db


def create_tables():
    """Create the database file's directory (if needed) and all tables.

    Safe to run more than once: tables that already exist are left alone.
    """
    db_dir = os.path.dirname(DB_PATH)
    if db_dir:
        # SQLite creates the file but not its parent directory.
        os.makedirs(db_dir, exist_ok=True)
    db.connect()
    db.create_tables([User,
                      Tweet,
                      Friendship,
                      Word,
                      FTSTweet, ],
                     safe=True)

The people in your neighborhood

8 |
9 | Search: 10 | 11 |
12 | 13 | {% for user in users %} 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | {% endfor %} 22 |
@{{ user.screen_name }}{{ user.real_name }}{{ user.location }}
23 | 24 | 25 | -------------------------------------------------------------------------------- /templates/user.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Tweets 4 | 5 | 6 | 7 |

[Home]

8 | 9 |

{{user.real_name}}

10 |
11 | Search: 12 | 13 |
14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 49 | 50 | 51 | 52 | 62 | 63 |
Avatar
Handle@{{user.screen_name}}
Where{{user.location}}
Bio{{user.bio}}
Background
Stats{{"{:,d}".format(user.friends)}} friends, {{"{:,d}".format(user.followers)}} followers
Words 43 |
44 | {% for word in words %} 45 | {{word.0}} 46 | {% endfor %} 47 |
48 |
Tweets 53 | 54 | {% for tweet in tweets %} 55 | 56 | 57 | 58 | 59 | {% endfor %} 60 |
{{tweet.tweet_timestamp}}{{ tweet.tweet_text|safe|urlize}}
61 |
64 | 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /templates/word.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Tweets 4 | 5 | 6 | 7 | 8 |

[Home]

9 |

“{{term}}”

10 |
11 | Search: 12 | 13 |
14 | 15 | {% for tweet in tweets %} 16 | 17 | 18 | 19 | 20 | 21 | {% endfor %} 22 |
{{tweet.tweet_timestamp}}@{{tweet.user_screen_name}}{{tweet.tweet_text|safe|urlize}}
# ---- /util/import_tweets.py ----
"""Download the timeline of every stored user into the database."""
import sys
from time import sleep

import tweepy

import config
import db


class TwitterSearchClient(object):
    """Set up a tweepy client"""
    def __init__(self,
                 consumer_key, consumer_secret,
                 access_token, access_secret):
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_secret)
        self.client = tweepy.API(auth)


class TwitterSearchResponse(object):
    """Talk to Twitter."""
    def __init__(self, client, user):
        """Bind a :class:`TwitterSearchClient` to one ``db.User`` row."""
        self.client = client.client
        self.screen_name = user.screen_name

    def save(self):
        """Fetch the user's timeline page by page and store each tweet.

        Any API or storage error is logged and followed by a 60 second
        pause, after which the method returns so the caller can continue
        with the next user. ``KeyboardInterrupt`` is deliberately *not*
        caught, so a long-running import can be stopped with Ctrl-C.
        """
        try:
            print("[search] [search_term: {}]".format(self.screen_name))
            pages = tweepy.Cursor(self.client.user_timeline,
                                  screen_name=self.screen_name,
                                  count=200).pages(100)
            for i, page in enumerate(pages):
                print("{}.".format(i))
                sleep(config.TWITTER_API_DELAY)
                self.process_page(page)

        # RateLimitError must come before its parent class TweepError.
        except tweepy.error.RateLimitError:
            print("[search] [error: rate limit] [{}]".format(
                self.screen_name))
            sleep(60)

        except tweepy.error.TweepError as e:
            print("[search] [error: tweepy] [{}]".format(e))
            sleep(60)

        except Exception:
            print("[search] [error: unknown] [{}]".format(sys.exc_info()[0]))
            sleep(60)

    def process_page(self, page):
        """Store every tweet of one timeline page that is not saved yet."""
        for item in page:
            already_saved = (db.Tweet
                             .select()
                             .where(db.Tweet.id == item.id)
                             .exists())
            if already_saved:
                # Makes the import resumable: a re-run used to stop at the
                # first duplicate primary key.
                print("skipping tweet {}/{} (already saved)".format(
                    item.user.screen_name, item.id,))
                continue

            print("saving tweet {}/{}".format(item.user.screen_name,
                                              item.id,))
            # Write the tweet and its search-index row together so a failure
            # cannot leave a tweet that is stored but unsearchable.
            with db.db.atomic():
                db.Tweet.create(
                    id=item.id,
                    user_screen_name=item.user.screen_name,
                    user_follower_ct=item.user.followers_count,
                    tweet_text=item.text,
                    tweet_timestamp=item.created_at,
                    tweet_favorite_ct=item.favorite_count,
                    tweet_retweet_ct=item.retweet_count,)
                db.FTSTweet.create(
                    tweet_id=item.id,
                    content=item.text,)


def __main__():
    """Import the tweets of every user in the database."""
    client = TwitterSearchClient(config.TWITTER_CONSUMER_KEY,
                                 config.TWITTER_CONSUMER_SECRET,
                                 config.TWITTER_ACCESS_TOKEN,
                                 config.TWITTER_ACCESS_TOKEN_SECRET)
    for user in db.User.select():
        TwitterSearchResponse(client, user).save()


# Only run when executed (``python -m util.import_tweets``), not on import.
if __name__ == "__main__":
    __main__()


# ---- /util/load_users.py ----
"""Load the handles listed in ``data/handles.csv`` into the user table."""
import csv
from time import sleep

import tweepy

import config
import db


class TwitterSearchClient(object):
    """Set up a tweepy client"""
    def __init__(self,
                 consumer_key, consumer_secret,
                 access_token, access_secret):
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_secret)
        self.client = tweepy.API(auth)


def __main__():
    """Fetch each handle's profile from Twitter and store it as a User."""
    print("Logging in to twitter")
    client = TwitterSearchClient(config.TWITTER_CONSUMER_KEY,
                                 config.TWITTER_CONSUMER_SECRET,
                                 config.TWITTER_ACCESS_TOKEN,
                                 config.TWITTER_ACCESS_TOKEN_SECRET)

    with open('data/handles.csv', 'rt', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines produce empty rows; skip them.
            if not row or not row[0].strip():
                continue
            handle = row[0].strip()

            sleep(3)
            print("fetching for {}".format(handle,))
            try:
                user = client.client.get_user(screen_name=handle)
            except tweepy.error.TweepError as e:
                # One bad handle should not abort the whole load.
                print("could not fetch {}: {}".format(handle, e))
                continue

            # Store Twitter's own spelling of the name: tweets are saved
            # under it, and the app matches the two by exact equality.
            screen_name = user.screen_name
            already_loaded = (db.User
                              .select()
                              .where(db.User.screen_name == screen_name)
                              .exists())
            if already_loaded:
                print("skipping {} (already loaded)".format(screen_name))
                continue

            db.User.create(
                screen_name=screen_name,
                real_name=user.name,
                bio=user.description,
                location=user.location,
                friends=user.friends_count,
                followers=user.followers_count,
                avatar=user.profile_image_url,
                background=user.profile_background_image_url,)


# Only run when executed (``python -m util.load_users``), not on import.
if __name__ == "__main__":
    __main__()


# ---- /util/make_db.py ----
import db

db.create_tables()