├── .gitignore ├── .DS_Store ├── 3_redis ├── dump.rdb ├── __pycache__ │ └── tweet_store.cpython-36.pyc ├── tweet_store.py └── twitter_filter.py ├── 6_flask ├── dump.rdb ├── __pycache__ │ ├── tweet.cpython-36.pyc │ └── tweet_store.cpython-36.pyc ├── twatcher.py ├── static │ └── styles.css ├── tweet.py ├── tweet_store.py ├── twitter_filter.py └── templates │ └── index.html ├── 4_design_front_end ├── pen_paper.jpg ├── styles.css └── index.html ├── 5_python_class ├── tweet.py ├── tweet_store.py └── twitter_filter.py ├── README.md ├── 1_twitter_stream ├── twitter_filter.py └── status.json └── 2_sentiment └── twitter_filter.py /.gitignore: -------------------------------------------------------------------------------- 1 | /config 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2datascience/twitter_filter/master/.DS_Store -------------------------------------------------------------------------------- /3_redis/dump.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2datascience/twitter_filter/master/3_redis/dump.rdb -------------------------------------------------------------------------------- /6_flask/dump.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2datascience/twitter_filter/master/6_flask/dump.rdb -------------------------------------------------------------------------------- /4_design_front_end/pen_paper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2datascience/twitter_filter/master/4_design_front_end/pen_paper.jpg -------------------------------------------------------------------------------- /6_flask/__pycache__/tweet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2datascience/twitter_filter/master/6_flask/__pycache__/tweet.cpython-36.pyc -------------------------------------------------------------------------------- /3_redis/__pycache__/tweet_store.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2datascience/twitter_filter/master/3_redis/__pycache__/tweet_store.cpython-36.pyc -------------------------------------------------------------------------------- /6_flask/__pycache__/tweet_store.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k2datascience/twitter_filter/master/6_flask/__pycache__/tweet_store.cpython-36.pyc -------------------------------------------------------------------------------- /6_flask/twatcher.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template 2 | from tweet_store import TweetStore 3 | 4 | app = Flask(__name__) 5 | store = TweetStore() 6 | 7 | @app.route('/') 8 | def index(): 9 | tweets = store.tweets() 10 | return render_template('index.html', tweets=tweets) 11 | 12 | if __name__ == '__main__': 13 | app.run(debug=True) 14 | -------------------------------------------------------------------------------- /4_design_front_end/styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin-top: 50px; 3 | } 4 | 5 | .lrg-heading { 6 | font-size: 48px; 7 | } 8 | 9 | .description { 10 | font-size: 24px; 11 | color: gray; 12 | } 13 | 14 | .brand { 15 | color: yellow; 16 | } 17 | 18 | .positive { 19 | border: 1px solid rgba(0,164,0,.8) 20 | } 21 | 22 | .negative { 23 | border: 1px solid rgba(164,0,0,.8) 24 | } 25 | -------------------------------------------------------------------------------- /6_flask/static/styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin-top: 50px; 3 | } 4 | 5 | img { 6 | border-radius: 50%; 7 | } 8 | 9 | .lrg-heading { 10 | font-size: 48px; 11 | } 12 | 13 | .description { 14 | font-size: 24px; 15 | color: gray; 16 | } 17 | 18 | .brand { 19 | color: yellow; 20 | } 21 | 22 | .positive { 23 | border: 1px solid rgba(0,164,0,.8) 24 | } 25 | 26 | .negative { 27 | border: 1px solid rgba(164,0,0,.8) 28 | } 29 | -------------------------------------------------------------------------------- /6_flask/tweet.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | class Tweet: 4 | 5 | def __init__(self, data): 6 | self.data = data 7 | 8 | def user_link(self): 9 | return "http://twitter.com/{}".format(self.data['username']) 10 | 11 | def filtered_text(self): 12 | return self.filter_brands(self.filter_urls(self.data['text'])) 13 | 14 | def filter_brands(self, text): 15 | brands = ["@WarbyParker", "@Bonobos", "@Casper", "@Glossier", "@DollarShaveClub", "@Allbirds"] 16 | 17 | for brand in brands: 18 | if (brand in text): 19 | text = text.replace(brand, "{}".format(brand)) 20 | else: 21 | continue 22 | 23 | return text 24 | 25 | def filter_urls(self, text): 26 | return re.sub("(https?:\/\/\w+(\.\w+)+(\/[\w\+\-\,\%]+)*(\?[\w\[\]]+(=\w*)?(&\w+(=\w*)?)*)?(#\w+)?)", r'\1', text) 27 | -------------------------------------------------------------------------------- /5_python_class/tweet.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | class Tweet: 4 | 5 | def __init__(self, data): 6 | self.data = data 7 | 8 | def user_link(self): 9 | return "http://twitter.com/{}".format(self.data['username']) 10 | 11 | def filtered_text(self): 12 | return self.filter_brands(self.filter_urls(self.data['text'])) 13 | 14 | def filter_brands(self, text): 15 | brands = ["@WarbyParker", "@Bonobos", "@Casper", "@Glossier", "@DollarShaveClub", "@Allbirds"] 16 | 17 | for brand in brands: 18 | if (brand in text): 19 | text = text.replace(brand, "{}".format(brand)) 20 | else: 21 | continue 22 | 23 | return text 24 | 25 | def filter_urls(self, text): 26 | return re.sub("(https?:\/\/\w+(\.\w+)+(\/[\w\+\-\,\%]+)*(\?[\w\[\]]+(=\w*)?(&\w+(=\w*)?)*)?(#\w+)?)", r'\1', text) 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Streaming Twitter Filter 2 | 3 | Companion code for the tutorial series about building a streaming Twitter filter using Python and Redis. 4 | 5 | - [Udemy Course (free - coming soon)]() 6 | - [YouTube Playlist (coming soon)]() 7 | - [Medium Article (coming soon)]() 8 | 9 | ## File Organization 10 | 11 | - **1_twitter_stream**: Contains the Python script to download, filter and extract data from the Twitter Streaming API. 12 | - **2_sentiment**: Contains the updated Python script to calculate and store sentiment scores for each tweet. 13 | - **3_redis**: Contains the additional Python class to interface with a Redis server. 14 | - **4_design_front_end**: Contains the pen and paper mockup as well as the static front-end code. 15 | - **5_python_class**: Contains the Python class to pre-process each tweet prior to hitting the Flask app. 16 | - **6_flask**: Contains the Flask app to read the last 15 tweets from Redis and display them. 17 | -------------------------------------------------------------------------------- /3_redis/tweet_store.py: -------------------------------------------------------------------------------- 1 | import json 2 | import redis 3 | 4 | class TweetStore: 5 | 6 | # Redis Configuration 7 | redis_host = "localhost" 8 | redis_port = 6379 9 | redis_password = "" 10 | 11 | # Tweet Configuration 12 | redis_key = 'tweets' 13 | num_tweets = 20 14 | 15 | def __init__(self): 16 | self.db = r = redis.Redis( 17 | host=self.redis_host, 18 | port=self.redis_port, 19 | password=self.redis_password 20 | ) 21 | self.trim_count = 0 22 | 23 | def push(self, data): 24 | self.db.lpush(self.redis_key, json.dumps(data)) 25 | self.trim_count += 1 26 | 27 | # Periodically trim the list so it doesn't grow too large. 28 | if self.trim_count > 100: 29 | self.db.ltrim(self.redis_key, 0, self.num_tweets) 30 | self.trim_count = 0 31 | 32 | def tweets(self, limit=15): 33 | tweets = [] 34 | 35 | for item in self.db.lrange(self.redis_key, 0, limit-1): 36 | tweet_obj = json.loads(item) 37 | tweets.append(tweet_obj) 38 | 39 | return tweets 40 | -------------------------------------------------------------------------------- /6_flask/tweet_store.py: -------------------------------------------------------------------------------- 1 | import json 2 | import redis 3 | from tweet import Tweet 4 | 5 | class TweetStore: 6 | 7 | # Redis Configuration 8 | redis_host = "localhost" 9 | redis_port = 6379 10 | redis_password = "" 11 | 12 | # Tweet Configuration 13 | redis_key = 'tweets' 14 | num_tweets = 20 15 | trim_threshold = 100 16 | 17 | def __init__(self): 18 | self.db = r = redis.Redis( 19 | host=self.redis_host, 20 | port=self.redis_port, 21 | password=self.redis_password 22 | ) 23 | self.trim_count = 0 24 | 25 | def tweets(self, limit=15): 26 | tweets = [] 27 | 28 | for item in self.db.lrange(self.redis_key, 0, limit-1): 29 | tweet_obj = json.loads(item) 30 | tweets.append(Tweet(tweet_obj)) 31 | 32 | return tweets 33 | 34 | def push(self, data): 35 | self.db.lpush(self.redis_key, json.dumps(data)) 36 | self.trim_count += 1 37 | 38 | # Periodically trim the list so it doesn't grow too large. 39 | if self.trim_count > 100: 40 | self.db.ltrim(self.redis_key, 0, self.num_tweets) 41 | self.trim_count = 0 42 | -------------------------------------------------------------------------------- /5_python_class/tweet_store.py: -------------------------------------------------------------------------------- 1 | import json 2 | import redis 3 | from tweet import Tweet 4 | 5 | class TweetStore: 6 | 7 | # Redis Configuration 8 | redis_host = "localhost" 9 | redis_port = 6379 10 | redis_password = "" 11 | 12 | # Tweet Configuration 13 | redis_key = 'tweets' 14 | num_tweets = 20 15 | trim_threshold = 100 16 | 17 | def __init__(self): 18 | self.db = r = redis.Redis( 19 | host=self.redis_host, 20 | port=self.redis_port, 21 | password=self.redis_password 22 | ) 23 | self.trim_count = 0 24 | 25 | def tweets(self, limit=15): 26 | tweets = [] 27 | 28 | for item in self.db.lrange(self.redis_key, 0, limit-1): 29 | tweet_obj = json.loads(item) 30 | tweets.append(Tweet(tweet_obj)) 31 | 32 | return tweets 33 | 34 | def push(self, data): 35 | self.db.lpush(self.redis_key, json.dumps(data)) 36 | self.trim_count += 1 37 | 38 | # Periodically trim the list so it doesn't grow too large. 39 | if self.trim_count > 100: 40 | self.db.ltrim(self.redis_key, 0, self.num_tweets) 41 | self.trim_count = 0 42 | -------------------------------------------------------------------------------- /1_twitter_stream/twitter_filter.py: -------------------------------------------------------------------------------- 1 | import tweepy 2 | import datetime 3 | import json 4 | 5 | file_path = '../config/api.json' 6 | 7 | with open(file_path) as f: 8 | twitter_api = json.loads(f.read()) 9 | 10 | consumer_key = twitter_api['consumer_key'] 11 | consumer_secret = twitter_api['consumer_secret'] 12 | access_token = twitter_api['access_token'] 13 | access_token_secret = twitter_api['access_token_secret'] 14 | 15 | auth = tweepy.OAuthHandler(consumer_key, consumer_secret) 16 | auth.set_access_token(access_token, access_token_secret) 17 | 18 | api = tweepy.API(auth) 19 | 20 | class StreamListener(tweepy.StreamListener): 21 | 22 | def on_status(self, status): 23 | 24 | if ('RT @' not in status.text): 25 | tweet_item = { 26 | 'id_str': status.id_str, 27 | 'text': status.text, 28 | 'username': status.user.screen_name, 29 | 'name': status.user.name, 30 | 'profile_image_url': status.user.profile_image_url, 31 | 'received_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 32 | } 33 | 34 | print(tweet_item) 35 | 36 | def on_error(self, status_code): 37 | if status_code == 420: 38 | return False 39 | 40 | stream_listener = StreamListener() 41 | stream = tweepy.Stream(auth=api.auth, listener=stream_listener) 42 | stream.filter(track=["@WarbyParker", "@Bonobos", "@Casper", "@Glossier", "@DollarShaveClub", "@Allbirds"]) 43 | -------------------------------------------------------------------------------- /2_sentiment/twitter_filter.py: -------------------------------------------------------------------------------- 1 | import tweepy 2 | import datetime 3 | from textblob import TextBlob 4 | import json 5 | 6 | file_path = '../config/api.json' 7 | 8 | with open(file_path) as f: 9 | twitter_api = json.loads(f.read()) 10 | 11 | consumer_key = twitter_api['consumer_key'] 12 | consumer_secret = twitter_api['consumer_secret'] 13 | access_token = twitter_api['access_token'] 14 | access_token_secret = twitter_api['access_token_secret'] 15 | 16 | auth = tweepy.OAuthHandler(consumer_key, consumer_secret) 17 | auth.set_access_token(access_token, access_token_secret) 18 | 19 | api = tweepy.API(auth) 20 | 21 | class StreamListener(tweepy.StreamListener): 22 | 23 | def on_status(self, status): 24 | 25 | if ('RT @' not in status.text): 26 | blob = TextBlob(status.text) 27 | sent = blob.sentiment 28 | polarity = sent.polarity 29 | subjectivity = sent.subjectivity 30 | 31 | tweet_item = { 32 | 'id_str': status.id_str, 33 | 'text': status.text, 34 | 'polarity': polarity, 35 | 'subjectivity': subjectivity, 36 | 'username': status.user.screen_name, 37 | 'name': status.user.name, 38 | 'profile_image_url': status.user.profile_image_url, 39 | 'received_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 40 | } 41 | 42 | print(tweet_item) 43 | 44 | def on_error(self, status_code): 45 | if status_code == 420: 46 | return False 47 | 48 | stream_listener = StreamListener() 49 | stream = tweepy.Stream(auth=api.auth, listener=stream_listener) 50 | stream.filter(track=["@WarbyParker", "@Bonobos", "@Casper", "@Glossier", "@DollarShaveClub", "@Allbirds", "pizza"]) 51 | -------------------------------------------------------------------------------- /6_flask/twitter_filter.py: -------------------------------------------------------------------------------- 1 | import tweepy 2 | import datetime 3 | from textblob import TextBlob 4 | from tweet_store import TweetStore 5 | import json 6 | 7 | file_path = '../config/api.json' 8 | 9 | with open(file_path) as f: 10 | twitter_api = json.loads(f.read()) 11 | 12 | consumer_key = twitter_api['consumer_key'] 13 | consumer_secret = twitter_api['consumer_secret'] 14 | access_token = twitter_api['access_token'] 15 | access_token_secret = twitter_api['access_token_secret'] 16 | 17 | auth = tweepy.OAuthHandler(consumer_key, consumer_secret) 18 | auth.set_access_token(access_token, access_token_secret) 19 | 20 | api = tweepy.API(auth) 21 | store = TweetStore() 22 | 23 | class StreamListener(tweepy.StreamListener): 24 | 25 | def on_status(self, status): 26 | 27 | if ('RT @' not in status.text): 28 | blob = TextBlob(status.text) 29 | sent = blob.sentiment 30 | polarity = sent.polarity 31 | subjectivity = sent.subjectivity 32 | 33 | tweet_item = { 34 | 'id_str': status.id_str, 35 | 'text': status.text, 36 | 'polarity': polarity, 37 | 'subjectivity': subjectivity, 38 | 'username': status.user.screen_name, 39 | 'name': status.user.name, 40 | 'profile_image_url': status.user.profile_image_url, 41 | 'received_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 42 | } 43 | 44 | store.push(tweet_item) 45 | print("Pushed to redis:", tweet_item) 46 | 47 | def on_error(self, status_code): 48 | if status_code == 420: 49 | return False 50 | 51 | stream_listener = StreamListener() 52 | stream = tweepy.Stream(auth=api.auth, listener=stream_listener) 53 | stream.filter(track=["@WarbyParker", "@Bonobos", "@Casper", "@Glossier", "@DollarShaveClub", "@Allbirds"]) 54 | -------------------------------------------------------------------------------- /5_python_class/twitter_filter.py: -------------------------------------------------------------------------------- 1 | import tweepy 2 | import datetime 3 | from textblob import TextBlob 4 | from tweet_store import TweetStore 5 | import json 6 | 7 | file_path = '../config/api.json' 8 | 9 | with open(file_path) as f: 10 | twitter_api = json.loads(f.read()) 11 | 12 | consumer_key = twitter_api['consumer_key'] 13 | consumer_secret = twitter_api['consumer_secret'] 14 | access_token = twitter_api['access_token'] 15 | access_token_secret = twitter_api['access_token_secret'] 16 | 17 | auth = tweepy.OAuthHandler(consumer_key, consumer_secret) 18 | auth.set_access_token(access_token, access_token_secret) 19 | 20 | api = tweepy.API(auth) 21 | store = TweetStore() 22 | 23 | class StreamListener(tweepy.StreamListener): 24 | 25 | def on_status(self, status): 26 | 27 | if ('RT @' not in status.text): 28 | blob = TextBlob(status.text) 29 | sent = blob.sentiment 30 | polarity = sent.polarity 31 | subjectivity = sent.subjectivity 32 | 33 | tweet_item = { 34 | 'id_str': status.id_str, 35 | 'text': status.text, 36 | 'polarity': polarity, 37 | 'subjectivity': subjectivity, 38 | 'username': status.user.screen_name, 39 | 'name': status.user.name, 40 | 'profile_image_url': status.user.profile_image_url, 41 | 'received_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 42 | } 43 | 44 | store.push(tweet_item) 45 | print("Pushed to redis:", tweet_item) 46 | 47 | def on_error(self, status_code): 48 | if status_code == 420: 49 | return False 50 | 51 | stream_listener = StreamListener() 52 | stream = tweepy.Stream(auth=api.auth, listener=stream_listener) 53 | stream.filter(track=["@WarbyParker", "@Bonobos", "@Casper", "@Glossier", "@DollarShaveClub", "@Allbirds"]) 54 | -------------------------------------------------------------------------------- /3_redis/twitter_filter.py: -------------------------------------------------------------------------------- 1 | import tweepy 2 | import datetime 3 | from textblob import TextBlob 4 | from tweet_store import TweetStore 5 | import json 6 | 7 | file_path = '../config/api.json' 8 | 9 | with open(file_path) as f: 10 | twitter_api = json.loads(f.read()) 11 | 12 | consumer_key = twitter_api['consumer_key'] 13 | consumer_secret = twitter_api['consumer_secret'] 14 | access_token = twitter_api['access_token'] 15 | access_token_secret = twitter_api['access_token_secret'] 16 | 17 | auth = tweepy.OAuthHandler(consumer_key, consumer_secret) 18 | auth.set_access_token(access_token, access_token_secret) 19 | 20 | api = tweepy.API(auth) 21 | store = TweetStore() 22 | 23 | class StreamListener(tweepy.StreamListener): 24 | 25 | def on_status(self, status): 26 | 27 | if ('RT @' not in status.text): 28 | blob = TextBlob(status.text) 29 | sent = blob.sentiment 30 | polarity = sent.polarity 31 | subjectivity = sent.subjectivity 32 | 33 | tweet_item = { 34 | 'id_str': status.id_str, 35 | 'text': status.text, 36 | 'polarity': polarity, 37 | 'subjectivity': subjectivity, 38 | 'username': status.user.screen_name, 39 | 'name': status.user.name, 40 | 'profile_image_url': status.user.profile_image_url, 41 | 'received_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 42 | } 43 | 44 | store.push(tweet_item) 45 | print("Pushed to redis:", tweet_item) 46 | 47 | def on_error(self, status_code): 48 | if status_code == 420: 49 | return False 50 | 51 | stream_listener = StreamListener() 52 | stream = tweepy.Stream(auth=api.auth, listener=stream_listener) 53 | stream.filter(track=["@WarbyParker", "@Bonobos", "@Casper", "@Glossier", "@DollarShaveClub", "@Allbirds", "pizza"]) 54 | -------------------------------------------------------------------------------- /6_flask/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |38 | {{ tweet.filtered_text()|safe }} 39 |
40 |37 | Just stopped by the new @WarbyParker store and got a fresh pair of spectacles! 38 |
39 |54 | Real Estate Announcement: WarbyParker leases storefront in DC Commons. 55 |
56 |71 | I fell down the stairs and broke my WarbyParker frames. NOOOOO!!!! 72 |
73 |