├── .gitignore
├── .ipynb_checkpoints
│   └── make_compliant_with_TOS-checkpoint.ipynb
├── data.csv
├── make_compliant_with_TOS.ipynb
├── readme.md
└── twitter_miner.py

/.gitignore:
--------------------------------------------------------------------------------
full.csv
.DS_Store
.ipynb_checkpoints/
--------------------------------------------------------------------------------
/.ipynb_checkpoints/make_compliant_with_TOS-checkpoint.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 1
}
--------------------------------------------------------------------------------
/make_compliant_with_TOS.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"full.csv\", header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df[28].to_csv(\"data.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "149790"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Second Presidential Debate Tweets

This repo contains data on roughly 150,000 tweets from the second presidential debate. To comply with Twitter's terms of service, the public data (`data.csv`) contains only tweet IDs. A short Python script to convert that list of tweet IDs back into full Twitter data is coming soon; in the meantime, a sketch of what such a script might look like is below.
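A minimal sketch of such a hydration script, assuming tweepy 3.x, your own API credentials, and `data.csv` rows of the form `index,tweet_id` as written by `make_compliant_with_TOS.ipynb` (the names `hydrate.py` and `hydrated.csv` are placeholders, not files in this repo):

```python
# hydrate.py -- a minimal sketch, not the final script; assumes tweepy 3.x
import csv
import tweepy

# Fill in your own Twitter app credentials
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

# data.csv rows look like "index,tweet_id" (written by make_compliant_with_TOS.ipynb)
with open("data.csv") as f:
    ids = [row[1] for row in csv.reader(f)]

# statuses_lookup accepts at most 100 IDs per call
with open("hydrated.csv", "w") as out:
    writer = csv.writer(out)
    for i in range(0, len(ids), 100):
        for status in api.statuses_lookup(ids[i:i + 100]):
            writer.writerow([status.id, status.created_at, status.text])
```

Tweets that have since been deleted or made private won't be returned by the API, so the hydrated file may contain fewer than the original 149,790 rows.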
--------------------------------------------------------------------------------
/twitter_miner.py:
--------------------------------------------------------------------------------
# EXAMPLE USAGE: python twitter_miner.py 'test.csv' \#hillary \#trump
# This will monitor the #hillary and #trump hashtags and save matching tweets to test.csv.
# Uses a Twitter app API key for generic Twitter mining.
# Note: to mine hashtags, you have to escape the # with \ on the command line.

# Import the necessary classes from the tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import time
import csv
import sys

# User credentials to access the Twitter API (fill in your own)
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""


class StdOutListener(StreamListener):

    def __init__(self, api=None):
        self.api = api
        # Create (or truncate) the output file once at startup
        self.filename = sys.argv[1]
        open(self.filename, 'w').close()

    def on_status(self, status):
        # Skip retweets
        if 'RT @' in status.text:
            return

        # Append one CSV row per tweet
        with open(self.filename, 'a') as csvFile:
            csvWriter = csv.writer(csvFile)
            try:
                csvWriter.writerow([status.text,
                                    status.created_at,
                                    status.geo,
                                    status.lang,
                                    status.place,
                                    status.coordinates,
                                    status.user.favourites_count,
                                    status.user.statuses_count,
                                    status.user.description,
                                    status.user.location,
                                    status.user.id,
                                    status.user.created_at,
                                    status.user.verified,
                                    status.user.following,
                                    status.user.url,
                                    status.user.listed_count,
                                    status.user.followers_count,
                                    status.user.default_profile_image,
                                    status.user.utc_offset,
                                    status.user.friends_count,
                                    status.user.default_profile,
                                    status.user.name,
                                    status.user.lang,
                                    status.user.screen_name,
                                    status.user.geo_enabled,
                                    status.user.profile_background_color,
                                    status.user.profile_image_url,
                                    status.user.time_zone,
                                    status.id,
                                    status.favorite_count,
                                    status.retweeted,
                                    status.source,
                                    status.favorited,
                                    status.retweet_count])
            except Exception as e:
                print(e)

    def on_error(self, status_code):
        print('Encountered error with status code:', status_code)
        return  # Don't kill the stream

    def on_delete(self, status_id, user_id):
        """Called when a delete notice arrives for a status."""
        print("Delete notice")
        return

    def on_limit(self, track):
        # Sent when more tweets match the filter than the stream will deliver
        print("!!! Limitation notice received")
        return True

    def on_timeout(self):
        print('Timeout...', file=sys.stderr)
        time.sleep(10)
        return True  # Keep the stream alive


l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)

# Filter the Twitter stream to capture tweets matching the command-line keywords
stream.filter(track=sys.argv[2:])
--------------------------------------------------------------------------------