├── .gitignore
├── .ipynb_checkpoints
│   └── make_compliant_with_TOS-checkpoint.ipynb
├── data.csv
├── make_compliant_with_TOS.ipynb
├── readme.md
└── twitter_miner.py

/.gitignore:
--------------------------------------------------------------------------------
full.csv
.DS_Store
.ipynb_checkpoints/
--------------------------------------------------------------------------------
/.ipynb_checkpoints/make_compliant_with_TOS-checkpoint.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 1
}
--------------------------------------------------------------------------------
/make_compliant_with_TOS.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"full.csv\", header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df[28].to_csv(\"data.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "149790"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Second Presidential Debate Tweets

This repo contains data on roughly 150,000 tweets from the second presidential debate. To comply with Twitter's terms of service, the public data (`data.csv`) contains only tweet IDs. A short Python script to convert that list of tweet IDs back into full Twitter data is coming soon; in the meantime, a sketch of what such a script might look like is below.
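A minimal sketch of such a hydration script, assuming tweepy 3.x, your own API credentials, and `data.csv` rows of the form `index,tweet_id` as written by `make_compliant_with_TOS.ipynb` (the names `hydrate.py` and `hydrated.csv` are placeholders, not files in this repo):

```python
# hydrate.py -- a minimal sketch, not the final script; assumes tweepy 3.x
import csv
import tweepy

# Fill in your own Twitter app credentials
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

# data.csv rows look like "index,tweet_id" (written by make_compliant_with_TOS.ipynb)
with open("data.csv") as f:
    ids = [row[1] for row in csv.reader(f)]

# statuses_lookup accepts at most 100 IDs per call
with open("hydrated.csv", "w") as out:
    writer = csv.writer(out)
    for i in range(0, len(ids), 100):
        for status in api.statuses_lookup(ids[i:i + 100]):
            writer.writerow([status.id, status.created_at, status.text])
```

Tweets that have since been deleted or made private won't be returned by the API, so the hydrated file may contain fewer than the original 149,790 rows.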
--------------------------------------------------------------------------------
/twitter_miner.py:
--------------------------------------------------------------------------------
# EXAMPLE USAGE: python twitter_miner.py 'test.csv' \#hillary \#trump
# This will monitor the #hillary and #trump hashtags and save matching tweets to test.csv.
# Uses a Twitter app API key for generic Twitter mining.
# Note: to mine hashtags, you have to escape the # with \ on the command line.

# Import the necessary classes from the tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import time
import csv
import sys

# User credentials to access the Twitter API (fill in your own)
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""


class StdOutListener(StreamListener):

    def __init__(self, api=None):
        self.api = api
        # Create (or truncate) the output file once at startup
        self.filename = sys.argv[1]
        open(self.filename, 'w').close()

    def on_status(self, status):
        # Skip retweets
        if 'RT @' in status.text:
            return

        # Append one CSV row per tweet
        with open(self.filename, 'a') as csvFile:
            csvWriter = csv.writer(csvFile)
            try:
                csvWriter.writerow([status.text,
                                    status.created_at,
                                    status.geo,
                                    status.lang,
                                    status.place,
                                    status.coordinates,
                                    status.user.favourites_count,
                                    status.user.statuses_count,
                                    status.user.description,
                                    status.user.location,
                                    status.user.id,
                                    status.user.created_at,
                                    status.user.verified,
                                    status.user.following,
                                    status.user.url,
                                    status.user.listed_count,
                                    status.user.followers_count,
                                    status.user.default_profile_image,
                                    status.user.utc_offset,
                                    status.user.friends_count,
                                    status.user.default_profile,
                                    status.user.name,
                                    status.user.lang,
                                    status.user.screen_name,
                                    status.user.geo_enabled,
                                    status.user.profile_background_color,
                                    status.user.profile_image_url,
                                    status.user.time_zone,
                                    status.id,
                                    status.favorite_count,
                                    status.retweeted,
                                    status.source,
                                    status.favorited,
                                    status.retweet_count])
            except Exception as e:
                print(e)

    def on_error(self, status_code):
        print('Encountered error with status code:', status_code)
        return  # Don't kill the stream

    def on_delete(self, status_id, user_id):
        """Called when a delete notice arrives for a status."""
        print("Delete notice")
        return

    def on_limit(self, track):
        # Sent when more tweets match the filter than the stream will deliver
        print("!!! Limitation notice received")
        return True

    def on_timeout(self):
        print('Timeout...', file=sys.stderr)
        time.sleep(10)
        return True  # Keep the stream alive


l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)

# Filter the Twitter stream to capture tweets matching the command-line keywords
stream.filter(track=sys.argv[2:])
--------------------------------------------------------------------------------