├── .gitignore
├── Docker
│   └── Using_postgresql
│       ├── Dockerfile
│       ├── README.md
│       ├── docker-compose.yml
│       └── sample_code.py
├── External Resources
│   └── learning-resources.md
├── Facebook
│   └── Facebook.ipynb
├── R
│   └── cdx_tutorial.Rmd
├── README.md
├── Twitter
│   ├── Basic_Twiter_Analysis.ipynb
│   ├── Building_a_Graph_Twitter.ipynb
│   ├── Clustering_twitter.ipynb
│   ├── Intro_Collecting_Tweets.ipynb
│   ├── Python_and_maps.ipynb
│   ├── StreamingTweetsFromTwitter.ipynb
│   └── Twitter_Gettingpast_32K_Limit.ipynb
└── aws
    └── AWS_Boto3_s3_intro.ipynb

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 | 
8 | # C extensions
9 | *.so
10 | 
11 | # Distribution / packaging
12 | .Python
13 | env/
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | 
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | .hypothesis/
50 | 
51 | # Translations
52 | *.mo
53 | *.pot
54 | 
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # dotenv
82 | .env
83 | 
84 | # virtualenv
85 | .venv/
86 | venv/
87 | ENV/
88 | 
89 | # Spyder project settings
90 | .spyderproject
91 | 
92 | # Rope project settings
93 | .ropeproject
--------------------------------------------------------------------------------
/Docker/Using_postgresql/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alejandrox1/ubuntu_miniconda
2 | 
3 | RUN apt-get update -y
4 | 
5 | RUN pip install sqlalchemy
6 | RUN pip install psycopg2
7 | RUN pip install kafka==1.3.3
8 | RUN pip install ipython
9 | 
10 | WORKDIR /app
11 | 
12 | # default entrypoint "/bin/bash -c"
13 | 
--------------------------------------------------------------------------------
/Docker/Using_postgresql/README.md:
--------------------------------------------------------------------------------
1 | # Using Postgresql with Docker :-)
2 | 
3 | Hello everyone, this will be a nice and quick hands-on tutorial on basic SQL
4 | operations and the wonderfulness of Docker.
5 | 
6 | The only thing you need to get this thing going (and anything else) is to have
7 | Docker installed on your machine -> [installing Docker](https://docs.docker.com/engine/installation/).
8 | 
9 | # Getting started
10 | [Let's get it started](https://www.youtube.com/watch?v=IKqV7DB8Iwg#t=08s)..
11 | 
12 | ## Getting the code
13 | Open up a terminal and...
14 | ```
15 | git clone https://github.com/Data4Democracy/tutorials
16 | cd tutorials/Docker/Using_postgresql
17 | ```
18 | 
19 | The above commands will place you in the postgres + docker world we are going
20 | to need.
21 | 
22 | ## Getting a PostgreSQL DB running
23 | Now, do
24 | ```
25 | docker-compose up --build -d && docker-compose ps
26 | ```
27 | 
28 | You will see we have two containers running: `postgresql` + `app`.
29 | [postgresql](https://hub.docker.com/_/postgres/) is the official docker image
30 | and `app` is the container built from our `Dockerfile`.
31 | 
32 | 
33 | ## Connecting to Postgres
34 | In your terminal, type:
35 | ```
36 | docker exec -it app bash
37 | ```
38 | 
39 | This will take you to a bash shell [INSIDE your docker container!](https://tinyurl.com/yb8aqrkp)
40 | 
41 | Finally, type
42 | ```
43 | ipython
44 | ```
45 | 
46 | ### Using sqlalchemy
47 | Here is where you can deviate and try this awesome [tutorial](https://www.compose.com/articles/using-json-extensions-in-postgresql-from-python-2/)
48 | or go along with this other example (go through both!):
49 | 
50 | ```
51 | Python 3.6.1 |Continuum Analytics, Inc.| (default, May 11 2017, 13:09:58)
52 | Type 'copyright', 'credits' or 'license' for more information
53 | IPython 6.1.0 -- An enhanced Interactive Python. Type '?' for help.
54 | 
55 | In [1]: import json
56 |    ...: import sqlalchemy
57 |    ...: from sqlalchemy.sql import select
58 |    ...: from sqlalchemy import Column, Text
59 |    ...: 
60 |    ...: 
61 |    ...: connection_string = 'postgresql://user:pass@postgresql/mydatabase'
62 |    ...: 
63 |    ...: db = sqlalchemy.create_engine(connection_string)
64 |    ...: engine = db.connect()
65 |    ...: meta = sqlalchemy.MetaData(engine)
66 |    ...: meta.reflect(bind=engine)
67 |    ...: 
68 | 
69 | In [2]: # list all tables
70 |    ...: # there shouldn't be any... yet!
71 |    ...: meta.tables
72 | Out[2]: immutabledict({})
73 | 
74 | In [3]: # create table
75 |    ...: table = sqlalchemy.Table("twitterusers",
76 |    ...:                          meta,
77 |    ...:                          Column('screen_name', Text, primary_key=True),
78 |    ...:                          Column('last_scraped', Text),
79 |    ...:                          extend_existing=True)
80 |    ...: table.create(engine)
81 |    ...: 
82 | 
83 | In [4]: # some sample data to be stored
84 |    ...: entries = [
85 |    ...:     {'screen_name': 'katie', 'last_scraped': 'today'},
86 |    ...:     {'screen_name': 'hunter', 'last_scraped': 'yesterday'},
87 |    ...:     {'screen_name': 'felix', 'last_scraped': 'last week'},
88 |    ...:     {'screen_name': 'audie', 'last_scraped': 'last year'},
89 |    ...: ]
90 | 
91 | In [5]: # Insert data
92 |    ...: record = sqlalchemy.table("twitterusers",
93 |    ...:                           Column('screen_name', Text),
94 |    ...:                           Column('last_scraped', Text))
95 |    ...: for entry in entries:
96 |    ...:     statement = record.insert().values(
97 |    ...:         screen_name = entry['screen_name'],
98 |    ...:         last_scraped = entry['last_scraped'],
99 |    ...:     )
100 |    ...:     engine.execute(statement)
101 |    ...: 
102 | 
103 | In [6]: # Look up data
104 |    ...: table = meta.tables['twitterusers']
105 |    ...: res = engine.execute(select([table.c.screen_name, table.c.last_scraped]))
106 |    ...: rows = res.fetchall()
107 |    ...: 
108 | 
109 | In [7]: rows
110 | Out[7]: 
111 | [('katie', 'today'),
112 |  ('hunter', 'yesterday'),
113 |  ('felix', 'last week'),
114 |  ('audie', 'last year')]
115 | 
116 | In [8]: # Updating entries
117 |    ...: t = table.update().values(last_scraped='this century').where(table.c.screen_name=='hunter')
118 |    ...: engine.execute(t)
119 |    ...: 
120 | 
121 | Out[8]: 
122 | 
123 | In [9]: # Look up data
124 |    ...: table = meta.tables['twitterusers']
125 |    ...: res = engine.execute(select([table.c.screen_name, table.c.last_scraped]))
126 |    ...: rows = res.fetchall()
127 |    ...: rows
128 |    ...: 
129 | 
130 | Out[9]: 
131 | [('katie', 'today'),
132 |  ('felix', 'last week'),
133 |  ('audie', 'last year'),
134 |  ('hunter', 'this century')]
135 | 
136 | ```
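137 | 
138 | ## Cleaning up
139 | When you are done, `docker-compose` can stop and remove both containers (and the network it created) for you:
140 | ```
141 | docker-compose down
142 | ```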
--------------------------------------------------------------------------------
/Docker/Using_postgresql/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 | networks:
3 |   postgresnet:
4 | 
5 | services:
6 |   postgresql:
7 |     image: postgres:9.6
8 |     hostname: postgresql
9 |     container_name: postgresql
10 |     environment:
11 |       - POSTGRES_USER=user
12 |       - POSTGRES_PASSWORD=pass
13 |       - POSTGRES_DB=mydatabase
14 |     networks:
15 |       - postgresnet
16 | 
17 |   app:
18 |     build: .
19 |     hostname: app
20 |     container_name: app
21 |     command: sleep 1h
22 |     volumes:
23 |       - .:/app
24 |     depends_on:
25 |       - postgresql
26 |     networks:
27 |       - postgresnet
28 | 
--------------------------------------------------------------------------------
/Docker/Using_postgresql/sample_code.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sqlalchemy
3 | from sqlalchemy.sql import select
4 | from sqlalchemy import Column, Text
5 | 
6 | 
7 | connection_string = 'postgresql://user:pass@postgresql/mydatabase'
8 | 
9 | db = sqlalchemy.create_engine(connection_string)
10 | engine = db.connect()
11 | meta = sqlalchemy.MetaData(engine)
12 | meta.reflect(bind=engine)
13 | 
14 | # list all tables
15 | # there shouldn't be any... yet!
16 | meta.tables
17 | 
18 | 
19 | # create table
20 | table = sqlalchemy.Table("twitterusers",
21 |                          meta,
22 |                          Column('screen_name', Text, primary_key=True),
23 |                          Column('last_scraped', Text),
24 |                          extend_existing=True)
25 | table.create(engine)
26 | 
27 | # some sample data to be stored
28 | entries = [
29 |     {'screen_name': 'katie', 'last_scraped': 'today'},
30 |     {'screen_name': 'hunter', 'last_scraped': 'yesterday'},
31 |     {'screen_name': 'felix', 'last_scraped': 'last week'},
32 |     {'screen_name': 'audie', 'last_scraped': 'last year'},
33 | ]
34 | 
35 | # Insert data
36 | record = sqlalchemy.table("twitterusers",
37 |                           Column('screen_name', Text),
38 |                           Column('last_scraped', Text))
39 | for entry in entries:
40 |     statement = record.insert().values(
41 |         screen_name = entry['screen_name'],
42 |         last_scraped = entry['last_scraped'],
43 |     )
44 |     engine.execute(statement)
45 | 
46 | # Updating entries
47 | t = table.update().values(last_scraped='guajiro').where(table.c.screen_name=='hunter')
48 | engine.execute(t)
49 | 
50 | #find_user = table.select().where(table.c.screen_name=='hunter')
51 | 
52 | 
53 | # Look up data
54 | table = meta.tables['twitterusers']
55 | res = engine.execute(select([table.c.screen_name, table.c.last_scraped]))
56 | rows = res.fetchall()
--------------------------------------------------------------------------------
/External Resources/learning-resources.md:
--------------------------------------------------------------------------------
1 | # Learning Resources
2 | 
3 | A simple reference of resources that members of D4D have found useful in learning some of the languages, platforms, libraries, and methods applicable to D4D projects.
4 | 
5 | ## Python
6 | ### Online
7 | * [CheckiO](https://checkio.org/) - online game for Python coders
8 | * [Enki](https://www.enki.com) - mobile app with 5 minute "workouts" for multiple programming topics
9 | * [Exercism](http://exercism.io/) - Download and solve practice problems in over 30 different languages
10 | * [PyData YouTube](https://www.youtube.com/user/PyDataTV) - All PyData conference talks are recorded, so there are many great talks. Each conference has a playlist.
11 | * [DataCamp](https://www.datacamp.com/) - Tutorials, videos & blogs. It is a subscription site, but there is also lots of great free content. Focuses on data science in R & Python.
12 | * [Open Source Data Science Masters](http://datasciencemasters.org/) - Clare Corthell (who is also a member of D4D!) put together an amazing guide to help you plan a curriculum to learn data science, with links to many free/cheap resources.
13 | 
14 | ### Books
15 | * [Practical Programming: An Introduction to Computer Science Using Python 3 (Pragmatic Programmers)](https://www.amazon.com/Practical-Programming-Introduction-Pragmatic-Programmers/dp/1937785459)
16 | * [Data Structures and Algorithms in Python](https://www.amazon.com/Structures-Algorithms-Python-Michael-Goodrich/dp/1118290275)
17 | * [Python Data Science Handbook](https://github.com/jakevdp/PythonDataScienceHandbook) - The entire book is available as Jupyter notebooks for free, but please consider supporting the author if you can.
18 | 
19 | 
20 | ## Git
21 | * Enki - https://www.enki.com - mobile app with 5 minute "workouts" for multiple programming topics
22 | * On Demand Training - https://services.github.com/on-demand/ - GitHub-provided walkthrough
23 | * On Demand Path - https://services.github.com/on-demand/path/ - Syllabus with videos for the walkthrough (for skipping around)
24 | * [Udacity Git/GitHub course](https://www.udacity.com/course/how-to-use-git-and-github--ud775)
25 | 
26 | ## Podcasts
27 | * [Partially Derivative](http://partiallyderivative.com/)
28 | * [Talk Python to Me](https://talkpython.fm/)
29 | * [Becoming a data scientist](http://www.becomingadatascientist.com/)
30 | * [Data Skeptic](https://www.dataskeptic.com/)
31 | * [Code Newbie](http://www.codenewbie.org/)
--------------------------------------------------------------------------------
/R/cdx_tutorial.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Analyzing website history with R and the Wayback Machine API"
3 | output:
4 |   html_document: default
5 | ---
6 | 
7 | One way to track a disinformation campaign is to get a space-station-level view of the change history of a number of websites involved in the campaign. When sites appear, rise to prominence, or start adding and changing high volumes of content at key historical moments, it tells us we should look more closely at those sites at those times. It can also help direct our attention to possible associations between sites, including common ownership, common ideology, common source material, or bot activity.
8 | 
9 | The Internet Archive Wayback Machine's API gives us a great tool for investigating those changes. It doesn't catch *every* change on every site. But when looking at weekly/monthly time resolutions for frequently visited (and thus frequently scraped) sites, we can get a good general idea of where the major activity is happening.
10 | 
11 | For an example of the kind of analysis you can do with this data, see my blog post, ['(Mis)information and the Trump administration'](http://pushpullfork.com/2017/02/misinformation-trump-administration/).
12 | 
13 | In what follows, I'll walk through how to download, clean, and analyze this data using R and TidyVerse data analysis tools.
14 | 
15 | ## Download and clean data
16 | 
17 | First we need to load a few libraries.
18 | 
19 | ```{r}
20 | library(jsonlite)
21 | library(tidyverse)
22 | library(lubridate)
23 | library(magrittr)
24 | ```
25 | 
26 | Downloading the data from the Wayback Machine API is super-simple. To download the change log for TheRebel.media (a relatively small digest, so good for a tutorial), use the following single line.
27 | 
28 | ```{r}
29 | therebel <- fromJSON('http://web.archive.org/cdx/search/cdx?url=therebel.media&matchType=domain&output=json&collapse=digest')
30 | ```
31 | 
32 | This line queries the Wayback Machine CDX Server API for snapshot records pertaining to ```therebel.media```. The ```matchType=domain``` parameter asks it for everything on the domain (including subdomains, which is good for sites with mobile versions on their own subdomains). ```output=json```, surprisingly enough, outputs the data in JSON format (though pretty flat, and thus very easy to parse with tidy data tools). ```collapse=digest``` will collapse all adjacent versions of a page with the same content into one entry. This is helpful when a new snapshot is created, but not every page changes. Only the page *changes* found by the crawler will be included in the result.
33 | 
34 | For a complete list of query parameters and options, see their [GitHub documentation](https://github.com/internetarchive/wayback/tree/master/wayback-cdx-server).
35 | 
36 | To get the downloaded JSON into a tibble (tidy data frame), use the following.
37 | 
38 | ```{r}
39 | sites <- therebel[-1,] %>%
40 |   as_tibble() %>%
41 |   select(urlkey = 1, timestamp = 2, original = 3, mimetype = 4, statuscode = 5, digest = 6, length = 7) %>%
42 |   filter(statuscode == '200') %>%
43 |   mutate(date = ymd(substr(timestamp, 1, 8)))
44 | ```
45 | 
46 | This will eliminate the first row (which contains the header, but isn't parsed as such by fromJSON), name the columns appropriately, filter out redirects and broken links, and make a lubridate-friendly date stamp, making it much easier to analyze changes over time.
47 | 
48 | Let's say we want to add a couple other sites to the analysis. We simply call them the same way...
49 | 
50 | ```{r}
51 | rickwells <- fromJSON('http://web.archive.org/cdx/search/cdx?url=rickwells.us&matchType=domain&output=json&collapse=digest')
52 | truthfeed <- fromJSON('http://web.archive.org/cdx/search/cdx?url=truthfeed.com&matchType=domain&output=json&collapse=digest')
53 | breitbart <- fromJSON('http://web.archive.org/cdx/search/cdx?url=breitbart.com&matchType=domain&output=json&collapse=digest')
54 | ```
55 | 
56 | Then join them together.
57 | 58 | ```{r} 59 | sites <- truthfeed[-1,] %>% 60 | as_tibble() %>% 61 | select(urlkey = 1, timestamp = 2, original = 3, mimetype = 4, statuscode = 5, digest = 6, length = 7) %>% 62 | filter(statuscode == '200') %>% 63 | mutate(site = 'truthfeed.com') %>% 64 | full_join(breitbart[-1,] %>% 65 | as_tibble() %>% 66 | select(urlkey = 1, timestamp = 2, original = 3, mimetype = 4, statuscode = 5, digest = 6, length = 7) %>% 67 | filter(statuscode == '200') %>% 68 | mutate(site = 'breitbart.com')) %>% 69 | full_join(rickwells[-1,] %>% 70 | as_tibble() %>% 71 | select(urlkey = 1, timestamp = 2, original = 3, mimetype = 4, statuscode = 5, digest = 6, length = 7) %>% 72 | filter(statuscode == '200') %>% 73 | mutate(site = 'rickwells.com')) %>% 74 | full_join(therebel[-1,] %>% 75 | as_tibble() %>% 76 | select(urlkey = 1, timestamp = 2, original = 3, mimetype = 4, statuscode = 5, digest = 6, length = 7) %>% 77 | filter(statuscode == '200') %>% 78 | mutate(site = 'therebel.media')) %>% 79 | mutate(date = ymd(substr(timestamp, 1, 8))) 80 | ``` 81 | 82 | Note that we can save the date processing for last and do it all at once. Also note the use of ```mutate()``` to add a ```site``` field to the data frame, identifying the source of each data set. (That's not strictly necessary, given the data CDX gives us, but it's cleaner and easier to work with.) 83 | 84 | Now we can do some interesting visualizations. 85 | 86 | ## Visualizations 87 | 88 | Let's look at the annual additions and changes to these sites over the course of their history. (Note that Wayback Machine tracks additions and changes, but does not provide ready information about page deletions.) 89 | 90 | ```{r} 91 | sites %>% 92 | mutate(time_floor = floor_date(date, unit = "1 year")) %>% 93 | group_by(time_floor, site) %>% 94 | summarize(count = n()) %>% 95 | ggplot(aes(time_floor, count, color = site)) + 96 | geom_line() + 97 | xlab('Date') + 98 | ylab('Pages added or changed') + 99 | ggtitle(paste('Page additions and changes found by the Wayback Machine on\nright-wing sites, by year', sep = '')) 100 | ``` 101 | 102 | We can also do this by month by changing the ```time_floor``` unit. 103 | 104 | ```{r} 105 | sites %>% 106 | mutate(time_floor = floor_date(date, unit = "1 month")) %>% 107 | group_by(time_floor, site) %>% 108 | summarize(count = n()) %>% 109 | ggplot(aes(time_floor, count, color = site)) + 110 | geom_line() + 111 | xlab('Date') + 112 | ylab('Pages added or changed') + 113 | ggtitle(paste('Page additions and changes found by the Wayback Machine on\nright-wing sites, by month', sep = '')) 114 | ``` 115 | 116 | And we can hone in on a date range by setting a filter. 117 | 118 | ```{r} 119 | sites %>% 120 | filter(date >= '2015-01-01') %>% 121 | mutate(time_floor = floor_date(date, unit = "1 month")) %>% 122 | group_by(time_floor, site) %>% 123 | summarize(count = n()) %>% 124 | ggplot(aes(time_floor, count, color = site)) + 125 | geom_line() + 126 | xlab('Date') + 127 | ylab('Pages added or changed') + 128 | ggtitle(paste('Page additions and changes found by the Wayback Machine on\nright-wing sites, by month', sep = '')) 129 | ``` 130 | 131 | If instead of comparing sites, we want to see aggregate totals, we can change ```geom_line()``` to ```geom_col()``` and ```color``` to ```fill```. 
132 | 
133 | ```{r}
134 | sites %>%
135 |   filter(date >= '2015-01-01') %>%
136 |   mutate(time_floor = floor_date(date, unit = "1 month")) %>%
137 |   group_by(time_floor, site) %>%
138 |   summarize(count = n()) %>%
139 |   ggplot(aes(time_floor, count, fill = site)) +
140 |     geom_col() +
141 |     xlab('Date') +
142 |     ylab('Pages added or changed') +
143 |     ggtitle(paste('Page additions and changes found by the Wayback Machine on\nright-wing sites, by month', sep = ''))
144 | ```
145 | 
146 | Hmm, looking at data from these four sites, we can see the rise of activity (identified by the Wayback Machine scraper) during the 2016 primary season, the appearance of Truthfeed around the time Trump clinched the GOP nomination, a fall-off in activity on rickwells.com and therebel.media after the RNC, and the most activity from Breitbart and Truthfeed after the election. Curious...
147 | 
148 | That's just a few things you can do with this data. For other ideas, see [my blog post](http://pushpullfork.com/2017/02/misinformation-trump-administration/). Have fun!
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Tutorials
2 | 
3 | **Slack:** [#tutorials](https://datafordemocracy.slack.com/messages/tutorials/)
4 | 
5 | **Project Description:** A place for tutorials relevant to D4D projects.
6 | 
7 | **Project Leads:**
8 | * [@alarcj](https://datafordemocracy.slack.com/messages/@alarcj/)
9 | * [@grichardson](https://datafordemocracy.slack.com/messages/@grichardson/)
10 | 
11 | # List of Tutorials
12 | ## External Resources
13 | * [External Learning Resources](https://github.com/Data4Democracy/tutorials/blob/master/External%20Resources/learning-resources.md)
14 | Reference of resources that members of D4D have found useful in learning some of the languages, platforms, libraries, and methods applicable to D4D projects.
15 | 
16 | ## AWS
17 | * [AWS S3 using boto3](https://github.com/Data4Democracy/tutorials/blob/master/aws/AWS_Boto3_s3_intro.ipynb)
18 | How to interact with an S3 bucket from Python using the boto3 library.
19 | 
20 | - A helpful tutorial for setting up your first IAM user and adding permissions before beginning the tutorial above: (https://linuxacademy.com/howtoguides/posts/show/topic/14209-automating-aws-with-python-and-boto3)
21 | 
22 | ## Twitter
23 | * [Intro Collecting Tweets](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/Intro_Collecting_Tweets.ipynb)
24 | Getting started using the Twitter API for downloading Tweets.
25 | * [Twitter Getting Past the 32K Limit](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/Twitter_Gettingpast_32K_Limit.ipynb)
26 | How to obtain all the Tweets from a given user.
27 | * [Streaming Tweets From Twitter](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/StreamingTweetsFromTwitter.ipynb)
28 | A great example of how to stream Tweets (listen to live events).
29 | * [Basic Twitter Analysis](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/Basic_Twiter_Analysis.ipynb)
30 | Simple word frequency analysis and tokenization.
31 | * [Interactive Maps with Python's Folium](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/Python_and_maps.ipynb)
32 | Interactive Maps of Tweets (excuse the sample size for this tutorial).
33 | * [Clustering Twitter](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/Clustering_twitter.ipynb)
34 | Clustering a user's followers by using KMeans.
35 | * [Building a Graph with Twitter](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/Building_a_Graph_Twitter.ipynb)
36 | A short tutorial on how to build a graph of Twitter friends and followers using NetworkX.
37 | 
38 | ## Tutorials by Project
39 | * [Assemble](https://github.com/Data4Democracy/assemble)
40 |   * [Twitter](https://github.com/Data4Democracy/tutorials/tree/master/Twitter) All of it!
41 | * [Internal Displacement](https://github.com/Data4Democracy/internal-displacement)
42 |   * [Interactive Maps with Python's Folium](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/Python_and_maps.ipynb)
43 | * [USA Dashboard](https://github.com/Data4Democracy/usa-dashboard)
44 |   * [Interactive Maps with Python's Folium](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/Python_and_maps.ipynb)
45 | * [Immigration Connect](https://github.com/Data4Democracy/immigration-connect)
46 |   * [Interactive Maps with Python's Folium](https://github.com/Data4Democracy/tutorials/blob/master/Twitter/Python_and_maps.ipynb)
47 | 
48 | ## git and GitHub
49 | * If you're new to using git and GitHub, [download git](https://git-scm.com) and create a GitHub account, then head over to our [GitHub playground repo](https://github.com/Data4Democracy/github-playground). Follow the instructions in the `README` to learn how it's done!
50 | 
51 | # Looking for Something?
52 | * If you have a cool idea for a tutorial or want to see a tutorial that is not yet present, please feel free to add it to our [Wish List](https://docs.google.com/spreadsheets/d/1o_821rVkR-8yz_dMBEN6Srl7tgXzrw-K8Nsqk-xkAmU/edit#gid=0)
53 | 
54 | # Contributing
55 | * If you want to contribute your own tutorials or have any comments, please open up an [issue](https://github.com/Data4Democracy/tutorials/issues).
56 | This way we can avoid duplicates.
57 | 
58 | * If you happen to have a cool idea for a tutorial or want to request a tutorial on a given subject/tool, please feel free to open up an [issue](https://github.com/Data4Democracy/tutorials/issues) requesting it.
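59 | 
60 | Once you have forked this repo on GitHub, a typical contribution flow looks something like this (a sketch — `<your-username>` and `<topic>` are placeholders; see the [GitHub playground repo](https://github.com/Data4Democracy/github-playground) for the full walkthrough):
61 | ```
62 | git clone https://github.com/<your-username>/tutorials.git
63 | cd tutorials
64 | git checkout -b my-new-tutorial
65 | # ...add or edit your tutorial files...
66 | git add .
67 | git commit -m "Add tutorial on <topic>"
68 | git push origin my-new-tutorial
69 | ```
70 | Then open a pull request from your branch against this repository.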
59 | -------------------------------------------------------------------------------- /Twitter/Basic_Twiter_Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Twiter Analysis\n", 8 | "\n", 9 | "* For more information look at [Intro_Collecting_tweets](https://github.com/Data4Democracy/assemble/blob/master/tutorials/Intro_Collecting_Tweets.ipynb)\n", 10 | "\n", 11 | "* This notebook is inspired from [Marco Bonzanini](https://marcobonzanini.com/)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 15, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import sys\n", 23 | "import time\n", 24 | "import json\n", 25 | "from collections import Counter\n", 26 | "from collections import defaultdict" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Frequency Analysis" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 30, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "def get_hashtags(tweet):\n", 45 | " \"\"\"Get all hastags from a Tweet.\n", 46 | " \n", 47 | " Instead of using tweet['entities'], for example,\n", 48 | " we use get() which will not raise a KeyError if\n", 49 | " said field is not present.\n", 50 | " \n", 51 | " Return: list(hashtags)\n", 52 | " \"\"\"\n", 53 | " entities = tweet.get('entities', {})\n", 54 | " hastags = entities.get('hashtags', [])\n", 55 | " return [hashtag['text'].lower() for hashtag in hastags]\n", 56 | "\n", 57 | "def get_mentions(tweet):\n", 58 | " \"\"\"Get all user mentions from a Tweet.\n", 59 | " \n", 60 | " Return: list(mentions)\n", 61 | " \"\"\"\n", 62 | " entities = tweet.get('entities', {})\n", 63 | " mentions = entities.get('user_mentions', [])\n", 64 | " return [mention['screen_name'] for mention in mentions]\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 34, 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "# This was obtained using the procedure showed in `intro_collecting_tweets` notebook\n", 76 | "\n", 77 | "fname = 'usr_timeline_kdnuggets.jsonl'" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 31, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "Out of 3200 tweets total, 0.950 % of the tweets had at least 1 hashtag.\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "with open(fname, 'r') as f:\n", 97 | " hashtagCount = defaultdict(int)\n", 98 | " for line in f:\n", 99 | " tweet = json.loads(line)\n", 100 | " tweetHashtags = get_hashtags(tweet)\n", 101 | " numHashtags = len(tweetHashtags)\n", 102 | " \n", 103 | " hashtagCount[numHashtags] += 1\n", 104 | " \n", 105 | " tweets_Hashtags = sum([count for numTags, count in hashtagCount.items() if numTags>0])\n", 106 | " tweets_noHashtags = hashtagCount[0]\n", 107 | " tweetsTotal = tweets_Hashtags + tweets_noHashtags\n", 108 | " \n", 109 | " print('Out of {0} tweets total, {1:.3f} % of the tweets had at least 1 hashtag.'.format(\n", 110 | " tweetsTotal, tweets_Hashtags/tweetsTotal))" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 32, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 
123 | "text": [
124 | "Out of 3200 tweets total, 22.2 % of the tweets had at least 1 mention.\n"
125 | ]
126 | }
127 | ],
128 | "source": [
129 | "with open(fname, 'r') as f:\n",
130 | "    mentionCount = defaultdict(int)\n",
131 | "    for line in f:\n",
132 | "        tweet = json.loads(line)\n",
133 | "        tweetmentions = get_mentions(tweet)\n",
134 | "        nummentions = len(tweetmentions)\n",
135 | "        \n",
136 | "        mentionCount[nummentions] += 1\n",
137 | "    \n",
138 | "    tweets_Mentions = sum([count for numMentions, count in mentionCount.items() if numMentions>0])\n",
139 | "    tweets_noMentions = mentionCount[0]\n",
140 | "    tweetsTotal = tweets_Mentions + tweets_noMentions\n",
141 | "    \n",
142 | "    print('Out of {0} tweets total, {1:.1f} % of the tweets had at least 1 mention.'.format(\n",
143 | "        tweetsTotal, 100 * tweets_Mentions / tweetsTotal))"
144 | ]
145 | },
146 | {
147 | "cell_type": "markdown",
148 | "metadata": {},
149 | "source": [
150 | "## Most Common Mentions and Hashtags"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": 14,
156 | "metadata": {
157 | "collapsed": false
158 | },
159 | "outputs": [
160 | {
161 | "name": "stdout",
162 | "output_type": "stream",
163 | "text": [
164 | " 1242 --- kdn                 \n",
165 | "  842 --- datascience         \n",
166 | "  696 --- machinelearning     \n",
167 | "  445 --- icymi               \n",
168 | "  408 --- deeplearning        \n",
169 | "  368 --- bigdata             \n",
170 | "  207 --- analytics           \n",
171 | "  187 --- ai                  \n",
172 | "  165 --- python              \n",
173 | "  136 --- datascientist       \n"
174 | ]
175 | }
176 | ],
177 | "source": [
178 | "with open(fname, 'r') as f:\n",
179 | "    hashtags = Counter()\n",
180 | "    for line in f:\n",
181 | "        tweet = json.loads(line)\n",
182 | "        tweetHashtags = get_hashtags(tweet)\n",
183 | "        hashtags.update(tweetHashtags)\n",
184 | "\n",
185 | "for hashtag, count in hashtags.most_common(10):\n",
186 | "    print('{0:5} --- {1:20}'.format(count, hashtag))"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 33,
192 | "metadata": {
193 | "collapsed": false
194 | },
195 | "outputs": [
196 | {
197 | "name": "stdout",
198 | "output_type": "stream",
199 | "text": [
200 | "   39 --- kdnuggets           \n",
201 | "   28 --- KDnuggetsJobs       \n",
202 | "   21 --- mattmayo13          \n",
203 | "   17 --- odsc                \n",
204 | "   14 --- OReillyMedia        \n",
205 | "   13 --- DJ44                \n",
206 | "   12 --- bigdataconf         \n",
207 | "   12 --- Microsoft           \n",
208 | "   11 --- DeepMindAI          \n",
209 | "   11 --- jameskobielus       \n"
210 | ]
211 | }
212 | ],
213 | "source": [
214 | "with open(fname, 'r') as f:\n",
215 | "    mentions = Counter()\n",
216 | "    for line in f:\n",
217 | "        tweet = json.loads(line)\n",
218 | "        tweetmentions = get_mentions(tweet)\n",
219 | "        mentions.update(tweetmentions)\n",
220 | "\n",
221 | "for mention, count in mentions.most_common(10):\n",
222 | "    print('{0:5} --- {1:20}'.format(count, mention))"
223 | ]
224 | },
225 | {
226 | "cell_type": "markdown",
227 | "metadata": {},
228 | "source": [
229 | "# Text Analysis"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 37,
235 | "metadata": {
236 | "collapsed": false
237 | },
238 | "outputs": [],
239 | "source": [
240 | "import string\n",
241 | "from nltk.tokenize import TweetTokenizer\n",
242 | "from nltk.corpus import stopwords"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 53,
248 | "metadata": {
249 | "collapsed": false
250 | },
251 | "outputs": [],
252 | "source": [
253 | "def process(text, tokenizer=TweetTokenizer(), stopwords=[]):\n",
254 | "    \"\"\"Process the text of a tweet:\n",
255 | "    -Lowercase\n",
256 | "    -Tokenize\n",
257 | "    -Stopword removal\n",
258 | "    -Digits removal\n",
259 | "    \n",
260 | "    Return: list(strings)\n",
261 | "    \"\"\"\n",
262 | "    text = text.lower()\n",
263 | "    tokens = tokenizer.tokenize(text)\n",
264 | "    tokens = normalize_contractions(tokens)\n",
265 | "    return [token for token in tokens if token not in stopwords and not token.isdigit()]\n",
266 | "\n",
267 | "def normalize_contractions(tokens):\n",
268 | "    \"\"\"Normalize English contractions.\n",
269 | "    \n",
270 | "    Return: generator\n",
271 | "    \"\"\"\n",
272 | "    token_map = {\n",
273 | "        \"i'm\" : \"i am\",\n",
274 | "        \"you're\" : \"you are\",\n",
275 | "        \"it's\" : \"it is\",\n",
276 | "        \"we'll\" : \"we will\",\n",
277 | "    }\n",
278 | "    for token in tokens:\n",
279 | "        if token in token_map.keys():\n",
280 | "            for item in token_map[token].split():\n",
281 | "                yield item\n",
282 | "        else:\n",
283 | "            yield token"
284 | ]
285 | },
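286 | {
287 | "cell_type": "markdown",
288 | "metadata": {},
289 | "source": [
290 | "A quick sanity check of `normalize_contractions()` (this cell is an added illustration and was not part of the original run):"
291 | ]
292 | },
293 | {
294 | "cell_type": "code",
295 | "execution_count": null,
296 | "metadata": {
297 | "collapsed": false
298 | },
299 | "outputs": [],
300 | "source": [
301 | "# Contractions found in token_map are expanded token by token; everything else passes through.\n",
302 | "# This should yield: ['i', 'am', 'learning', '#datascience', 'it', 'is', 'fun']\n",
303 | "list(normalize_contractions([\"i'm\", \"learning\", \"#datascience\", \"it's\", \"fun\"]))"
304 | ]
305 | },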
| " -Lowercase\n", 256 | " -Tokenize\n", 257 | " -Stopword removal\n", 258 | " -Digits removal\n", 259 | " \n", 260 | " Return: list(strings)\n", 261 | " \"\"\"\n", 262 | " text = text.lower()\n", 263 | " tokens = tokenizer.tokenize(text)\n", 264 | " tokens = normalize_contractions(tokens)\n", 265 | " return [token for token in tokens if token not in stopwords and not token.isdigit()]\n", 266 | "\n", 267 | "def normalize_contractions(tokens):\n", 268 | " \"\"\"Normalize english contractions.\n", 269 | " \n", 270 | " Return: generator\n", 271 | " \"\"\"\n", 272 | " token_map = {\n", 273 | " \"i'm\" : \"i am\",\n", 274 | " \"you're\" : \"you are\",\n", 275 | " \"it's\" : \"it is\",\n", 276 | " \"we'll\" : \"we will\",\n", 277 | " }\n", 278 | " for token in tokens:\n", 279 | " if token in token_map.keys():\n", 280 | " for item in token_map[token].split():\n", 281 | " yield item\n", 282 | " else:\n", 283 | " yield token" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 54, 289 | "metadata": { 290 | "collapsed": true 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "fname = 'usr_timeline_kdnuggets.jsonl'" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 55, 300 | "metadata": { 301 | "collapsed": false 302 | }, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | " 1242 --- #kdn \n", 309 | " 842 --- #datascience \n", 310 | " 696 --- #machinelearning \n", 311 | " 445 --- #icymi \n", 312 | " 408 --- #deeplearning \n", 313 | " 368 --- #bigdata \n", 314 | " 339 --- top \n", 315 | " 274 --- data \n", 316 | " 207 --- #analytics \n", 317 | " 187 --- #ai \n", 318 | " 165 --- #python \n", 319 | " 136 --- #datascientist \n", 320 | " 135 --- kdnuggets \n", 321 | " 124 --- #datamining \n", 322 | " 120 --- #neuralnetworks \n" 323 | ] 324 | } 325 | ], 326 | "source": [ 327 | "tweet_tokenizer = TweetTokenizer()\n", 328 | "\n", 329 | "punct = list(string.punctuation)\n", 330 | "stopword_list = stopwords.words('english') + punct + ['rt', 'via', '...', '…']\n", 331 | "\n", 332 | "twit = Counter()\n", 333 | "with open(fname, 'r') as f:\n", 334 | " for line in f:\n", 335 | " tweet = json.loads(line)\n", 336 | " tokens = process(text=tweet['text'],\n", 337 | " tokenizer=tweet_tokenizer,\n", 338 | " stopwords=stopword_list)\n", 339 | " twit.update(tokens)\n", 340 | " \n", 341 | "for token, count in twit.most_common(15):\n", 342 | " print('{0:5} --- {1:20}'.format(count, token))" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": { 349 | "collapsed": true 350 | }, 351 | "outputs": [], 352 | "source": [] 353 | } 354 | ], 355 | "metadata": { 356 | "anaconda-cloud": {}, 357 | "kernelspec": { 358 | "display_name": "Python [default]", 359 | "language": "python", 360 | "name": "python3" 361 | }, 362 | "language_info": { 363 | "codemirror_mode": { 364 | "name": "ipython", 365 | "version": 3 366 | }, 367 | "file_extension": ".py", 368 | "mimetype": "text/x-python", 369 | "name": "python", 370 | "nbconvert_exporter": "python", 371 | "pygments_lexer": "ipython3", 372 | "version": "3.5.2" 373 | } 374 | }, 375 | "nbformat": 4, 376 | "nbformat_minor": 1 377 | } 378 | -------------------------------------------------------------------------------- /Twitter/Clustering_twitter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Communities in 
Twitter\n", 8 | "\n", 9 | "\n", 10 | "* Adapted from [Mastering Social Media Mining with Python](https://www.packtpub.com/big-data-and-business-intelligence/mastering-social-media-mining-python).\n", 11 | "\n", 12 | "* For more information look at [Intro_Collecting_tweets](https://github.com/Data4Democracy/assemble/blob/master/tutorials/Intro_Collecting_Tweets.ipynb)" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 18, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import os\n", 24 | "import sys\n", 25 | "import json\n", 26 | "import time\n", 27 | "import math\n", 28 | "\n", 29 | "from tweepy import API\n", 30 | "from tweepy import OAuthHandler\n", 31 | "\n", 32 | "from tweepy import Cursor\n", 33 | "from collections import defaultdict\n", 34 | "\n", 35 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 36 | "from sklearn.cluster import KMeans" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "# Authentication" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 25, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "consumer_key = 'xxxxxxxxxxxxxxxxxxxx'\n", 55 | "consumer_secret = 'xxxxxxxxxxxxxxxxxxxx' \n", 56 | "access_token = 'xxxxxxxxxxxxxxxxxxxx'\n", 57 | "access_secret = 'xxxxxxxxxxxxxxxxxxxx'" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 20, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "def get_twitter_auth():\n", 69 | " \"\"\"Setup Twitter Authentication.\n", 70 | " \n", 71 | " Return: tweepy.OAuthHandler object\n", 72 | " \"\"\"\n", 73 | " auth = OAuthHandler(consumer_key, consumer_secret)\n", 74 | " auth.set_access_token(access_token, access_secret)\n", 75 | " return auth\n", 76 | " \n", 77 | "def get_twitter_client():\n", 78 | " \"\"\"Setup Twitter API Client.\n", 79 | " \n", 80 | " Return: tweepy.API object\n", 81 | " \"\"\"\n", 82 | " auth = get_twitter_auth()\n", 83 | " client = API(auth)\n", 84 | " return client\n", 85 | "\n", 86 | "client = get_twitter_client()" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "# Getting the Data\n", 94 | "\n", 95 | "## Input Parameter" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 21, 101 | "metadata": { 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "screen_name = 'X1alejandro3x'" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 22, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def paginate(items, n):\n", 118 | " \"\"\"Generate n-sized chunks for items.\"\"\"\n", 119 | " for i in range(0, len(items), n):\n", 120 | " yield items[i:i+n]" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 23, 126 | "metadata": { 127 | "collapsed": true 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "# Make directory\n", 132 | "dirname = 'users/{}'.format(screen_name)\n", 133 | "try:\n", 134 | " os.makedirs(dirname, mode=0o755, exist_ok=True)\n", 135 | "except OSError:\n", 136 | " print('Directory {} already exists.'.format(dirname))\n", 137 | " \n", 138 | "# Max num of requests per window\n", 139 | "MAX_FRIENDS = 15000\n", 140 | "max_pages = math.ceil(MAX_FRIENDS / 5000)\n", 141 | " \n", 142 | "# get followers for a given user\n", 143 | "fname = 
'users/{}/followers.jsonl'.format(screen_name)\n",
144 | "with open(fname, 'w') as f:\n",
145 | "    for followers in Cursor(client.followers_ids, screen_name=screen_name).pages(max_pages):\n",
146 | "        for chunk in paginate(followers, 100):\n",
147 | "            users = client.lookup_users(user_ids=chunk)\n",
148 | "            for user in users:\n",
149 | "                f.write(json.dumps(user._json)+'\\n')\n",
150 | "        if len(followers) == 5000:\n",
151 | "            print(\"More results available. Sleeping for 60 seconds to avoid rate limit\")\n",
152 | "            time.sleep(60)"
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {},
158 | "source": [
159 | "## KMeans Clustering"
160 | ]
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {},
165 | "source": [
166 | "* `max_df` : Features appearing in more than `max_df` (a fraction) of the documents will be ignored (i.e., stop words).\n",
167 | "* `min_df` : Features that occur in fewer than `min_df` documents will be ignored."
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 24,
173 | "metadata": {
174 | "collapsed": false
175 | },
176 | "outputs": [
177 | {
178 | "name": "stdout",
179 | "output_type": "stream",
180 | "text": [
181 | "Data dimensions: (1387, 200)\n",
182 | "---------------------------------------------------- Cluster 0\n",
183 | "Data scientist & cinephile – I tweet mainly about #DataViz – Creator of https://t.co/9bjdRdRrCz & https://t.co/2bqanRlP9q\n",
184 | "AllSight is the first Customer Intelligence Management system that manages and synthesizes ALL data to provide the 'big picture' for your customer.\n",
185 | "Leading data-driven business\n",
186 | "Data scientist (@adobe) by day, electronics enthusiast by night, and physicist at heart.\n",
187 | "Data Scientist/Machine Learning Engineer\n",
188 | "Student. Love data #MachineLearning #DataScience #InfoSec #AI\n",
189 | "Data Scientist @docenthealth; Alumni @TelecomPTech; Writing a book on Amazon Machine Learning for @PacktPub; #NLProc #Algorithms #MachineLearning\n",
190 | "Data Science Software Development, Social Media, Digital Marketing, Predictive Analytics, Machine Learning, AI, PCA, R, Python, Hadoop, Spark, Tableau, NLP\n",
191 | "Extracting Actionable Intelligence from Data - The Only True Benchmarking Opportunity For Heads\n",
192 | "Of Data Insights From Europe's Most Progressive Companies\n",
193 | "Research scientist | Trying to spend more time outdoors\n",
194 | "\n",
195 | "\n",
196 | "\n",
197 | "---------------------------------------------------- Cluster 1\n",
198 | "SEO for Growth: The Ultimate Guide for Marketers, Web Designers & Entrepreneurs, a search engine optimization book by John Jantsch & Phil Singleton.\n",
199 | "How #DataVisualization Drives Business Impact\n",
200 | "Turn any #software into a #cloud solution\n",
201 | "\n",
202 | "Transforme les logiciels #legacy en solution #Cloud #SaaS sans redéveloppement.\n",
203 | "Our Mission is to promote food safety by increasing awareness of food borne illness & formation of partnerships within the food industry.\n",
204 | "perdida entre páginas de libros, y ahogada en una taza de café .\n",
205 | "CEO @ubisend. A chatbot and AI messaging platform to help companies attract readers, drive leads and close sales #startup #conversationalmarketing #ai #chatbots\n",
206 | "Chatbot Developer. Passion for using machine learning and artificial intelligence to solve complex problems.
Chat with us: https://t.co/QSTMJwaTTF\n", 207 | "Verified reviews and personalised recommendations of the best places to eat.\n", 208 | "\n", 209 | "https://t.co/6cyRQOyFHs\n", 210 | "World's #1 tech job site\n", 211 | "Bringing to you all the latest in the world of business intelligence #BI #BigData\n", 212 | "\n", 213 | "\n", 214 | "\n", 215 | "---------------------------------------------------- Cluster 2\n", 216 | "Software engineer, Traveller, Entrepreneur, Science enthusiast\n", 217 | "The current state of actual science and mathematics--not the popularized stuff. Join us.\n", 218 | "Scientist in NASA's Earth Sciences Division, AAAS STP Fellow. Science Advocate - Specializing in Ecology, Evolution, Remote Sensing, and Geography\n", 219 | "Complexity science nerd, pattern interruptor. #IBMer. @NECSI affiliate. Founding Editor, contributor @d4emergence. Views my own.\n", 220 | "Mathematician-Economics-Statistics-Computer Science-Triathlon-Quantative-Finance\n", 221 | "Theoretical physicist. Science blogger. Amateur chess and go player. Professional chocolate eater.\n", 222 | "Physicist pretending to be a chemist, sometimes the other way around. University lecturer doing research in Computational Materials Science.\n", 223 | "Lifelong student and lover of science. Most recently, a dog mom and absolutely loving it!\n", 224 | "BA Political Science May 2000 MBAE February 2014 Medical Scientist Training Program Candidate for MD/PhD 2014. Fluent in French and Spanish, German, Japanese.\n", 225 | "Interested in #science, #GPCR and drug discovery. Any expressed opinion is solely my own.\n", 226 | "\n", 227 | "\n", 228 | "\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "k = 3 # Number of clusters\n", 234 | "max_features = 200 # Max number of features\n", 235 | "max_ngram = 4 # Upper boundary for ngrams to be extracted\n", 236 | "\n", 237 | "max_df = 0.8 # Max document freq for a feature \n", 238 | "min_df = 2 # Min document freq for a feature \n", 239 | "min_ngram = 1 # Lower boundary for ngrams to be extracted\n", 240 | "use_idf = True # True==TF-IDF, False==TF\n", 241 | "\n", 242 | "with open(fname) as f:\n", 243 | " users = []\n", 244 | " for line in f:\n", 245 | " profile = json.loads(line)\n", 246 | " users.append(profile['description'])\n", 247 | " \n", 248 | " vectorizer = TfidfVectorizer(max_df=max_df,\n", 249 | " min_df=min_df,\n", 250 | " max_features=max_features,\n", 251 | " stop_words='english',\n", 252 | " ngram_range=(min_ngram, max_ngram),\n", 253 | " use_idf=use_idf)\n", 254 | " X = vectorizer.fit_transform(users)\n", 255 | " print('Data dimensions: {}'.format(X.shape))\n", 256 | " \n", 257 | " # perform clustering\n", 258 | " km = KMeans(n_clusters=k)\n", 259 | " km.fit(X)\n", 260 | " clusters = defaultdict(list)\n", 261 | " for i, label in enumerate(km.labels_):\n", 262 | " clusters[label].append(users[i])\n", 263 | " \n", 264 | " for label, descriptions in clusters.items():\n", 265 | " print('---------------------------------------------------- Cluster {}'.format(label))\n", 266 | " for desc in descriptions[:10]:\n", 267 | " print(desc)\n", 268 | " print('\\n\\n')" 269 | ] 270 | } 271 | ], 272 | "metadata": { 273 | "kernelspec": { 274 | "display_name": "Python [default]", 275 | "language": "python", 276 | "name": "python3" 277 | }, 278 | "language_info": { 279 | "codemirror_mode": { 280 | "name": "ipython", 281 | "version": 3 282 | }, 283 | "file_extension": ".py", 284 | "mimetype": "text/x-python", 285 | "name": "python", 286 | "nbconvert_exporter": "python", 287 | 
"pygments_lexer": "ipython3", 288 | "version": "3.5.2" 289 | } 290 | }, 291 | "nbformat": 4, 292 | "nbformat_minor": 1 293 | } 294 | -------------------------------------------------------------------------------- /Twitter/Intro_Collecting_Tweets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 83, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import sys\n", 12 | "import json\n", 13 | "import time\n", 14 | "import string\n", 15 | "\n", 16 | "from tweepy import API\n", 17 | "from tweepy import OAuthHandler\n", 18 | "\n", 19 | "from tweepy import Cursor\n", 20 | "\n", 21 | "from tweepy import Stream\n", 22 | "from tweepy.streaming import StreamListener" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Accessing Twitter Data\n", 30 | "* Authentication\n", 31 | " * Performed using `Open Authorization (OAuth)` which involves communication between the user, the consumer (app) and the resource provider (Twitter in this case). \n", 32 | "* Data Collection\n", 33 | "* Data Cleaning\n", 34 | "\n", 35 | "\n", 36 | "## API\n", 37 | "* REST (Representational State Transfer) APIs \n", 38 | " * Allow us to read already existing Tweets. Keep in mind that there are limits on har far back in time we can search.\n", 39 | " * Useful for looking at a certain user.\n", 40 | "* Streaming APIs\n", 41 | " * Allow us to retrieve all the tweets that have been published since we opened an HTTP connection.\n", 42 | " * useful for listening to a live event.\n", 43 | " \n", 44 | "`Tweepy` is a Python client used to implement different types of calls to the desired API." 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Your twitter API keys and tokens \n", 52 | "\n", 53 | "In order to access Twitter data we need to go to the [Application Management Page](https://apps.twitter.com/) to register your APP.\n", 54 | "\n", 55 | "Your [Keys and Access Tokens](https://dev.twitter.com/oauth/overview) will be available once your app is registered.\n", 56 | "\n", 57 | "For more information look [here](https://dev.twitter.com/)." 
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 1,
63 | "metadata": {
64 | "collapsed": true
65 | },
66 | "outputs": [],
67 | "source": [
68 | "# One option is to place your consumer key, secrets, access token and secret in a local file \n",
69 | "# that we will call `config.py`.\n",
70 | "from config import *\n",
71 | "\n",
72 | "# contents of config.py\n",
73 | "\n",
74 | "# consumer_key = 'XXXXXXXXXXXXXXXXXXXXX'\n",
75 | "# consumer_secret = 'XXXXXXXXXXXXXXXXXXXXX' \n",
76 | "# access_token = 'XXXXXXXXXXXXXXXXXXXXX'\n",
77 | "# access_secret = 'XXXXXXXXXXXXXXXXXXXXX'"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 85,
83 | "metadata": {
84 | "collapsed": false
85 | },
86 | "outputs": [],
87 | "source": [
88 | "# Authentication\n",
89 | "def get_twitter_auth():\n",
90 | "    \"\"\"Setup Twitter Authentication.\n",
91 | "    \n",
92 | "    Return: tweepy.OAuthHandler object\n",
93 | "    \"\"\"\n",
94 | "    auth = OAuthHandler(consumer_key, consumer_secret)\n",
95 | "    auth.set_access_token(access_token, access_secret)\n",
96 | "    return auth\n",
97 | "    \n",
98 | "def get_twitter_client():\n",
99 | "    \"\"\"Setup Twitter API Client.\n",
100 | "    \n",
101 | "    Return: tweepy.API object\n",
102 | "    \"\"\"\n",
103 | "    auth = get_twitter_auth()\n",
104 | "    client = API(auth)\n",
105 | "    return client\n",
106 | "\n",
107 | "client = get_twitter_client()"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "# REST API"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {},
120 | "source": [
121 | "## Going through Timelines"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 86,
127 | "metadata": {
128 | "collapsed": false,
129 | "scrolled": true
130 | },
131 | "outputs": [
132 | {
133 | "name": "stdout",
134 | "output_type": "stream",
135 | "text": [
136 | "1.) RT @TimOfLegend: If you could make a human centipede using only 3-headed dogs, it could be a fun way to teach fractals to kids.\n",
137 | "\n",
138 | "2.) Trump, Putin, & the hidden history of how Russia interfered in the US Presidential election: https://t.co/BxfjkMDLev #RussiaHacking #Estonia\n",
139 | "\n",
140 | "3.) I might have to try something like this in my DSP class.\n",
141 | "\n",
142 | "https://t.co/9c3dwVzVe0\n",
143 | "\n",
144 | "4.) When it comes to #Healthcare, #BigData is a Big Deal: https://t.co/yDg6tdtvwQ #Analytics #HealthIT #DataScience… https://t.co/SdBVKE96Cn\n",
145 | "\n",
146 | "5.) TV dilemma? #NFCChampionship or #MythBustersSearch testing a classic w/explosives. We've got encores, https://t.co/EmwEYkpkfn & your DVR.\n",
147 | "\n"
148 | ]
149 | }
150 | ],
151 | "source": [
152 | "time.sleep(900) # sleep for 15 mins.\n",
153 | "\n",
154 | "for i, status in enumerate(Cursor(client.home_timeline).items(5)): \n",
155 | "    print('{0}.) {1}\\n'.format(i+1, status.text))"
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "metadata": {},
161 | "source": [
162 | "## Getting Tweets"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 90,
168 | "metadata": {
169 | "collapsed": false
170 | },
171 | "outputs": [],
172 | "source": [
173 | "\"\"\"If you get: TweepError: Twitter error response: status code = 429\n",
174 | "Then you have exceeded your rate limit.\n",
175 | "\"\"\"\n",
176 | "with open('home_timeline.jsonl','w') as f:\n",
177 | "    for page in Cursor(client.home_timeline, count=200).pages(4): # limit of 800 for your home timeline\n",
178 | "        for status in page:\n",
179 | "            f.write(json.dumps(status._json)+'\\n')\n",
180 | "    \n",
181 | "#with open('home_timeline.txt','w') as f:\n",
182 | "#    for page in Cursor(client.home_timeline, count=200).pages(4): # limit of 3200 for another user\n",
183 | "#        for status in page:\n",
184 | "#            f.write(json.dumps(status.text) +'\\n')"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 95,
190 | "metadata": {
191 | "collapsed": false
192 | },
193 | "outputs": [],
194 | "source": [
195 | "user = 'kdnuggets'\n",
196 | "fname = 'usr_timeline_{}.jsonl'.format(user)\n",
197 | "with open(fname, 'w') as f:\n",
198 | "    for page in Cursor(client.user_timeline, screen_name=user, count=200).pages(16): # limit of 3200 for another user's timeline\n",
199 | "        for status in page:\n",
200 | "            f.write(json.dumps(status._json)+'\\n')"
201 | ]
202 | },
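203 | {
204 | "cell_type": "markdown",
205 | "metadata": {},
206 | "source": [
207 | "Side note (an addition to this notebook, not part of the original run): instead of sleeping by hand between batches of requests, you can ask tweepy to wait out rate limits for you. The two flags below exist in tweepy 3.x; check the docs for your version."
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {
214 | "collapsed": true
215 | },
216 | "outputs": [],
217 | "source": [
218 | "# Assumes tweepy 3.x: the client sleeps until the rate-limit window resets\n",
219 | "# (and prints a notification) instead of raising a TweepError.\n",
220 | "client = API(get_twitter_auth(), wait_on_rate_limit=True, wait_on_rate_limit_notify=True)"
221 | ]
222 | },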
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "# Streaming API"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "* `MyListener` is an extension of `tweepy.StreamListener`. \n",
235 | "\n",
236 | "* This basic implementation overrides the two methods `on_data()` and `on_error()`.\n",
237 | "\n",
238 | "* Notice that the return type of the class methods is a Boolean - `True` continues streaming, `False` stops the streaming."
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 111,
244 | "metadata": {
245 | "collapsed": false
246 | },
247 | "outputs": [],
248 | "source": [
249 | "#time.sleep(900) # sleep for 15 mins.\n",
250 | "\n",
251 | "class MyListener(StreamListener):\n",
252 | "    \"\"\"Streaming Twitter Data.\"\"\"\n",
253 | "    \n",
254 | "    def __init__(self, fname):\n",
255 | "        safe_fname = self.format_filename(fname)\n",
256 | "        self.outfile = 'stream_{}.jsonl'.format(safe_fname)\n",
257 | "        \n",
258 | "    @staticmethod\n",
259 | "    def format_filename(fname):\n",
260 | "        \"\"\"Convert fname into safe string for file name.\n",
261 | "        \n",
262 | "        Return: string\n",
263 | "        \"\"\"\n",
264 | "        return ''.join(MyListener.convert_valid(one_char) for one_char in fname)\n",
265 | "    \n",
266 | "    @staticmethod\n",
267 | "    def convert_valid(one_char):\n",
268 | "        \"\"\"Convert invalid characters into '_'\n",
269 | "        \n",
270 | "        Return: string\n",
271 | "        \"\"\"\n",
272 | "        valid_chars = '-_.{0}{1}'.format(string.ascii_letters, string.digits)\n",
273 | "        if one_char in valid_chars:\n",
274 | "            return one_char\n",
275 | "        else:\n",
276 | "            return '_'\n",
277 | "    \n",
278 | "    def on_data(self, data):\n",
279 | "        try:\n",
280 | "            with open(self.outfile, 'a') as f:\n",
281 | "                f.write(data)\n",
282 | "                return True\n",
283 | "        except BaseException as e:\n",
284 | "            sys.stderr.write('Error on_data: {}'.format(e))\n",
285 | "            time.sleep(5)\n",
286 | "        return True\n",
287 | "    \n",
288 | "    def on_error(self, status):\n",
289 | "        if status==420:\n",
290 | "            sys.stderr.write('Rate limit exceeded\\n')\n",
291 | "            return False\n",
292 | "        else:\n",
293 | "            sys.stderr.write('Error {}\\n'.format(status))\n",
294 | "            return True\n",
295 | "    "
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": 124,
301 | "metadata": {
302 | "collapsed": false
303 | },
304 | "outputs": [],
305 | "source": [
306 | "query = ['#Python', '#DataScience']\n",
307 | "query_name = ''.join(query)\n",
308 | "\n",
309 | "auth = get_twitter_auth()\n",
310 | "twitter_stream = Stream(auth, MyListener(query_name))\n",
311 | "\n",
312 | "twitter_stream.filter(track=query, async=True)"
313 | ]
314 | },
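315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "Because the stream was started with `async=True`, it keeps running in a background thread. One way to stop it once you have collected enough data (an added note, not part of the original run):"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": null,
325 | "metadata": {
326 | "collapsed": true
327 | },
328 | "outputs": [],
329 | "source": [
330 | "# disconnect() closes the connection and ends the background thread;\n",
331 | "# it is part of tweepy's Stream API.\n",
332 | "twitter_stream.disconnect()"
333 | ]
334 | },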
"iVBORw0KGgoAAAANSUhEUgAAAiUAAAFyCAYAAAAqDqo5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzsnXuclVX1/9+Li8KAJGiIICPCaN5RGCTyRmWQTJ78laZm\nFyFvBemX+gJpJmRpQZkmZN/qS1qao5Zf0RhJstJCZxQZtFBRZwQBFRBFRIf77N8f+zwyc+bMzLk8\n133W+/V6XsN5znOevT6zDnPW2XuttcUYg6IoiqIoStR0idoARVEURVEU0KBEURRFUZSYoEGJoiiK\noiixQIMSRVEURVFigQYliqIoiqLEAg1KFEVRFEWJBRqUKIqiKIoSCzQoURRFURQlFmhQoiiKoihK\nLNCgRFEUJQREpJeINIvIt6K2RVHiigYlipIH6Q+Vzo49InJa1LZ6iMh+IjJTRD7m4z1/1IH2r/g1\njoOY9KEoSha6RW2AoiSML2U8/ipwRvq8tDj/QmgWdU4fYCawDXjCx/sa4GvAzozzfo7hDMaY90Wk\nJ7AralsUJa5oUKIoeWCMuavlYxEZA5xhjKmOyKRckM4vKZh7jTFNORsi0h1oNsbsCdCm2GKMyQzg\nFEVpgS7fKEoAiEgXEdkiIj9sca6biLwvIjvS35i98zNFZJuI7NPi3LEicr+IvCUiTSLypIh8Oss4\n/URknoisTd/3pZY5CyLyEWANdlbjxy2WWaannx8kIneIyDoR2S4ir4vI/4nIQB9+B8ekx7pcRGaI\nyCqgCTg0/XzP9DLQK+mxV4vIdSLSLeM+PUXkFyKyKf07vVdEhmbmZ4jIn0Tk31ns+KmIbM1y/mIR\nWZ7+/b4pIr8TkYMyrnlaRJ4QkeEi8s/0tWtEZEqW+5WJyA0i8nJaz2sico+IDEo/nzWnRETKReRO\nEdmYfh88KyJfzHL//xaRF9I2vCUidSJydmd+UJQkoTMlihIAxphmEakFWuaWjAR6As3AR4F/pM+f\nAjzlfYsWkROAfwKvADdgl10uABaKyGeMMX9JX9cbWAL0A/4HeC093k9F5EBjzNXA68A3gbnA3cDC\n9JjL0z8fxAYJt2CDlwHAeGBQ+rWdcaCIvN/i8W5jzJaMa6ZgvwDNS2vfKiJdgYeB49O2N6R/P1cD\nQ4CWeSl3ASngt0A98Gng/2ibm9FerkabPA4R+REwDbgT+CVwMHAlMEpERhpjtrV47QCgJm3HXVhf\n/FxElhtjHk/frzvwV2A0cAdQB+wPnAkcgfVNG0RkMLAUeBf4GbA5rfVOEelhjPlt+rqpwJz0vW8E\nyoATgFHAgnZ0K0ryMMbooYceBR7YD/s97Tz3XWxAsU/68X8Dz2MDgmvT57oCW4EftnjdEuBJoEuL\nc4L98HqmxbkfYj/EBmeM+zNgO/Dh9ONB2GBgesZ1B6XPf6MA3T9KvzbzeL7FNcekz20A9st4/eXA\nDmB4xvlvAXuAY9OPT07f44aM6+5PX/etFuf+CPw7i60/Ad5t8fjI9GsnZ1w3Mn1+SotzS9PnUi3O\nlQFvA79tce6b6esmdfA765XW0tLmu4FGoFfGtQ8Cb3jvAWzA80TU73c99Aj60OUbRQmOfwH7Yr89\nA5yaPvev9L/BfhD2Sp9DRAYAHwPuAfqKyAEicgBwALAYOE5E+qZfew7wd6DJuy597SPAPtgZmI54\nD/tB+gkR6VOAPgNUYRN9vWNiluvuMsZkLp+cg531WJdh+9+xAdjH09dNSI8zN+P1P6fwXJlzgd3A\ngoyxXwXWthjbY6Mx5kHvgbE5NPXA0BbXfA5Ya9IzG7mQXqZKYQOsHhm2PAz0xwZ2AO8AQ0XkuHyE\nKkrS0OUbRQmOJ7GVKV4wcjLwX9gZgonpJYxTsYGBV7FyePrnT4CfZrmnwX5YbQYq0tf/vw6uaxdj\nq0G+C1wPbEwvN/0ZuMMY82aOGh8znSe6rs5y7nDgECDbOC1tLweajDFvZFzzYo72ZaMC6I4NQLKN\nnXl+TZbrNmOXZTyGASvytGMw0AOYip0hymaL93v4IbAIeFZEXsQGLX8wxizNc0xFiTUalChKQBhj\ndojIMuA0EXkAm/vxT2ygUgaMwM5m/LvFTII3e3kDe3NOMlkjIt4sQQ1wczvXrczBxjkich9wNjaX\n5AbgKhE53RjzfGevz5FtWc51AZ7C5pBkm/FYXcA47eWUdM0y9nbsLE+2sTNzYtqrFCq2qsnz9W+A\ne9u5ZjmAMeZZETkCOAvrp/OBb4rIDGNMtuBVURKJBiWKEiz/Ar6OXRJYZ4xZAyAiDcDp2KDkzhbX\nN6Z/7jDG/L2jG4vIaqCss+vopFmXMaYRmzx5Y7pa51nsjM6lndy3GBqBg4wx7QVeHq8CZSJycMZs\nyZFZrt2MTS7NZEiWsfcFXjDGrM/R3s5oBI7O8zXrsLNm5OBDjDHvAdVAtdhKrUXY/jMalCjOoDkl\nihIs/wL2Ayan/+2xBLgYO3vywXljzDps5cZkETkw82YZ5+4FxkqW7rEi0ldEvP/fXnXM/hnXlEmL\nMuQ0jenr9+1cWlHcCxwhIhdkPpEune2RfvgQdkbiiozLrqRtsNUIDBSRD3I9RGQItlqnJX9M/5yZ\nZWxpkbOTD/cB5SLytVxfYIzZgV0uu1BEhmWx5cAW/+6X8dqd2CWsbpkl1IqSZPTNrCjB8jj2w/MI\nbHKmxz+xSaGG1sEK2MqUx4AVIvK/wCpsyerJQF9sOTHYpZYq4K8i8lvgGWwAdDw28bI/Nh9ji4i8\nAnxJRF7FJk0+C3wI+LOI3IvtQLsH+EL6/N1+/QLa4dfYZNffi+2/UocNhI7GJqKOBl4yxjwhIn8G\npotIf+BpbJnt4Cz3vAP4PlAjIr/ABmFfx1Y8fZD/YYx5TkRuAK5OzwzVYAOxCmx+zuy0ffnwK+BC\n4FcicmpaTx9sQPSDDmaEvo1NbK4XkV9jA40DsaW+laR7ugCPi8hKbJ7SRqyPLwbuMcbsztNWRYkv\nUZf/6KFHkg9sVcjuTq55FlvtcWyLc0OxQcAL7bxmGPB7bFnoduwyxv3AZzKu6w38GHgZm7uxHhvQ\nfBOQFtedgv1A35Yedzr2w28e9kN7K7bM9V/AWTno/lFaU1kH1xyTHuvSdp7vjs0peS5t15tALTAD\n6Nniup7AL4BN2IDqHuAwMspr09dWtbjff7BBxk+ALVnGPw87Y7UVm0eyAruMdWiLa5YCj2d57R+B\nZzPOlaV98UraZ2uxS3MD08/3Sv8+pma8bgC2V8qa9OvWYZdmLmxxzZS0b97EBlArsQFYj6j/D+ih\nh5+HGKN7QymKkixEpBc2mPhvY8zPorZHURR/iDynRGwL6mfFto/ekm7p/OmMa64T2/66SUT+KiIV\nUdmrKIqiKEowRB6UYKc4Z2DLI0dimyc9ICJHAYjIDOzU5aXASdipy4ezJOgpiqIoipJgYrl8IyJv\nYadlbxOR14GfGGNuSj/XB9u2+qvGmPZq+xVFcZj08s27
2L8TN0Vtj6Io/hCr6pt0CeMXsAljT4jI\nYdgksL951xhj3hWRJ4ExtN9wSFEUhzHGvE/bpmiKoiScWAQlInIsNuu+BzZ57f8ZY14UkTHYkskN\nGS/ZgA1W2rvfAdiuh6ux2eyKoiiKouRGD2zTwYeNMW+FOXAsghJsedtwbH8Er3dBm4ZQeTAe+IMf\nhimKoihKiXIhcFeYA8YiKDG2+c8r6YfLReQkbMfGOdhujgfRerbkINJ7QrTDaoA777yTo446ynd7\nszF16lRuusm9pW1XdYFqSyJh6XrnHfjkJ6GqCq67LvDhAHd9Bqotabzwwgt86UtfgsL2oCqKWAQl\nWegC7GuMWSUi64FPAv+GDxJdR2ObKbXHdoCjjjqKESNGBG0rAB/60IdCGytMXNUFqi2JhKXrqafs\nz3fegbB+ja76DFRbggk9/SHyoCTd7nkRtpvhftjpotOBcelLbgauSW9gthr4Abbj4QOhG9sB69f7\nta9XvHBVF6i2JBKWroaG1j/DwFWfgWpTcifyoAS7P8fvsHt7bMHOiIwz6V0zjd1avQy7t8T+2FbL\nZxq7IVVseO2116I2IRBc1QWqLYmEpcsLRjZsgK1bYb/9gh/TVZ+BalNyJ/KgxBhzcQ7XzAJmBW5M\nEYwcOTJqEwLBVV2g2pJIWLoaG6FHD9i+HV55BYYPD35MV30Gqk3JnTh0dHWCCy5oswO7E7iqC1Rb\nEglLV0MDjB27999h4KrPQLUpuRPLjq7FIiIjgGXLli1zOQFJUZSAOOgg+PrX4aab4OqrYcaMqC1S\nlPCor6/3ZoBGGmPqwxxbZ0oURVFasHUrbNwIFRX2aGyM2iJFKR00KPGJiRMnRm1CILiqC1RbEglD\nlxeEeEFJWMs3rvoMVJuSOxqU+MS4ceM6vyiBuKoLVFsSCUOXF4RUVMCwYeEFJa76DFSbkjuaU6Io\nitKC2bPhhhts47TbboOLL4amJluNoyilgOaUKIqixISGBjtLImJ/GgOrVkVtlaKUBhqUKIqitMAL\nSsAu33jnFEUJHg1KfGLJkiVRmxAIruoC1ZZEwtDV2Lg3GDn4YOjZM5wKHFd9BqpNyR0NSnxizpw5\nUZsQCK7qAtWWRILWtW0brF27d6akS5fwkl1d9RmoNiV3NNHVJ5qamigrKwtlrDBxVReotiQStK7n\nn4djjoHHHoPTTrPnzj7btpv/y18CGxZw12eg2pKGJro6gGtvSg9XdYFqSyJB6/KWabzlGwivgZqr\nPgPVpuSOBiWKoihpGhpsDsnBB+89V1EBq1fDrl2RmaUoJYMGJYqiKGkaGuwsSZcWfxmHDYPdu2HN\nmujsUpRSQYMSn5g2bVrUJgSCq7pAtSWRoHW1rLzx8JJeg17CcdVnoNqU3NGgxCfKy8ujNiEQXNUF\nqi2JBK2rZY8Sj8GDoXv34CtwXPUZqDYld7T6RlEUBZsz0rMnzJsHl1/e+rkjjoDPfAZ+9rNobFOU\nMNHqG0VRlIhZswb27Gm7fAPhVeAoSqmjQYmiKAqtdwfOpKJCW80rShhoUOITK1eujNqEQHBVF6i2\nJBKkroYGmzsyeHDb54YNszMlzc2BDe+sz0C1KbmjQYlPTJ8+PWoTAsFVXaDakkiQuhobYcgQ6Nat\n7XMVFbBjB7z+emDDO+szUG1K7mhQ4hPz5s2L2oRAcFUXqLYkEqSubJU3Ht75IJdwXPUZqDYldzQo\n8QlXy8Jc1QWqLYkEqaujoGTIEBAJNihx1Weg2pTc0aBEUZSSp7kZXnkle+UNwL77Qnm5VuAoStBo\nUKIoSsnz2ms2Z6S9mRLQChxFCQMNSnxi9uzZUZsQCK7qAtWWRILS1VE5sMewYcEGJa76DFSbkjsa\nlPhEU1NT1CYEgqu6QLUlkaB0NTbanJEhQ9q/xmugFlQTbFd9BqpNyR1tM68oSsnzne/A3XfD6tXt\nX3P//fC5z8GGDdC/f2imKUroaJt5RVGUCOmo8sbDS4LVvBJFCQ4NShRFKXkaG9uvvPHwntcKHEUJ\nDg1KfGLTpk1RmxAIruoC1ZZEgtBlTG4zJb16wcEHBzdT4qrPQLUpuaNBiU9MmjQpahMCwVVdoNqS\nSBC6Nm6E997rPCiBYCtwXPUZqDYldzQo8YlZs2ZFbUIguKoLVFsSCUKXtxzT2fIN7K3ACQJXfQaq\nTckdDUp8wtUqH1d1gWpLIkHo8mY+cg1KgpopcdVnoNqU3NGgRFGUkqahweaK9OrV+bXDhsFbb8E7\n7wRvl6KUIhqUKIpS0uRSeePh5Z1oBY6iBIMGJT4xf/78qE0IBFd1gWpLIkHoyqXyxiPIXiWu+gxU\nm5I7GpT4RH19qE3vQsNVXaDakkgQuvIJSvr2hX79gglKXPUZqDYld7TNvKIoJcvmzTbIqK6G88/P\n7TWjR8Mxx8BvfxusbYoSFdpmXlEUJQK83JBcZ0q8a7XVvKIEgwYliqKULPmUA3sE2UBNUUqdyIMS\nEblKRJ4SkXdFZIOI3C8iR2Rcc5uINGccD0Vls6IobtDYaJdv+vbN/TUVFfDGG/D++8HZpSilSuRB\nCXAqMBcYDZwBdAcWi0jPjOsWAQcBA9LHBWEa2RmpVCpqEwLBVV2g2pKI37rySXL18K5/5RVfTXHW\nZ6DalNzpFrUBxpgJLR+LyEXARmAksKTFUzuMMW+GaFpeTJkyJWoTAsFVXaDakojfugoJSlqWBR93\nnH+2uOozUG1K7sRhpiST/QEDvJ1xfmx6eWeliNwqIv0isK1dxo0bF7UJgeCqLlBtScRvXfk0TvPo\n3x969/a/gZqrPgPVpuRO5DMlLRERAW4Glhhjnm/x1CLgPmAVMAz4EfCQiIwxLtY0K4oSOO+/b3ND\n8p0pEdEKHEUJirjNlNwKHA206hhgjLnXGLPQGPOcMeZB4DPAScDYjm42YcIEUqlUq2PMmDEsWLCg\n1XWLFy/Oui44efLkNt366uvrSaVSbNq0qdX5mTNnMnv27Fbn1qxZQyqVYuXKla3Oz507l2nTprU6\n19TURCqVYsmSJa3OV1dXM3HixDa2nXfeeapDdaiOInR4Mx0PPJC/jpYVOFHr8Ei6P1RHNDqqq6s/\n+GwcMGAAqVSKqVOntnlNaBhjYnEA84BXgfIcr98IXNLOcyMAs2zZMhMW999/f2hjhYmruoxRbUnE\nT13/93/GgDHr1+f/2hkzjBkyxDdTjDHu+swY1ZY0li1bZrBpFCNMyLFALGZKRGQe8Fng48aYNTlc\nfwhwAPBG0LblSnV1ddQmBIKrukC1JRE/dTU02NyQ/v3zf21FBaxZAzt2+GaOsz4D1abkTuRt5kXk\nVmx5bwp4qcVTW4wx20WkFzATm1OyHqgAZgO9gOONMbuy3FPbzCuK0iGXXQZPPQXLl+f/2n/8Az7x\nCVi5Ej7yEf9tU5QoKfU285cDfYBHgddbHF9IP78HOB54AHgR+A2wFDgtW0CiKIqSC4VU3nh4ybF+\nV+AoSqkTefW
NMabDwMgYsx34dEjmKIpSIjQ05L4JXyaDBsG++2oFjqL4TRxmShRFUUJlxw6bE5Jv\nObBHly4wdKgGJYriNxqU+ES2sisXcFUXqLYk4peu1avBmMKXb8AGNH4u37jqM1BtSu5oUOITrnb1\nc1UXqLYk4pcub4aj0JkS77V+zpS46jNQbUruRF59EwRafaN0xubN9ptyv1htVqCExc9/DjNmQFOT\nXYophF/8AqZOhW3boGtXf+1TlCgp9eobRQmdiRPhS1+K2golKhobbU5IoQEJ2JmSXbtg7Vr/7FKU\nUify6htFCZvmZvjnP+1PY+xeJkppUcjuwJl4r29ogCFDijZJURR0psQ3MvcccAUXdb38sl2+2bJl\nibN9Jlz0G/iny4+gpLzcLtv4lVfiqs9AtSm5o0GJT8yZMydqEwLBRV21td6/5vD001FaEhwu+g38\n0bV7t62+KabyBqB7dztD4ldg66rPQLUpuaNBiU/cfffdUZsQCC7qqquDo4+G8vK7nQ1KXPQb+KNr\n7VqbC1LsTAn4W4Hjqs9AtSm5ozklPlFWVha1CYHgoq7aWhgzBrZsKXM2KHHRb+CPLj/KgT2GDbP5\nSX7gqs9AtSm5ozMlSkmxdSusWAEf/SiMGgXLlsGePVFbpYRJY6PNBSkvL/5eXgM1BzsrKEokaFCi\nlBRLl9qqmzFjoLIS3nsPXnqp89cp7uBVy3TvXvy9Kipsn5I33ij+XoqiaFDiG9OmTYvahEBwTVdd\nHfTpA0cdBfffb7W5uITjmt88/NDlR+WNh5cs60deias+A9Wm5I4GJT5R7sdccAxxTVdtLYwebZtm\nHXFEOYcf7mZQ4prfPPzQ1dhYfOWNx9Chts+NHxU4rvoMVJuSO9pmXikZjIH+/eHrX4frrrPnvvhF\nePVVePzxaG1TwqG5GXr3huuvty3i/aC8HL78ZXtPRXEBbTOvKCHQ2AibNtkkV4/KSli+3PauUNzn\njTdsDohfyzdgZ1383JhPUUoZDUqUkqGuzv4cPXrvucpK+yH1wgvR2KSEi7fM4tfyDeytwFEUpXg0\nKPGJlStXRm1CILikq7YWjjgCDjjAPl65ciUnnmhzAlzLK3HJby0pVldDg/X30KE+GcTeBmrFroS7\n6jNQbUruaFDiE9OnT4/ahEBwSVddnS0F9pg+fTr77WcrcZYujc6uIHDJby0pVldDAxxyCPTo4ZNB\n2FmXLVvgrbeKu4+rPgPVpuSOBiU+MW/evKhNCARXdDU1wbPPts4n8bRVVro3U+KK3zIpVpeflTce\nXn5KsUs4rvoMVJuSOxqU+ISrZWGu6Hr6adu5teVMiaetstIGLDt3RmRcALjit0yK1eVnjxIPv3qV\nuOozUG1K7mhQopQEdXXQqxccc0zb5yorbUCyYkX4dinhYUwwQcl++9lSc63AUZTi0aBEKQlqa+Gk\nk6Bbli0ohw+3e6G4toSjtOatt+Ddd/1fvgGtwFEUv9CgxCdmz54dtQmB4IIuY+xMSct8EtirrazM\nzqC4FJS44LdsFKPLz92BM/EqcIrBVZ+BalNyR4MSn2hqaorahEBwQderr8L69a3zSaC1NteSXV3w\nWzaK0eUFDUHMlPjRQM1Vn4FqU3JH28wrznP33XDBBbBhg137z8YvfwlXXAFbt/pbLqrEh+9/H269\n1b4P/Oauu+DCC21pcJ8+/t9fUcJE28wrSoDU1tpvsu0FJACjRtlW888+G55dSrgEkeTq4VdZsKKU\nOhqUKM6TLZ8kk+OOg+7d3VrCUVoTZFDiV1mwopQ6GpT4xKZNm6I2IRCSrmv7drvhXmY+CbTWtu++\ncPzx7gQlSfdbexSjK4jGaR79+sH++xc3U+Kqz0C1KbmjQYlPTJo0KWoTAiHpuurrYdeu7DMlmdpc\nSnZNut/ao1BdW7bAm28GN1MiUnwFjqs+A9Wm5I4GJT4xa9asqE0IhKTrqq2Fnj3tLEgmmdoqK+H5\n5+H998OxLUiS7rf2KFSXN4MRVFACxVfguOozUG1K7mhQ4hOuVvkkXVddnQ02undv+1ymtspKaG6G\nZ54JybgASbrf2qNQXV5QEtTyDRTfQM1Vn4FqU3JHgxLFaWprO09y9TjmGJtb4soSjrKXhgab89Gv\nX3BjVFTAunWwbVtwYyiK62hQojjLunXw2mvZk1yz0b07nHCCBiUu4lXeiAQ3hjcL88orwY2hKK6j\nQYlPzJ8/P2oTAiHJumpr7c/2ZkqyaRs1CpYuDdCokEiy3zqiUF1BVt54FNurxFWfgWpTckeDEp+o\nrw+16V1oJFlXXR0ceigcfHD257Npq6yEF1+0G7clmST7rSMK1RVkjxKPAQPsPkqFJru66jNQbUru\naJt5xVlOPhkGD7Zt5nPluefg2GPhH/+AsWMDM00JkW3bbLBw221w0UXBjnX88XDKKbadvaIkFW0z\nryg+s3MnLFuWez6Jx5FH2g8wzStxBy/HI+jlGyi+AkdRSh0NShQneeYZ2LEj98obj65dYcQIDUpc\nwltOCXr5xhtDW80rSuFoUKI4SW2tLe898cT8X+tSZ1fFBgllZTbnI2iGDYNXX7VdhBVFyZ/IgxIR\nuUpEnhKRd0Vkg4jcLyJHZLnuOhF5XUSaROSvIhLC957cSaVSUZsQCEnVVVdnZzz22af9a9rTVllp\np+A3bw7IuBBIqt86oxBdXuVNkOXAHhUVsGePDUzyxVWfgWpTcifyoAQ4FZgLjAbOALoDi0Wkp3eB\niMwApgCXAicB7wMPi0gHHznhMmXKlKhNCISk6qqt7TyfpD1tlZX257JlPhsVIkn1W2cUoiuMyhsP\nb5xClnBc9RmoNiV3Yld9IyIHAhuB04wxS9LnXgd+Yoy5Kf24D7AB+Kox5t4s99DqmxLmjTdg4EC4\n914499z8X9/cDH37wne+A1dd5b99SrgMHQrnnANz5gQ/1p49dq+ln/0M9LNKSSpafdOa/QEDvA0g\nIocBA4C/eRcYY94FngTyrK1QSoG6Ovsz38objy5dYORIzStxgZ077VJKGJU3YBOlhw7VChxFKZRY\nBSUiIsDNwBJjzPPp0wOwQcqGjMs3pJ9TlFbU1cGgQXDIIYXfQ5Nd3eDVV+3MV1jLN6AVOIpSDLEK\nSoBbgaOB86M2JF8WLFgQtQmBkERdueSTQMfaKithzRrYuNFHw0IkiX7LhXx1hVkO7DFsWGFBias+\nA9Wm5E5sghIRmQdMAMYaY95o8dR6QICDMl5yUPq5dpkwYQKpVKrVMWbMmDZvosWLF2fNoJ48eXKb\nfQ3q6+tJpVJs2rSp1fnvfve7zJ49u9W5NWvWkEqlWLlyZavzc+fOZdq0aa3ONTU1kUqlWLJkSavz\n1dXVTJw4sY1t5513XiA6Zs6c2UpHdXV1onTs2mVnOLZsmdmpP6qrq9vVccwxTUCK3/8+Xv7IpsOj\npQ5PW9T+KFaHh6fjlltuyUtHY6PdaPGQQ8LTUVFhG7b9/Of5/f+YOnVq
uzpaEid/5Pq+uuKKK5zQ\nkc0fN910U6J1VFdXf/DZOGDAAFKpVNb3YljEItE1HZB8FjjdGNNmj80OEl2/Yoz5Y5brNdG1RFm2\nzM5yPP44fOxjhd/HGDjgAJg6Fb73Pf/sU8Llv/4L/vIXyPi7HiiLFsGECXbpqLw8vHEVxS9KOtFV\nRG4FLgS+CLwvIgeljx4tLrsZuEZEzhKR44DfA+uAB8K3WIkzdXX2m3EhTdNaIqJ5JS4QZjmwh5dU\nq3klipI/kQclwOVAH+BR4PUWxxe8C4wxc7C9TH6FrbrpCZxpjNkZtrFKvKmthRNOsGWZxaJBSfLx\nGqeFyZAhtoJLK3AUJX8iD0qMMV2MMV2zHL/PuG6WMWagMabMGDPeGKPfQ5Q21NUVXgqcyahR8Prr\n9lCSx549Nrcj7JmSffaBQw/VmRJFKYTIgxJXyJZM5AJJ0rVxo/12musmfJ1p8zq7JnG2JEl+y4d8\ndK1bZ/uUhB2UQGEVOK76DFSbkjsalPjEuHHjojYhEJKk68kn7c9cZ0o603bIIdC/fzKDkiT5LR/y\n0eUtn4S9fAM2EMp3+cZVn4FqU3InFtU3fqPVN6XJd78L8+fbNvN+bb5WVWUrcR56yJ/7KeHx61/D\n178O27Z0rEzHAAAgAElEQVR1vDFjENx4I8ycCVu3hrMRoKL4SUlX3yiKX3hN0/z8EPCSXR2M3Z2n\nsdHmdoQdkICdnXn//eQ231OUqNCgRHGCPXvgqadyzyfJlcpKePNNWLvW3/sqwdPQEM3SDRS3W7Ci\nlDIalPhEZic9V0iKrhUr7DfTfCpvctFmZzCTl1eSFL/lSz66ouhR4jF06F4bcsVVn4FqU3JHgxKf\nmBPGvugRkBRddXV2h1YviMiFXLQNHGiPpAUlSfFbvuSqyxi7fBNVUFJWZt83+SS7uuozUG1K7mii\nq080NTVRVlYWylhhkhRdF10E//mPbTOfK7lqO/tsOwvz178Wbl/YJMVv+ZKrrvXr4eCD4f77rf+i\n4PTT7W7Vd92V2/Wu+gxUW9LQRFcHcO1N6ZEUXXV1+eeT5KoticmuSfFbvuSqK4rdgTOpqMhv+cZV\nn4FqU3JHgxIl8bz9Nrz4on+dXDOprIR33rHdQZVk4C2beLkdUTBsmLaaV5R80aBESTxe0zS/K288\nkprsWso0NNicjii/xFZU2ID57bejs0FRkoYGJT4xbdq0qE0IhCToqq2FAw/Mv/wzV20f/rDtd5Gk\noCQJfiuEXHVFWXnj4Y2f62yJqz4D1abkjgYlPlFeXh61CYGQBF1ePkm+TdPy0Za0HYOT4LdCyFVX\nlJU3Hl6QnGtQ4qrPQLUpuVNQ9Y2IfBn4ozFmu/8mFY+2mS8dmpuhb1+YMQOuvjq4cX78Y7jhBptb\n0kVD+djTrx/8938H+57IhQ9/GK68Eq65Jlo7FCUfklh9cxOwXkR+JSIn+WmQouTDCy/Au+8Gl0/i\nUVlp9zF5+eVgx1GK5+23YfPm6GdKoLDdghWllCk0KBkIXAIcAjwuIitE5Nsi8mH/TFOUzqmttTMX\no0YFO46X7Lp0abDjKMXjLZfEISgpZLdgRSllCgpKjDE7jTF/NMZUAeXAHcDXgHUi8n8iUiVSWntj\nrly5MmoTAiHuuurq4NhjYb/98n9tPtr69rUfMEnJK4m73wolF13ezERU+960JJ9eJa76DFSbkjtF\nr44bY94AHgH+ARigEqgGXhaRU4u9f1KYPn161CYEQtx11dYWvnSTr7YkJbvG3W+FkouuhgZbjfWh\nD4VgUCcMG2a7y773XufXuuozUG1K7hQclIjIgSLyXyLyLPA40B84GzgUGAQsAH7vi5UJYN68eVGb\nEAhx1vXOO/D884U3TctXW2UlLF8Ou3cXNl6YxNlvxZCLrjhU3nh4duTSeM9Vn4FqU3KnoKBERO4H\nXgMuxy7dDDbGnGuM+YuxbAXmYAOUksDVsrA46/LyOwqdKclXW2UlNDVBEmZr4+y3YshFV0NDPJZu\nYG9QkssSjqs+A9Wm5E6hMyXvAmcYY440xvzUGPNmlmveBA4v3DRF6ZjaWpvrccQR4Yx34om2F0pS\nlnBKlTg0TvM48ECb76QVOIqSG4Umun7VGPOvTq4xxhjNO1cCo64ORo8Or29Inz7wkY9oUBJn3nsP\nNmyIT1AiohU4ipIPhS7f3CQik7OcnywiNxZvVvKYPXt21CYEQlx1NTfboKSYTfgK0ZaUZNe4+q1Y\nOtPlffjHZfkGcq/AcdVnoNqU3Cn0O+a5wBNZztcB5xVuTnJpamqK2oRAiKuul1+2DbKKaZpWiLZR\no+CZZ2DnzsLHDYO4+q1YOtPlffjHZaYEcm+g5qrPQLUpuVNom/ntwDGZyzMiUgGsMMb08Mm+gtA2\n8+5z++0waZINTMIs/XziCTj5ZKivtzkmSryYMweuv95WZsWlU9L8+XDJJbBtG+y7b9TWKErnJLHN\nfCMwPsv58cCqws1RlNyoq4Ojjgq/F8UJJ9gcliQs4ZQiXuVNXAISsLM2xsAq/cuoKJ1SaFByM/AT\nEfmeiJycPq4FZgM/9888RclObW1x+SSFUlYGxxyjQUlciVPljYeX36IVOIrSOYVW3/wG+A7wDeBf\n6eNi4ApjzP/4Z15y2LRpU9QmBEIcdW3dCitWFL8JX6HakpDsGke/+UFnuuLUOM1j4EDo0aPzChxX\nfQaqTcmdgospjTFzjTEHY7u39jPGlBtjfuufacli0qRJUZsQCHHUtXSprb4pdqakUG2VlfCf/8D2\n7cWNHyRx9JsfdKRr+3ZYuzZelTdgl/tySXZ11Weg2pTc8WXvG2PMO34Yk2RmzZoVtQmBEEdddXW2\nZ8hRRxV3n0K1VVbCrl02MIkrcfSbH3Ska9Uqm7sRt5kSyC0ocdVnoNqU3Cm0T8mHReQ2EVkjIttF\nZGfLw28jk4CrVT5x1FVb60/TtEK1HX88dOsW7yWcOPrNDzrS5S2PxDEoyaWBmqs+A9Wm5E63Al93\nOzAM+AnwBnZ3YEUJHGPsTMnXvx6dDT162MBk6dJo7VBa09BgfXPwwVFb0paKCjuTs3u3DWgVRclO\nof89TgNOM8Ys99MYRemMxkbYtKn4JNdiqay0MzZKfPDKgcPadiAfhg2zAcmaNTB0aNTWKEp8KfS/\n7zp0dqQV8+fPj9qEQIibrro6+3P06OLvVYy2ykp47jm7a3AciZvf/KIjXXGsvPHw7OpoCcdVn4Fq\nU3Kn0KBkKvAjETnET2OSTH19qE3vQiNuumpr7a7ABxxQ/L2K0VZZaSuAnnmmeDuCIG5+84uOdHkz\nJXGkvNwu23SU7Oqqz0C1KblTaJv5N4H9gO7Au8Culs8bY/r7Yl2BaJt5dxk5Eo47zraZj5KdO20F\n0Jw5cMUV0dqi2KWRnj3hllvim+dz+OGQSsGNJbllqZIkomwzX2hOyXd8tUJRcqCpCZ591u4jEjX7\n7APDh8e7AqeUWLPGBiZxXb6
B3CpwFKXUKSgoMcboIpoSOk8/DXv2RNNePhuVlfCPf0RthQLx3B04\nk4oKfb8oSmcUnKcuIkNEZJaI3CEi/dPnxolIkS2tFCU7dXXQq5fdeyYOVFbCypW27b0SLQ0NNmdj\n8OCoLWmfYcPsTElzc9SWKEp8KbR52qnAc8DpwBeA3umnRgLX+WNaskilUlGbEAhx0lVbCyed5F+f\nh2K1jRpl+6bEMc8tTn7zk/Z0NTbCYYfFuwdIRYVthf/GG9mfd9VnoNqU3Cl0pmQ2MMsY83GgZQfX\nvwERd5CIhilTpkRtQiDERZfXNM3P/iTFajvySLtrcBzzSuLiN79pT1ccdwfOxLOvvQocV30Gqk3J\nnUKDkuOBP2U5vxH4cL43E5FTReRBEXlNRJpFJJXx/G3p8y2Phwq0PRDGjRsXtQmBEBddr74K69f7\nm09SrLZu3eDEE+MZlMTFb37Tnq44lwN7HHYYiLQflLjqM1BtSu4UGpRsAQZkOT8ceK2A+/UCngG+\nQftN2RYBB6XHHQBcUMA4SkLxs2man1RWxjMoKSWam+GVV+I/U7LvvjbnRStwFKV9Cg1K7gF+LCIf\nJh1EiMho4EbgznxvZoz5izHmWmPMA4C0c9kOY8ybxpiN6WNLgbYrCaS21n4T7h9pB5y2VFbab76b\nN0dtSeny+us2VyPuQQlYGzvbLVhRSplCg5KrgFeA17FJrs8DTwBLgR/4Y1obxorIBhFZKSK3iki/\ngMYpiAULFkRtQiDERZff+STgj7bKSvszbsmucfGb32TT5X3Ix335BqyN7QUlrvoMVJuSOwUFJcaY\nHcaYicARwNnAJOAYY8wFxpjdfhqYZhHwFeATwHRs1c9DItLerEroVFdXR21CIMRB1/btsHy5//1J\n/NB2xBHQu3f8lnDi4LcgyKarsdHmahx2WAQG5YnXQC1bI21XfQbuanv5ZXe1RUVR+2kaY1YZYx40\nxtxljFnpl1FZxrnXGLPQGPOcMeZB4DPAScDYjl43YcIEUqlUq2PMmDFtItvFixdnLeuaPHlym82W\n6uvrSaVSbNq0qdX5I488ktmzZ7c6t2bNGlKpFCtXtv7VzJ07l2nTprU619TURCqVYsmSJa3OV1dX\nM3HixDa2nXfeeYHomDlzZisd99xzT+Q66uth1y547LHCdUBbf9xzzz1F6+jSxba+nzcvHH9k0+HR\nUoenLa7vq1x1eHg6vvnNb7bR8fOfT6S83OZsxF3H4MFNvPtuioUL2/qjrKysjW1x1ZHv+wrazigk\nUUdLfyxebL+UnHDC1xKto7q6+oPPxgEDBpBKpZg6dWqb14RFoXvf/Lqj540xlxZskEgzcHY6+Ojo\nuo3Ad40xv8nynO594xA33gjf+x5s2QLdu0dtTVumTYM//hFWr47aktLk3HPh7bfhb3+L2pLOefZZ\nOOEEeOKJ+HQmVvKnudn2Kaqvh8mTYd68qC3ylyj3vil0puTgjKMcOBPbSC1bVY6vpHcnPgBopw2R\n4hJ1dTZ3I44BCVjbXn0V3nwzaktKk8bGZCS5wt68F63ASTb33WcDklGjoKYm+3KcUhiF5pSclXF8\nGjgM27vk0XzvJyK9RGS4iJyQPjU0/Xhw+rk5IjJaRA4VkU8CC4CXgIcLsV9JFrW1/ie5+omX7Lps\nWbR2lCLGJKNxmkfv3jBggFbgJJndu+Gaa+DMM+H737czpM8/H7VV7lBUTklL0gmuPwGmdXZtFiqB\n5cAybInxjUA98H1gD7ZZ2wPAi8BvsFU+pxljdhVvuT+0t46adKLWtW4dvPZaMFPdfmkbOhT23z9e\nya5R+y0oMnW9+abdeygJlTce7VXguOozcEvb7bfDSy/B9dfD2LHQtetEamqitsod/N4p4jAg70l2\nY8xjdBwgfbpgi0LC1a5+UeuqrbU/g5gp8UubSPyaqEXtt6DI1OUtgyRlpgSsrS++2Pa8qz4Dd7Rt\n2wazZsH559tuzgDDh4+jpgamT4/UNGcoKCgRkTmZp7C5JSkKaJ7mAhdc4GaD2ah11dXBoYfCwQf7\nf28/tVVWwh13+Ha7oonab0GRqStJPUo8KirI+s3aVZ+BO9puvRU2bIAftOjGdemlFzB5sm2g2Ldv\ndLa5QqHLN2MyjpOAHsB3gCv9MU1RgmmaFgSVlXaZqb0dYJVgaGiwORq9ekVtSe4MGwabNtlqMiU5\nbNkCN9wAX/ta65m5CRNgzx54WDMcfaHQRNdTM47TjTHnGGNujVOeh5Jsdu60yaNJKJ30kl3jtIRT\nCiSp8sbDs1crcJLFjTdCUxNce23r84MHw/Dh2We/lPzxLdG11MlsWuMKUep65hnYsSO4mRI/tZWX\nw4c/HJ+gpFTej0mqvPHw7M1MdnXVZ5B8bRs2wM9+BldcAQMHtn5uyZIlVFXBokV2xkQpjoKCEhFZ\nKiJP5XL4bXBcmTMnM83GDaLUVVtru3R6CWV+46e2uCW7lsr7saEhWfkkYPMO+vZtG5S46jNIvrYb\nboBu3WDGjLbPzZkzh6oqeOstePLJ8G1zjUJnSv4BfASb4FqXPkifexTbP8Q7SoK77747ahMCIUpd\ndXUwYgTss08w9/dbmxeUxKGRUim8H995x34QJG2mBPbugdMSV30Gyda2ejX88pe2uqZflm1g7777\nbkaPhgMO0CUcPyg0KNkf+IUxZpQx5or0cRIwD+hnjPmed/hnarzJtm+FC0Spq7Y22HwSv7VVVsLG\njba3StSUwvsxieXAHhUVbWdKXPUZJFvbrFk2GLmynRKOsrIyuna1zdQ0KCmeQoOSLwC3ZTl/O3Bu\nwdYoSpo33rCt25NQeeOhya7hksRyYI/2Gqgp8WLFCvj97+3eW51VeFVV2b2N1q4NxzZXKTQo2QFk\n+7j4aPo5RSmKuvSCYBIqbzwGDrT9VDQoCYfGRvsNNom9ISoq4PXXbTWHEl+uuQaGDIFLLun82vHj\noWtXeOihwM1ymkKDkluAX4nIz0Tk/PRxE/BL4Of+mZccMreSdoWodNXVwaBBcMghwY0RhLa4JLuW\nwvsxiZU3Hp7dr7yy95yrPoNkaqurgwcegOuu6zivzdPWty+cfLIu4RRLoX1KrgcuBk4Gfp0+PgZc\nmn6u5CgvL4/ahECISlfQ+SQQjLZRo+KR7FoK78ckVt54eHa3XMJx1WeQPG3GwFVXwXHHQWfNaFtq\nq6qCv/3NtqNXCkNM1H89A0BERgDLli1bxogRI6I2R8mTXbvgQx+yrZy//e2orcmPRYtsh8fGRrtR\nnxIcgwbZ7prXXRe1JfljDOy3n91lNmnv8VJg8WK7HPPgg3DWWbm/7rnn4Nhj7RLOmWcGZ1/Q1NfX\nM3LkSICRxpj6MMcuuHmaiPQRkYtE5DoR6Zs+N1xEAtilRCkl/v1v+00jSfkkHvb/cTyWcFzm/fdt\nTkZSl29EslfgKNHT3GxnST72MfjMZ/J77dFH2xwUXcIpnEKbpx0LvARcC1wFeKlm5wE/
9sc0pVSp\nq4Pu3YNrmhYk/fvb7q4alASLl4uR1OUb0AqcuHLffVBfDz/6kQ0e80HELuHU1ES/hJtUCp0puQm4\nCxgGbG9xvgY4rVijksjKlSujNiEQotBVWwsnnAA9ewY7TlDa4pDs6vr7Mck9SjwyG6i56jNIjrbd\nu23FzZlnwmk5fpJlaquqsg3Xnn/ef/tKgUKDklHAraZtQsprQEku30yfPj1qEwIhCl11deEs3QSl\nrbLSbiTY3BzI7XPC9fdjQwP07m1nppJKRYXtxbNzp33sqs8gOdpuvx1eegmuz6NcI1Pb2LH2C5Uu\n4RRGoUHJLqB3lvMVwKbCzUku8+bNi9qEQAhb18aN9ttjGE3TgtJWWQnvvhvt1Lzr70ev8ibf6fU4\nMWyYDVxXr7aPXfUZJEPbtm22e+v55+e3dJyprWdPOOMMDUoKpdCg5M/A90SkW/qxEZFB2HyS//PF\nsoSRtJK3XAlbl7ehVRgzJUFpi0Oyq+vvx8bGZC/dwF77vSUcV30GydB2662wfn3+1VzZtFVVweOP\nw+bNPhlXQhQalHwb6AesB3oCfwdeweaXXO2PaUopUlcHBx0Ehx4atSWF06+f/Ra8dGnUlrhLkhun\neRxyiN0FW5Ndo2fLFrsT8MUXw+GHF3+/CRNgzx54uGS2pPWPbp1f0hZjzGbg4yJyOjAcu5RTDzyc\nJc9EUXLGa5qW5Gl5iEeyq6vs3Alr1iS78gagSxc47DANSuLAjTfalv/XXuvP/QYPhuHD7RLO+ef7\nc89SIe+ZEhHpLiIPi8jhxpjHjDG3GGNuMMb8pZQDktmzZ0dtQiCEqWvPHnjqqfA24QtSW2WlLSvc\nsyewITrE5ffj6tU2FyPpMyXQugLHVZ9BvLVt2AA/+xlccYXdvypf2tNWVWWbKUb1NyCp5B2UGGN2\nASOBkg1AstHk6M5aYepascI2xQqraVqQ2ior7TevqCohXX4/ejMLrgQlnh5XfQbx1nb99dCtG8yY\nUdjr29NWVQVvvbU3T07JjYLazIvIz4H3jDHf9d+k4tE288nkV7+CyZPt+m5n24THnXffta3yb78d\nvvrVqK1xi1tugenTbdDXpeCe1PFg3jz41rds5UfXrlFbU3qsXg1HHGGrbq72ORtyzx6bH3fZZfmV\nGMeBJLaZN8AUEXlSRH4hInNaHn4aqJQOtbV2HTbpAQlAnz7wkY9oXkkQePsKJT0gATtTsmsXrFsX\ntSWlycyZNjH9yiv9v3fXrrYJm5YG50dBia7Y5Zt/p/99fMZzuqyjFERdHXzyk1Fb4R+a7BoMLlTe\neHg6GhqSXXGWRFasgDvugLlzg/siVFUFd94Ja9fa5Felc/L6riEiQ0VEjDGndnCUZJv5TZvc7BkX\nlq4NG+DFF+0mWGERtLbKSnjmGftNOGxcfj96jdNc4NBD7TfqhgZ3fQbx1HbNNXbzvEsuKe4+HWkb\nP97696GHihujlMh3AvRl4MPeAxG5R0QO8tekZDJp0qSoTQiEsHQtWmTLgD/1qVCGA4LXNmoUbN9u\ntzMPG1ffjxMnTmLVKndmSrp3t4FJY6O7PoP4aaurgwcesI3S9tmnuHt1pK1vXzj5ZF3CyYd8g5LM\n7hETAAcyAIpn1qxZUZsQCGHpWrgQTjop3L1MgtZ2wgk27yGKJRxX34+XXTaLXbvcCUpgbwWOqz6D\neGkzBq66Co47Di64oPj7daatqgr+9jebzKx0jgOpYvHA1SqfMHTt3AmLF9v/vGEStLZeveDoo6MJ\nSlx9P/boYXW5snwDVktDg7s+g3hp++tf4dFHbUWMHxVPnWmrqrKVYo8+WvxYpUC+QYmhbSKrJrYq\nRbFkCWzdGn5QEgaa7OovjY32g8SlpFCvgVrptp4Mj+ZmO0vysY/BZz4TzphHH21zV3QJJzfyrb4R\n4HYR2ZF+3AP4HxF5v+VFxpjP+WGcUhosXGg7KeazM2dSqKyEP/wBduyw+5woxdHQYP/Ad+8etSX+\nUVFhv0mvXw8HHxy1NW5z33220/Jjj4W3lYWI/cJVU2MrfZK+hUbQ5DtT8jtgI7AlfdwJvN7isXeU\nHPPnz4/ahEAIQ1dNjd3AKuz/rGFoq6y01Tf/+U/gQ7XC1ffjI4/Md2rpBvYuRd18s5s+g3i8H3fv\nthU3Z54Jp/lYI5qLtqoq26jt+ef9G9dV8gpKjDETczmCMjbO1NeH2vQuNILW9fLL8NJL0SzdhOGz\n44+3LazDXsJx9f24Zk29U0muYBvBATz9tJs+g3i8H2+/3f6t8bu7ai7axo6Fnj11CScXCmozH3e0\nzXxyuPlmu+fEW29B795RWxMMJ54II0fC//5v1JYkG2Ns8vD118PUqVFb4y+DB9vtCH74w6gtcZNt\n2+Dww+HUU6G6OhobUim7hcZjj0Uzfj4ksc28ovhCTY39FuFqQAK2X8nSpVFbkXzeeMN+uLi2fAN7\nK3CUYLj1Vpuzc9110dlQVQWPPw6bN0dnQxLQoESJjK1b7bcGF6tuWlJZaRuo/eMfUVuSXIyBu+6y\n/3Zt+Qbst/jHH4dVq6K2xD22bIEbboCLL7a/56iYMMFu0vfww9HZkAQ0KFEi45FHbBKo60HJ+efD\n6afDGWfYpYfm5qgtShbvvQdf/jJMmwbf+AYcdVTUFvnPt75lK4pGjLCdRhX/+OlPbXXT974XrR2D\nB9sNRzWvpGM0KPGJVCoVtQmBEKSuhQvhyCOjm44Py2d9+tjmcNdcY/8wTpgAb74Z7JiuvB9XrLDL\nXw88YGdK1q5NOVlSedRR8JGPpPj4x+Hss+Hb345mz6SgiOr9uGED3HQTXHEFDBoUzBj5aKuqsltq\n7NkTjC0uoEGJT0yZMiVqEwIhKF3NzXaTqihnScL0Wdeu8P3v26nb+nqb/LpkSXDjufB+vP12u/VA\n9+62eumCC9zQ1R5Tp07hvvvsh+gtt9jZtbVro7bKH6Ly2/XX2+q3GTOCGyMfbVVVNqn/ySeDsyfx\nGGOcO4ARgFm2bJlR4snTTxsDxvz971FbEj7r1hlz6qnGdO1qzOzZxuzZE7VF8eL994256CL7/pg0\nyT4uNWprjSkvN6ZfP2NqaqK2JpmsWmVM9+7GXH991JbsZfduYw44wJirr47ako5ZtmyZ1719hAn5\n81tnSpRIWLjQLmucckrUloTPoEHw97/bHIkZM2yp4FtvRW1VPFi5EkaPhnvusTMl8+dDWVnUVoXP\nRz8Ky5fbduhVVbY1+u7dUVuVLGbOhH794Moro7ZkL1272uZtmlfSPrEISkTkVBF5UEReE5FmEWmz\nSCci14nI6yLSJCJ/FREHc/BLh5oaGD/erXbh+dCtG/zoR/b3UFtrExzr6qK2KlruustWKu3ZY0uo\nv/rVqC2Kln79bC7N7Nnwk5/AJz4Br78etVXJYMUKuOMOm8PVK2b72FdVwbPPurM05zexCEqAXsAz\nwDfIssGfiMwApgCXAicB7wMPi8g+YRrZEQs
WLIjahEAIQteGDfZDJ+qqmzj4bMIE+4140CDb2Onm\nm/3ZmC0O2nJl+3a47DK48EL43OfgqafgmGOyX5skXfmSTVuXLjB9ut1h9pVX4IQT7C63SSNsv11z\njd0j6ZJLgh8rX23jx9sZk4ceCsighBOLoMQY8xdjzLXGmAewm/5lciXwA2PMQmPMCuArwEDg7DDt\n7IjqqNoEBkwQuhYtsvvcnHmm77fOi7j4rLzc9mu58krbqfTzn4d33inunnHR1hkNDTBmDPz+9/Cb\n38DvftdxI72k6CqEjrSdcooNXk880X6ozZyZrAqOMP1WV2dnmK67DvYJ4Wtrvtr69oWTT9YlnHYJ\nO4mlswNoBlItHh+WPnd8xnWPAje1cw9NdI0xn/+8MaNHR21FPLn/fmM+9CFjDjvMJgO7zL33GrPf\nfsYcfrgxzzwTtTXJYM8eY37wA2O6dDHmE58w5o03orYoXjQ3GzN2rDHHHWeTSuPK7NnG9OxpTFNT\n1JZkRxNdO2YA9pezIeP8hvRzSoLYudP27Ih66SaunH22/UZ8wAE2yfEXv/BnOSdO7NgB3/wmfOEL\ndvnq6adtUymlc7p0sUsTjzxid5w98UTtFOzx3nt2+fPRR20pcNeuUVvUPlVVdsuERx+N2pL4kYSg\npGAmTJhAKpVqdYwZM6bNGuDixYuzNsCZPHlym22p6+vrSaVSbNq0qdX5mTNnMnv27Fbn1qxZQyqV\nYuXKla3Oz507l2nTprU619TURCqVYklG84rq6momTmy78fJ5552XSB1Lltj28l5QklQdmfip47DD\nbA+Tyy6DKVNmMnz4bN59N3k6WuL5Y9UquxTx61/DD36whqamFK+/njwdLYnCHzfdlGL5ctt07Ywz\n7EZ+3/hG8nQU649rr53J5ZfPZtIkGDDAdsY9++w1/OY38dbRv/8mhgzZu4QTpT+qq6s/+GwcMGAA\nqVSKqVHueBn21ExnB7p84zRTpxozcKCdZlU6x6UljgULSmdpKix27zbm2muNETFm/HhjNm6M2qJw\nePVVu4w1bJgxYN9T3/++MatXR21Z7kyebMyQIfH8W6jLNx1gjFkFrAc+6Z0TkT7AaOCJqOzKJFs0\n6gJ+66qpsVP2cWgVngSfnXsuLFtmyxpHj7bJoLks58RJ265dtm362Wfbstb6erC7oudPnHT5TSHa\nvOqSScMAABqmSURBVE7Bf/lLOJ2CC8UPv23bZsvGP/UpW1nzox/ZWbdHH7UJ09deC4ceWvQweVOo\ntqoqWL3aLsMpe4lFUCIivURkuIickD41NP14cPrxzcA1InKWiBwH/B5YB8Rm66px48ZFbUIg+Kmr\noQFeeik++SRJ8dnhh9teJhddBJdeCl/5il0/74i4aFuzBk47zbZNv+kmuO8+2H//wu8XF11BUIy2\nceNsLtLQoTB2LMyZE6+NHwvVZoytprnsMrs8c+GFNidp/nxYv9422Dv9dJtrExWFahs7Fnr21Cqc\nNoQ9NZPtAE7HLtHsyTh+2+KaWcDrQBPwMFDRwf10+SaG3HyzMfvsY8zWrVFbklz+8AdjevUy5sgj\njVmxImprOmbhQtsmvbzcmLq6qK0pDXbtMuY737FLGlVVxmzaFLVFhfHaa8b8+Mf2fQ7GHHKIMd/9\nrjEvvxy1Zf5y1lnGnHZa1Fa0peSXb4wxjxljuhhjumYck1pcM8sYM9AYU2aMGW+MaYjSZiV/Fi60\n3w466kOhdMwXv2irVbp1s7vn/u53UVvUlt274Tvfgc98xlYQLV9ul56U4Elyp+AdO+BPf7IzqYMH\nw6xZdjlq8WK7zPHDH0KFY328q6rg8cdh8+aoLYkPsQhKFPfZutU2CIvL0k2SOfJIu8vo+efbJZ1J\nk6CpKWqrLK+9Bh//OPz0p3YJ4YEHbLt0JVy8TsEDB9pOwTfdFM/ScmNsLsw3v2ltPfdcuw/UrbfC\nG2/szSGJc3lvMUyYYJvgPfxw1JbEBw1KfCKzFMsV/NL1yCM24TFOQUmSfVZWBr/9Ldx2G9x9t52J\naFk5GIW2xYttG/RVq2zy4bRp/q/1J9lnneG3Nq9T8BVX2FJZPzoFF0qmto0bbaA0fLhNev7Tn+Di\ni23Sp5dDUkzuUZgU47fBg+3vQPNKWhD2elEYBxHklJx11lmhjRUmfumaNMmuD8cJV3z2n//Y322v\nXjbnxJhwte3ebcz3vhdOWaorPstGkNqi7hR81llnmZ07bVn4Zz9rTLduxnTvbsw55xhTU2NzYZJK\nsX67+mpjDjggXh1oo8wpERPHOb0iEZERwLJly5YxYsSIUMZsamqizME91v3Q1dxsN5y78EI7rR8X\nXPLZe+/B5ZfDH/5gv2WefXYTPXoEr23PHts987HH7F4jV10VbCWESz7LJGhtq1bZLrr//rfNOwnp\nTyO7dsGf/9zEPfeUsXGjHfeii2x+1AEHhGNDkBTrtyeesHvhPP64zcGKA/X19Yy0dfsjjTH1YY7d\nLczBXMbVP5R+6Fq+3JbvxWnpBtzyWe/edqv200+36/O/+lV42gYMgL/9zSYxB41LPsskaG1ep+Bp\n02zfmDA58MAyvvQlmDgRjj8+3LGDpli/jR5tg7OamvgEJVGiQYkSOAsXQp8+ttGREhwidqv2z38e\n3n47vHEPPtg2d1Piz7772p4xM2bYZmRhUV4ezo69SaRrV7tjek2NnXUsdTQoUQKnpsZut969e9SW\nlAb9+mnFi9IxgwZFbYHSkqoquPNOWLvWJr+WMlp94xOZGyS5QrG6NmyApUvjt3QD7voM3NXmqi5Q\nbUnFD23jx9sZk4ce8sGghKNBiU+Ul5dHbUIgFKtr0SK7rHDmmT4Z5COu+gzc1eaqLlBtScUPbX37\n2mRXLQ1Gq2+UYDnnHFi3LjldJRVFUaJgzhzbxfatt+yeOFESZfWNzpQogbFzp22oFcelG0VRlDhR\nVWWTjx99NGpLokWDEiUwliyx7eU1KFEURemYo4+GIUN0CUeDEp9Y2bLHt0MUo2vhQrufxYkn+miQ\nj7jqM3BXm6u6QLUlFb+0idgvcDU18dynKCw0KPGJ6dOnR21CIBSjq6bGbjgl4qNBPuKqz8Bdba7q\nAtWWVPzUVlVld0R+/nnfbpk4NCjxiXnz5kVtQiAUqquhAV56Kd5LN676DNzV5qouUG1JxU9tY8fa\nJNdSXsLRoMQnXC15K1RXTY3t4HjGGT4b5COu+gzc1eaqLlBtScVPbT172r+ZGpQois8sXGij/t69\no7ZEURQlOVRV2c35Nm+O2pJo0KBE8Z2tW+2usXFeulEURYkjEybY3bcffjhqS6JBgxKfmD17dtQm\nBEIhuh55xG5XHvegxFWfgbvaXNUFqi2p+K1t8GAYPrx0l3A0KPGJpqamqE0IhEJ0LVwIRx4Jw4YF\nYJCPuOozcFebq7pAtSWVILRVVdktOvbs8f3WsUfbzCu+0txsdyC98EL46U+jtkZRFCV5PPGE3Qvn\n8c
fhYx8Lf3xtM684w/LlsH59/JduFEVR4sro0XDAAaW5hKNBieIrNTXQpw+cckrUliiKoiSTrl3t\nzuoalCgFs2nTpqhNCIR8dS1cCOPHQ/fuARnkI676DNzV5qouUG1JJShtVVXw7LOwdm0gt48tGpT4\nxKRJk6I2IRDy0bVhAyxdmpylG1d9Bu5qc1UXqLakEpS28ePtjMlDDwVy+9iiQYlPzJo1K2oTAiEf\nXYsW2X1uzjwzOHv8xFWfgbvaXNUFqi2pBKWtb1+b7FpqSzhafaP4xjnnwLp1UFcXtSWKoijJZ84c\nmDUL3nrLtqAPC62+URLPzp2weHFylm4URVHiTlUVbNsGjz4atSXhoUGJ4gtLltj28hqUKIqi+MPR\nR8OQIaW1hKNBiU/Mnz8/ahMCIVddCxfCwIFw4okBG+QjrvoM3NXmqi5QbUklSG0i9ovewoXgYKZF\nVjQo8Yn6+lCX3UIjV101NXYjKZGADfIRV30G7mpzVReotqQStLaqKnj1VXj++UCHiQ2a6KoUTUMD\nHH443H8/nH121NYoiqK4w7ZttrvrrFkwfXo4Y2qiq5Joampgn33gjDOitkRRFMUteva0f1tLJa9E\ngxKlaBYuhLFjoXfvqC1RFEVxj6oquznf5s1RWxI83aI2QEk2W7fCY4/pjsCKoihBMWEC7NkDt95q\nG6oFzUsvBT9Ge2hQ4hOpVIoHH3wwajN8pzNdjzwCu3YlsxTYVZ+Bu9pc1QWqLamEoW3wYBg1Cq65\nJtBhYoEGJT4xZcqUqE0IhM50LVwIRx4Jw4aFZJCPuOozcFebq7pAtSWVsLT9/e+wfn0oQ/Hcc9EV\nLWj1jVIwzc0waBBceKEu3yiKoriCVt8oiWT5chu5J3HpRlEURYkfGpQoBVNTA336wCmnRG2JoiiK\n4gKJCEpEZKaINGccsepvt2DBgqhNCISOdC1cCOPHQ/fuIRrkI676DNzV5qouUG1JxWVtUZCIoCTN\nCuAgYED6iNX38+rq6qhNCIT2dG3YAEuXJnvpxlWfgbvaXNUFqi2puKwtChKR6CoiM4HPGmNyylrV\nRNfguf12mDTJ5pT07x+1NYqiKIpfaKJrbhwuIq+JSKOI3Ckig6M2qJRZuBBOOkkDEkVRFMU/khKU\n1AEXAeOBy4HDgH+KSK8ojSpVdu6ExYuTvXSjKIqixI9ENE8zxjzc4uEKEXkKeBX4AnBbNFZZVq6E\nz30O3nwzvDE/+Un4n/+B/fcPb8yWLFli28trUKIoiqL4SVJmSlphjNkCvARUdHTdhAkTSKVSrY4x\nY8a0yZZevHgxqVSqzesnT57M/PnzW52rr68nlUqxadMm1q+HT3/anu/W7QRGjpzNt77FB8ekSWs4\n8MAUX/rSylbnP/rRuRx11LRW56ZMaeLAA1N87nNLWp3/5CerKS+f+MHjb3wDHn4YDjnkPH76U390\ntGTmzJnMnj37g8cTJ05kzZo1pFIpVq5cCdhS4IEDYcmSuUybNq3V65uamkilUixZsqTV+erqaiZO\nnNjGtvPOO883f3SkA2ijw7Nn7txk6/BoqcOzMek6PDwdEyZMcEJHNn8cfvjhTujI5o/DDjvMCR3Z\n/DFu3LhE66iurv7gs3HAgAGkUimmTp3a5jWhYYxJ3AH0Bt4GprTz/AjALFu2zATFu+8aM2KEMQMH\nGvPqq8bcddddgY2VySuvGFNZacw++xgzb54xzc3BjZVN1xFHGHPxxcGNGRZh+ixsXNXmqi5jVFtS\ncVHbsmXLDGCAESbkz/ekVN/8BPgzdslmEPB94HjgaGPMW1muD7T6ZtcuSKXsVtL/+hcMH+77EJ2y\nYwdMmwZz58IXvgC/+Y1tZBY0DQ1w+OFw//3R7Y2gKIqiBIdW33TOIcBdwErgbuBN4KPZApKgMQa+\n/nW7O+5990UTkADsuy/ccgvcey8sWgSVlfDss8GPW1MD++wDZ5wR/FiKoihKaZGIoMQYc4Ex5hBj\nTE9jTLkx5ovGmFVR2PKDH8D8+fb41KeisKA1554L9fXQqxeMHm1nTIKc/Fq4EMaOhd69gxtDURRF\nKU0SEZTEhdtug5kz4Yc/hK98pfVzmQlGYVJRAbW1cNFFcOml8OUvw3vv+XPvlrq2boXHHnOn6iZK\nnwWNq9pc1QWqLam4rC0KNCjJkYcfhksusR/6V1/d9vk5c+aEb1QLevSwZcJ/+AMsWACjRsFzzxV/\n35a6HnnE5tO4EpRE7bMgcVWbq7pAtSUVl7VFQSISXfPF70TX+no4/XR7LFgA3bJ0d2lqaqKsrKzo\nsfxg5Uq7rNPYCL/8JXz1q4Xfq6Wur30NnngCXnjBJ0MjJk4+8xtXtbmqC1RbUnFRmya6xpjVq+3M\nwJFHwj33ZA9IgFi9KY88Ep58Es4/3y7pTJoETU2F3cvT1dwMDz3kziwJxMtnfuOqNld1gWpLKi5r\niwINSjrg7bfhzDOhrMwmePZKUFP7sjL47W/txnl3322TYDN68OTF8uV28z2XghJFURQlXmhQ0g7b\nt8NnP2vbxy9aBAcdFLVFhfHVr8JTT8Hu3bZs+K67CrtPTY3tg3LKKf7apyiKoigeGpRkobnZVtc8\n/TT8+c9wxBGdvyaz7W+cOPZYWLrUNju78EK4/HIbdOWCp2vhQhg/Hrp3D9DQkImzz4rFVW2u6gLV\nllRc1hYFGpRkYdo0+NOf7KzCmDG5vaa8vDxYo4qkd2+44w749a/tks6YMbY7a2eUl5ezYYMNalxb\nuom7z4rBVW2u6gLVllRc1hYFWn2Twc03w9Sptn37lCnB2Bc1zzxjq3M2bLB5J+ec0/H1t99uk2XX\nr4f+/UMxUVEURYkIrb6JCX/6k92Jd9o0dwMSgBNOgGXLbBLvuefy/9u7/1ir6zqO48+XmjhRTKeR\npE6z8Ec5lNRBCugUUyuSIkm0qSydIk7Lic2aWJjzZrqZcysVEZs2bZoKqbBSC39i3DRdgMxAJomG\nloogkHz643PI4/Fe7o3z/Z7P+X7u67Gdjfu9n/s9nxef733fz/n+5Pzz47N0ujN7Nhx+uCckZmZW\nLk9Kah57DE47DcaPh6uuSt2b8g0YEK/KueGGeEjnyCNhaRc37l+/HubOze/QjZmZtR9PSoiXyo4Z\nA8OGxUMVW23B/8qiZq63TUSCSZPi047feAOGDoX77vtwmzvuWMQ77+Q5KanimPVWrtlyzQXOVlU5\nZ0uhz09KVq6E44+HQYPi3Vr79duy9UyZMqXYjrXQoYfGu9YedVS8Queii+Lt5AGmTZvCoEFwyCFJ\nu1iKKo9ZT3LNlmsucLaqyjlbCn36RNfVq+Ot41eujA+0a+Yk6uXLl1f+LOwQ4Lrr4jk1hx0W72A7\natRyjjlmL266KXXvipfDmHUn12y55gJnq6ocs/lE1wQ2bIgneS5ZEm+f3uw2lcNGKcGFF8K8ebBi\nBRx0ECxduleWh24gjzHrTq7Zcs0FzlZVOWdLoU9OSkKAc8+
NT729+24YMiR1j9rLsGHxtvIjRsDO\nO8Oxx6bukZmZ9QXdPF4ub9OmwfTpMHMmjB6dujftaZdd4P77Ye3a+BwdMzOzsvW5PSUzZsDUqXDF\nFfFW8kXp6OgobmVtQoLrr88v1yY5jtkmuWbLNRc4W1XlnC2FPjUpmTMHzjoLzj4bLr202HWvWbOm\n2BW2iVxzgbNVUa65wNmqKudsKfSZq286O+OVNqNGxUt/t+mTB67MzMw2z1fflGzZsnjzr/33j5e5\nekJiZmbWfrKflLz5Jpx4YjxZc/Zs6N8/dY/MzMysK1lPStati3coff11ePBBGDiwvPdatWpVeStP\nKNdc4GxVlGsucLaqyjlbCllPSi67DJ55BmbNgsGDy32viRMnlvsGieSaC5ytinLNBc5WVTlnSyHr\nE11hAffcM5SxY8t/z87Ozs3e0r6qcs0FzlZFueYCZ6uqHLOlPNE160nJlCkL6OjIa2MxMzMrk6++\nKcn48al7YGZmZr2V9aTEzMzMqsOTkoJMnz49dRdKkWsucLYqyjUXOFtV5ZwtBU9KCtLZ2dLDbi2T\nay5wtirKNRc4W1XlnC2FrE90rb/NvJmZmfXMJ7qamZlZn+dJiZmZmbUFT0rMzMysLXhSUpAxY8ak\n7kIpcs0FzlZFueYCZ6uqnLOl4ElJQSZPnpy6C6XINRc4WxXlmgucrapyzpaCr74xMzOz//HVN2Zm\nZtbneVJiZmZmbcGTkoLce++9qbtQilxzgbNVUa65wNmqKudsKXhSUpCOjo7UXShFrrnA2aoo11zg\nbFWVc7YUKjUpkXSepKWS1kp6StJhqfu0yW677Za6C6XINRc4WxXlmgucrapyzpZCZSYlksYD1wBT\ngUOA54A5knZN2jEzMzMrRGUmJcB3gV+GEG4LISwCzgHWABPTdsvMzMyKUIlJiaSPAV8A/rBpWYg3\nWPk9MDxVv8zMzKw426TuQC/tCmwNvNaw/DVgvy7abwewcOHCkrv1gfnz59PZ2dJ7zLRErrnA2aoo\n11zgbFWVY7a6v53btfq9K3FHV0m7AyuA4SGEp+uWdwAjQwjDG9pPAG5vbS/NzMyycmoI4Y5WvmFV\n9pSsAt4HBjYsHwis7KL9HOBUYBnwXqk9MzMzy8t2wN7Ev6UtVYk9JQCSngKeDiFcUPtawHLg5yGE\nq5N2zszMzJpWlT0lANcCt0paAMwnXo2zPXBryk6ZmZlZMSozKQkh3FW7J8mPiYdtngW+FEL4Z9qe\nmZmZWREqc/jGzMzM8laJ+5SYmZlZ/jwpMTMzs7bQpyclkkZIul/SCkkbJY3pos0Bku6T9G9JqyU9\nLWmPzaxzlKR7Jf2j1v4vtfum1LcZK2mupNclvSXpCUnH5ZCtof0RkjZIKvTOQimzSdpW0k8kLZP0\nnqS/Szojg1ynSnpW0ru1ttMl7VJErhKzDZb0sKSVtYd0viRpmqRtGtodJWlBbbxelHR6UblSZiu7\njqQcs7r2Vaohvd0eS6shbZCt6TrSpyclQH/iCbOTgI+cXCNpX2Ae8DdgJHAQMI3N3/vki8SHBX69\n1n4GcJukE+vajATmAicAQ4FHgFmShjSZp16qbJvWvxMwk/gogKKlzPYb4GjgTGAwcAqwuIks9ZLk\nknQEcaxuAg4ExgGHAzc2negDZWTbUOv3aOJYXACcBVxet969gdnER1QMAa4DbpY0urk4H5IkG+XX\nkVS5Nq2/ajWkt9nKrCGQ7netmDoSQvArnuy7ERjTsOzXwMwC1j0buLmHNi8AP8wlW239PyI+1bkz\nh3EDjgfeBD5eVp5EuS4CljS0mQwsr2C2a4A/1n3dAfy1i/d6oOrZumlTSh1JkSuTGtK4PbashiTI\nVkgd6et7SrolScCXgSWSHpL0mqSnJH2tod0MSY/0sLqdiBvi5t5rx821KVLZ2SSdCexDLCgtVXK2\nrwJ/Bi6R9IqkxZKullT68yFKzvUksKekE2rrGAh8E/hdcQm6V1Q2SZ8hFv1H6xYP46OftOfQogd5\nlpytq/dqSR0pO1cONaSbbMlqSK1PZWYrpI54UtK9TwA7AJcADxB3W/0WuEfSiLp2rwIvd7cSSScD\nhwK3bOa9LibucruryT73VmnZJH0WuJL4zISNxXe9R0Vnm1G3+NPACOBzwEnEXZjjgBsK7H93Shuz\nEMITwGnAnZLW19bxL+KnnFZoKpukxyWtJe4C/1MIYWrdtz9J1w/yHCCpX3ERulVmtkatrCOl5ap6\nDelhzFLWECgxW2F1pBW7kKrwomE3F7B7bdmvGtrdB9zey3UeDawm/nJ112YC8A5wdNWzESe584Gz\n65ZdTgt3vZY5bsRP2O8CO9QtGwv8B+hX4VwHEh94+T3g88RC9Rw9HHJsl2zAp4D9gfHER09cXPe9\nxcAlDe1PID5Lq9Axa3W2hnal1pFW5cqhhvSwPbashiTIVkgdqcwdXRNYRdxQFjYsXwgc0dMPSxoF\n3A9cEELo8onFkr5FPAloXAihp13uRSor247ET+EHS9o0898q/ojWA8eFEB5tsu89KXPcXgVWhBBW\nN6xXwB7AS1va6V4oM9f3gcdDCNfWvn5B0iRgnqQfhBAa9zQUralsIYQVtX8uUrwa4EZJPwuxUq6k\n6wd5vh1CWNdct3ulzGxAsjpSSi4yqCE9jFnKGgLlZiukjnhS0o0QwgZJzwD7NXxrMJvZPQ7xEkRg\nFnEWOb2bNqcANwPjQwgPNd/j3isx29vEGXK984if0L9BfGpzqUoet8eBcZK2DyGsqS3bj/jJ45Wm\nOt6DknNtD6xvWLaReOa+tqjD/4dmsnVha2Jd24q4N+RJ4p6ResfVlpeu5GzJ6kiJuSpdQ7rQOGbJ\nagiUnq2YOlL07qIqvYjHX4cAB9f+8y6sfb1n7fsnES+T+g6wL/HY2HpgeN06rqTuTGY+2EV+BfET\n2abXznVtJtTWc05DmwFVz9ZFPwo/cz7huPUn/uLeCRxAvJxuMfCLiuc6HVhX2x73IX5img880eZj\nNoF4It3+tX6fTCzs9W32Jh7W6CAW4km19R6bQbZS60iqXBWuIb0Zs1JrSOJshdSRwga5ii9gVG3Q\n3m943VLX5gzgReJxwE7gKw3rmAE83PB14/reb2jzSDdtbql6ti76UUZBSZaN+IliDvEP/cvATyno\nWHDiXOcBz9dyvUK838DubT5mJxOvZHiL+An7eWAKsG3Dz40EFgBrgSXAtyuwPfaYjZLrSMoxa1hH\nVWpIb7fH0mpIG2Rruo74gXxmZmbWFnxJsJmZmbUFT0rMzMysLXhSYmZmZm3BkxIzMzNrC56UmJmZ\nWVvwpMTMzMzagiclZmZm1hY8KTEzM7O24EmJmZmZtQVPSszMzKwteFJiZmZmbeG/y0VzMZ/WFZgA\nAAAASUVORK5CYII=\n", 347 | "text/plain": [ 348 | "" 349 | ] 350 | }, 351 | "metadata": {}, 
352 | "output_type": "display_data" 353 | } 354 | ], 355 | "source": [ 356 | "fig, ax = plt.subplots()\n", 357 | "\n", 358 | "ax.plot(per_minute.index, per_minute)\n", 359 | "\n", 360 | "plt.grid(True)\n", 361 | "plt.title('Tweets Frequencies')\n", 362 | "plt.ylabel('Frequency')\n", 363 | "\n", 364 | "hours = mdates.MinuteLocator(interval=2)\n", 365 | "date_formatter = mdates.DateFormatter('%H:%M')\n", 366 | "\n", 367 | "ax.xaxis.set_major_locator(hours)\n", 368 | "ax.xaxis.set_major_formatter(date_formatter)\n", 369 | "max_freq = per_minute.max()\n", 370 | "ax.set_ylim(0, max_freq)\n", 371 | "\n", 372 | "plt.show();" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "# Anatomy of a Tweet\n", 380 | "\n", 381 | "This will come in handy when doing more analysis." 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 87, 387 | "metadata": { 388 | "collapsed": false 389 | }, 390 | "outputs": [ 391 | { 392 | "name": "stdout", 393 | "output_type": "stream", 394 | "text": [ 395 | "{\n", 396 | " \"retweet_count\": 9,\n", 397 | " \"id\": 820451940912037888,\n", 398 | " \"in_reply_to_user_id\": null,\n", 399 | " \"in_reply_to_screen_name\": null,\n", 400 | " \"user\": {\n", 401 | " \"profile_image_url_https\": \"https://pbs.twimg.com/profile_images/702521133732597760/8b-V29CP_normal.jpg\",\n", 402 | " \"id\": 15840592,\n", 403 | " \"profile_image_url\": \"http://pbs.twimg.com/profile_images/702521133732597760/8b-V29CP_normal.jpg\",\n", 404 | " \"description\": \"Wearing 1000 Hats/Minute. Yak Barber. Building None and NOWHERE. Earlier on: Demoscene (Neuro / Farbrausch). Married to @sylvia_ritter and @duangle. 133547455\",\n", 405 | " \"favourites_count\": 72955,\n", 406 | " \"follow_request_sent\": false,\n", 407 | " \"profile_sidebar_border_color\": \"000000\",\n", 408 | " \"followers_count\": 4614,\n", 409 | " \"geo_enabled\": false,\n", 410 | " \"utc_offset\": 3600,\n", 411 | " \"default_profile_image\": false,\n", 412 | " \"profile_sidebar_fill_color\": \"FFFFFF\",\n", 413 | " \"profile_text_color\": \"000000\",\n", 414 | " \"notifications\": false,\n", 415 | " \"profile_background_tile\": false,\n", 416 | " \"profile_link_color\": \"230085\",\n", 417 | " \"listed_count\": 350,\n", 418 | " \"id_str\": \"15840592\",\n", 419 | " \"time_zone\": \"Bern\",\n", 420 | " \"profile_use_background_image\": false,\n", 421 | " \"protected\": false,\n", 422 | " \"translator_type\": \"regular\",\n", 423 | " \"profile_background_image_url_https\": \"https://pbs.twimg.com/profile_background_images/378800000167314015/k9sKo4l6.jpeg\",\n", 424 | " \"is_translator\": false,\n", 425 | " \"lang\": \"en\",\n", 426 | " \"verified\": false,\n", 427 | " \"profile_background_image_url\": \"http://pbs.twimg.com/profile_background_images/378800000167314015/k9sKo4l6.jpeg\",\n", 428 | " \"name\": \"Leonard Ritter\",\n", 429 | " \"entities\": {\n", 430 | " \"url\": {\n", 431 | " \"urls\": [\n", 432 | " {\n", 433 | " \"url\": \"http://t.co/7IYHdgSuFx\",\n", 434 | " \"indices\": [\n", 435 | " 0,\n", 436 | " 22\n", 437 | " ],\n", 438 | " \"expanded_url\": \"http://www.duangle.com/nowhere\",\n", 439 | " \"display_url\": \"duangle.com/nowhere\"\n", 440 | " }\n", 441 | " ]\n", 442 | " },\n", 443 | " \"description\": {\n", 444 | " \"urls\": []\n", 445 | " }\n", 446 | " },\n", 447 | " \"profile_background_color\": \"000000\",\n", 448 | " \"default_profile\": false,\n", 449 | " \"screen_name\": \"paniq\",\n", 450 | " \"following\": true,\n", 451 | " 
\"contributors_enabled\": false,\n", 452 | " \"profile_banner_url\": \"https://pbs.twimg.com/profile_banners/15840592/1472275943\",\n", 453 | " \"created_at\": \"Wed Aug 13 18:50:10 +0000 2008\",\n", 454 | " \"has_extended_profile\": true,\n", 455 | " \"url\": \"http://t.co/7IYHdgSuFx\",\n", 456 | " \"statuses_count\": 161385,\n", 457 | " \"friends_count\": 1383,\n", 458 | " \"location\": \"Germany\",\n", 459 | " \"is_translation_enabled\": false\n", 460 | " },\n", 461 | " \"truncated\": false,\n", 462 | " \"contributors\": null,\n", 463 | " \"is_quote_status\": false,\n", 464 | " \"favorite_count\": 0,\n", 465 | " \"id_str\": \"820451940912037888\",\n", 466 | " \"source\": \"Twitter Web Client\",\n", 467 | " \"favorited\": false,\n", 468 | " \"lang\": \"en\",\n", 469 | " \"place\": null,\n", 470 | " \"retweeted\": false,\n", 471 | " \"entities\": {\n", 472 | " \"hashtags\": [],\n", 473 | " \"symbols\": [],\n", 474 | " \"urls\": [],\n", 475 | " \"user_mentions\": [\n", 476 | " {\n", 477 | " \"id_str\": \"24585498\",\n", 478 | " \"id\": 24585498,\n", 479 | " \"name\": \"TimOfLegend\",\n", 480 | " \"indices\": [\n", 481 | " 3,\n", 482 | " 15\n", 483 | " ],\n", 484 | " \"screen_name\": \"TimOfLegend\"\n", 485 | " }\n", 486 | " ]\n", 487 | " },\n", 488 | " \"in_reply_to_status_id_str\": null,\n", 489 | " \"retweeted_status\": {\n", 490 | " \"retweet_count\": 9,\n", 491 | " \"id\": 820450997759709186,\n", 492 | " \"in_reply_to_user_id\": null,\n", 493 | " \"favorited\": false,\n", 494 | " \"user\": {\n", 495 | " \"profile_image_url_https\": \"https://pbs.twimg.com/profile_images/781246620469866496/Wp5sz1sJ_normal.jpg\",\n", 496 | " \"id\": 24585498,\n", 497 | " \"profile_image_url\": \"http://pbs.twimg.com/profile_images/781246620469866496/Wp5sz1sJ_normal.jpg\",\n", 498 | " \"description\": \"FAQ: https://t.co/SUIdk7PIXl Official DF twitter: @doublefine Bugs & tech issues: https://t.co/A4Po9RbGml. 
Press requests: pr@doublefine.com\",\n", 499 | " \"favourites_count\": 16522,\n", 500 | " \"follow_request_sent\": false,\n", 501 | " \"profile_sidebar_border_color\": \"C0DEED\",\n", 502 | " \"followers_count\": 1103105,\n", 503 | " \"geo_enabled\": true,\n", 504 | " \"utc_offset\": -28800,\n", 505 | " \"default_profile_image\": false,\n", 506 | " \"profile_sidebar_fill_color\": \"DDEEF6\",\n", 507 | " \"profile_text_color\": \"333333\",\n", 508 | " \"notifications\": false,\n", 509 | " \"profile_background_tile\": true,\n", 510 | " \"profile_link_color\": \"4A913C\",\n", 511 | " \"listed_count\": 4380,\n", 512 | " \"id_str\": \"24585498\",\n", 513 | " \"time_zone\": \"Pacific Time (US & Canada)\",\n", 514 | " \"profile_use_background_image\": true,\n", 515 | " \"protected\": false,\n", 516 | " \"translator_type\": \"none\",\n", 517 | " \"profile_background_image_url_https\": \"https://pbs.twimg.com/profile_background_images/119245359/you_and_i.jpg\",\n", 518 | " \"is_translator\": false,\n", 519 | " \"lang\": \"en\",\n", 520 | " \"verified\": true,\n", 521 | " \"profile_background_image_url\": \"http://pbs.twimg.com/profile_background_images/119245359/you_and_i.jpg\",\n", 522 | " \"name\": \"TimOfLegend\",\n", 523 | " \"entities\": {\n", 524 | " \"url\": {\n", 525 | " \"urls\": [\n", 526 | " {\n", 527 | " \"url\": \"https://t.co/vGtEOtAC4C\",\n", 528 | " \"indices\": [\n", 529 | " 0,\n", 530 | " 23\n", 531 | " ],\n", 532 | " \"expanded_url\": \"http://www.doublefine.com\",\n", 533 | " \"display_url\": \"doublefine.com\"\n", 534 | " }\n", 535 | " ]\n", 536 | " },\n", 537 | " \"description\": {\n", 538 | " \"urls\": [\n", 539 | " {\n", 540 | " \"url\": \"https://t.co/SUIdk7PIXl\",\n", 541 | " \"indices\": [\n", 542 | " 5,\n", 543 | " 28\n", 544 | " ],\n", 545 | " \"expanded_url\": \"http://www.doublefine.com/about\",\n", 546 | " \"display_url\": \"doublefine.com/about\"\n", 547 | " },\n", 548 | " {\n", 549 | " \"url\": \"https://t.co/A4Po9RbGml\",\n", 550 | " \"indices\": [\n", 551 | " 82,\n", 552 | " 105\n", 553 | " ],\n", 554 | " \"expanded_url\": \"http://support.doublefine.com\",\n", 555 | " \"display_url\": \"support.doublefine.com\"\n", 556 | " }\n", 557 | " ]\n", 558 | " }\n", 559 | " },\n", 560 | " \"profile_background_color\": \"C0DEED\",\n", 561 | " \"default_profile\": false,\n", 562 | " \"screen_name\": \"TimOfLegend\",\n", 563 | " \"following\": false,\n", 564 | " \"contributors_enabled\": false,\n", 565 | " \"profile_banner_url\": \"https://pbs.twimg.com/profile_banners/24585498/1421107929\",\n", 566 | " \"created_at\": \"Sun Mar 15 21:03:29 +0000 2009\",\n", 567 | " \"has_extended_profile\": false,\n", 568 | " \"url\": \"https://t.co/vGtEOtAC4C\",\n", 569 | " \"statuses_count\": 25404,\n", 570 | " \"friends_count\": 812,\n", 571 | " \"location\": \"San Francisco, CA\",\n", 572 | " \"is_translation_enabled\": false\n", 573 | " },\n", 574 | " \"truncated\": false,\n", 575 | " \"contributors\": null,\n", 576 | " \"favorite_count\": 30,\n", 577 | " \"id_str\": \"820450997759709186\",\n", 578 | " \"source\": \"Twitter for iPhone\",\n", 579 | " \"in_reply_to_screen_name\": null,\n", 580 | " \"lang\": \"en\",\n", 581 | " \"place\": null,\n", 582 | " \"retweeted\": false,\n", 583 | " \"entities\": {\n", 584 | " \"hashtags\": [],\n", 585 | " \"symbols\": [],\n", 586 | " \"urls\": [],\n", 587 | " \"user_mentions\": []\n", 588 | " },\n", 589 | " \"in_reply_to_status_id_str\": null,\n", 590 | " \"is_quote_status\": false,\n", 591 | " \"text\": \"If you could make a human centipede using 
only 3-headed dogs, it could be a fun way to teach fractals to kids.\",\n", 592 | "        \"in_reply_to_user_id_str\": null,\n", 593 | "        \"in_reply_to_status_id\": null,\n", 594 | "        \"coordinates\": null,\n", 595 | "        \"created_at\": \"Sun Jan 15 02:02:15 +0000 2017\",\n", 596 | "        \"geo\": null\n", 597 | "    },\n", 598 | "    \"text\": \"RT @TimOfLegend: If you could make a human centipede using only 3-headed dogs, it could be a fun way to teach fractals to kids.\",\n", 599 | "    \"in_reply_to_user_id_str\": null,\n", 600 | "    \"in_reply_to_status_id\": null,\n", 601 | "    \"coordinates\": null,\n", 602 | "    \"created_at\": \"Sun Jan 15 02:06:00 +0000 2017\",\n", 603 | "    \"geo\": null\n", 604 | "}\n" 605 | ] 606 | } 607 | ], 608 | "source": [ 609 | "for status in Cursor(client.home_timeline).items(1):\n", 610 | "    print(json.dumps(status._json, indent=4))" 611 | ] 612 | } 613 | ], 614 | "metadata": { 615 | "anaconda-cloud": {}, 616 | "kernelspec": { 617 | "display_name": "Python [default]", 618 | "language": "python", 619 | "name": "python3" 620 | }, 621 | "language_info": { 622 | "codemirror_mode": { 623 | "name": "ipython", 624 | "version": 3 625 | }, 626 | "file_extension": ".py", 627 | "mimetype": "text/x-python", 628 | "name": "python", 629 | "nbconvert_exporter": "python", 630 | "pygments_lexer": "ipython3", 631 | "version": "3.5.2" 632 | } 633 | }, 634 | "nbformat": 4, 635 | "nbformat_minor": 1 636 | } 637 | -------------------------------------------------------------------------------- /Twitter/Python_and_maps.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Interactive Maps Using Folium\n", 8 | "\n", 9 | "* For more information, look at [Intro_Collecting_Tweets](https://github.com/Data4Democracy/assemble/blob/master/tutorials/Intro_Collecting_Tweets.ipynb)\n", 10 | "\n", 11 | "* This notebook is based on the work of [Marco Bonzanini](https://marcobonzanini.com/)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 3, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import json\n", 23 | "\n", 24 | "from tweepy import API\n", 25 | "from tweepy import OAuthHandler\n", 26 | "from tweepy import Cursor\n", 27 | "\n", 28 | "import folium" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Authentication" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 4, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "consumer_key = 'XXXXXXXXXXXXXXXXXXXXXXXX'\n", 47 | "consumer_secret = 'XXXXXXXXXXXXXXXXXXXXXXXX' \n", 48 | "access_token = 'XXXXXXXXXXXXXXXXXXXXXXXX'\n", 49 | "access_secret = 'XXXXXXXXXXXXXXXXXXXXXXXX'\n", 50 | "\n", 51 | "def get_twitter_auth():\n", 52 | "    \"\"\"Setup Twitter Authentication.\n", 53 | "    \n", 54 | "    Return: tweepy.OAuthHandler object\n", 55 | "    \"\"\"\n", 56 | "    auth = OAuthHandler(consumer_key, consumer_secret)\n", 57 | "    auth.set_access_token(access_token, access_secret)\n", 58 | "    return auth\n", 59 | "    \n", 60 | "def get_twitter_client():\n", 61 | "    \"\"\"Setup Twitter API Client.\n", 62 | "    \n", 63 | "    Return: tweepy.API object\n", 64 | "    \"\"\"\n", 65 | "    auth = get_twitter_auth()\n", 66 | "    client = API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, compression=True)\n", 67 | "    return client\n", 68 | "\n", 69 | "client = get_twitter_client()" 70 | ] 71 | }, 72 | { 73 | 
"cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "# Getting Some Sample Tweets" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "with open('home_timeline.jsonl','w') as f:\n", 88 | " for page in Cursor(client.home_timeline, count=200).pages(4): # limit of 800 for you\n", 89 | " for status in page:\n", 90 | " f.write(json.dumps(status._json)+'\\n')\n", 91 | " " 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Get coordinates from tweet" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 9, 104 | "metadata": { 105 | "collapsed": true 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "tweets = 'home_timeline.jsonl' # Contains tweets\n", 110 | "geo_tweets = 'home.geo.json' # Output file" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 10, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "with open(tweets,'r') as f:\n", 122 | " geo_data = {\n", 123 | " \"type\": \"FeatureCollection\",\n", 124 | " \"features\": [],\n", 125 | " }\n", 126 | " for line in f:\n", 127 | " tweet = json.loads(line)\n", 128 | " try:\n", 129 | " if tweet['coordinates']:\n", 130 | " geo_json_feature = {\n", 131 | " \"type\": \"Feature\",\n", 132 | " \"geometry\": {\n", 133 | " \"type\": \"Point\",\n", 134 | " \"coordinates\": tweet['coordinates']['coordinates'],\n", 135 | " },\n", 136 | " \"properties\": {\n", 137 | " \"text\": tweet['text'],\n", 138 | " \"created_at\": tweet['created_at']\n", 139 | " },\n", 140 | " }\n", 141 | " geo_data['features'].append(geo_json_feature)\n", 142 | " except KeyError:\n", 143 | " # json doc is not a tweet\n", 144 | " continue\n", 145 | " \n", 146 | "with open(geo_tweets, 'w') as f:\n", 147 | " f.write(json.dumps(geo_data, indent=4))" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 46, 153 | "metadata": { 154 | "collapsed": false 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "def make_map(geojson_file, map_file):\n", 159 | " # Create folium map centered at (latitude, longitude)\n", 160 | " tweet_map = folium.Map(location=[50,-50], zoom_start=2)\n", 161 | " # In case Tweets get too clustered\n", 162 | " marker_cluster = folium.MarkerCluster().add_to(tweet_map)\n", 163 | " \n", 164 | " geodata = json.load(open(geojson_file))\n", 165 | " for tweet in geodata['features']:\n", 166 | " tweet['geometry']['coordinates'].reverse()\n", 167 | " marker = folium.Marker(tweet['geometry']['coordinates'], popup=tweet['properties']['text'])\n", 168 | " marker.add_to(marker_cluster)\n", 169 | " \n", 170 | " tweet_map.save(map_file)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 47, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "make_map(geo_tweets, 'example.html')" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 48, 187 | "metadata": { 188 | "collapsed": false 189 | }, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/html": [ 194 | "\n", 195 | " \n", 202 | " " 203 | ], 204 | "text/plain": [ 205 | "" 206 | ] 207 | }, 208 | "execution_count": 48, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "from IPython.display import IFrame\n", 215 | "IFrame('example.html', width=700, height=350)" 216 | ] 217 | }, 218 | { 
219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "Click on the marker! Come on.. I dare you!" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": true 230 | }, 231 | "outputs": [], 232 | "source": [] 233 | } 234 | ], 235 | "metadata": { 236 | "anaconda-cloud": {}, 237 | "kernelspec": { 238 | "display_name": "Python [conda root]", 239 | "language": "python", 240 | "name": "conda-root-py" 241 | }, 242 | "language_info": { 243 | "codemirror_mode": { 244 | "name": "ipython", 245 | "version": 3 246 | }, 247 | "file_extension": ".py", 248 | "mimetype": "text/x-python", 249 | "name": "python", 250 | "nbconvert_exporter": "python", 251 | "pygments_lexer": "ipython3", 252 | "version": "3.5.2" 253 | } 254 | }, 255 | "nbformat": 4, 256 | "nbformat_minor": 1 257 | } 258 | -------------------------------------------------------------------------------- /Twitter/StreamingTweetsFromTwitter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Requires \n", 8 | "[dataset](https://dataset.readthedocs.io/en/latest/) \n", 9 | "[tweepy](http://tweepy.readthedocs.io/en/v3.5.0/) \n", 10 | "\n", 11 | "`pip install tweepy` \n", 12 | "`pip install dataset`" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import tweepy\n", 24 | "import dataset\n", 25 | "import json" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Accessing Twitter Data\n", 33 | "* Authentication\n", 34 | " * Performed using `Open Authorization (OAuth)` which involves communication between the user, the consumer (app) and the resource provider (Twitter in this case). \n", 35 | "* Data Collection\n", 36 | "* Data Cleaning\n", 37 | "\n", 38 | "\n", 39 | "## API\n", 40 | "* REST (Representational State Transfer) APIs \n", 41 | " * Allow us to read already existing Tweets. Keep in mind that there are limits on har far back in time we can search.\n", 42 | " * Useful for looking at a certain user.\n", 43 | "* Streaming APIs\n", 44 | " * Allow us to retrieve all the tweets that have been published since we opened an HTTP connection.\n", 45 | " * useful for listening to a live event.\n", 46 | " \n", 47 | "`Tweepy` is a Python client used to implement different types of calls to the desired API." 
48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "# Fill in your Twitter API keys and tokens \n", 55 | "If you need to get access to the API, sign up [here](https://dev.twitter.com/).\n", 56 | "\n", 57 | "In order to access Twitter data, you need to go to the [Application Management Page](https://apps.twitter.com/) to register your app.\n", 58 | "\n", 59 | "Your [Keys and Access Tokens](https://dev.twitter.com/oauth/overview) will be available once your app is registered.\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 1, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "CONSUMER_KEY = \"\"\n", 71 | "CONSUMER_SECRET = \"\"\n", 72 | "ACCESS_TOKEN = \"\"\n", 73 | "ACCESS_TOKEN_SECRET = \"\"" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "# Create a [StreamListener](http://docs.tweepy.org/en/v3.5.0/streaming_how_to.html)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "class StreamListener(tweepy.StreamListener):\n", 92 | "    def __init__(self, api=None):\n", 93 | "        super(StreamListener, self).__init__()\n", 94 | "        self.num_tweets = 0\n", 95 | "    \n", 96 | "    def on_status(self, status):\n", 97 | "        '''This function is called each time a new tweet is sent to the subscribed stream.'''\n", 98 | "        \n", 99 | "        # stop once limit is hit\n", 100 | "        if self.num_tweets >= LIMIT:\n", 101 | "            print('\\n \\n Stored {} tweets {}'.format(LIMIT, db))\n", 102 | "            return False\n", 103 | "        \n", 104 | "        # skip retweets\n", 105 | "        if hasattr(status, 'retweeted_status'):\n", 106 | "            return\n", 107 | "        \n", 108 | "        \n", 109 | "        else:\n", 110 | "            # count tweet \n", 111 | "            self.num_tweets += 1\n", 112 | "            if PRINT_TO_NOTEBOOK == 'Y':\n", 113 | "                print(status.text)\n", 114 | "        \n", 115 | "            # check for hashtags and save as list\n", 116 | "            if hasattr(status, 'entities'):\n", 117 | "                hashtags = []\n", 118 | "                for tag in status.entities['hashtags']:\n", 119 | "                    hashtags.append(tag['text'])\n", 120 | "                hashtags = json.dumps(hashtags)\n", 121 | "\n", 122 | "            # build dictionary of elements you want to save\n", 123 | "            # Just some of many available fields\n", 124 | "            # https://dev.twitter.com/overview/api/tweets\n", 125 | "            # https://dev.twitter.com/overview/api/users\n", 126 | "            \n", 127 | "            tweet_dict = {\n", 128 | "                'description': status.user.description,\n", 129 | "                'loc': status.user.location,\n", 130 | "                'text': status.text,\n", 131 | "                'name': status.user.screen_name,\n", 132 | "                'user_created': status.user.created_at,\n", 133 | "                'followers': status.user.followers_count,\n", 134 | "                'id_str': status.id_str,\n", 135 | "                'retweet_count': status.retweet_count,\n", 136 | "                'friends_count': status.user.friends_count,\n", 137 | "                'hashtags': hashtags\n", 138 | "            }\n", 139 | "            \n", 140 | "            store_tweet(tweet_dict)\n", 141 | "    \n", 142 | "    def on_error(self, status_code):\n", 143 | "        '''Twitter is rate limiting, exit'''\n", 144 | "\n", 145 | "        if status_code == 420:\n", 146 | "            print('Twitter rate limit error_code {}, exiting...'.format(status_code))\n", 147 | "            return False" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": { 154 | "collapsed": true 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "# Save tweet to sqlite db\n", 159 | "def store_tweet(tweet_dict):\n", 160 | "    # set table\n", 161 | 
" table = db['tweets']\n", 162 | " table.insert(tweet_dict)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "### Configuration:\n", 170 | "`TOPICS` = topics you want to follow/stream \n", 171 | "`LIMIT` = STOP after X number of tweets collected \n", 172 | "`DATABASE_NAME` = name of SQLITE database \n", 173 | "`PRINT_TO_NOTEBOOK` = If `Y` prints tweet text to notbook (turn off for large # of tweets)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "TOPICS = [\"baseball\", \"football\", \"soccer\"]\n", 185 | "LIMIT = 10\n", 186 | "DATABASE_NAME = 'tweet_stream'\n", 187 | "PRINT_TO_NOTEBOOK = 'Y'\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "# Setup connection & database" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)\n", 206 | "auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)\n", 207 | "api = tweepy.API(auth)\n", 208 | "\n", 209 | "stream_listener = StreamListener()\n", 210 | "stream = tweepy.Stream(auth=api.auth, listener=stream_listener)\n", 211 | "db = dataset.connect('sqlite:///{}.sqlite'.format(DATABASE_NAME))" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "# Try it:" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "# Starts the stream\n", 230 | "stream.filter(track=TOPICS)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "collapsed": true 238 | }, 239 | "outputs": [], 240 | "source": [] 241 | } 242 | ], 243 | "metadata": { 244 | "anaconda-cloud": {}, 245 | "kernelspec": { 246 | "display_name": "Python [conda root]", 247 | "language": "python", 248 | "name": "conda-root-py" 249 | }, 250 | "language_info": { 251 | "codemirror_mode": { 252 | "name": "ipython", 253 | "version": 3 254 | }, 255 | "file_extension": ".py", 256 | "mimetype": "text/x-python", 257 | "name": "python", 258 | "nbconvert_exporter": "python", 259 | "pygments_lexer": "ipython3", 260 | "version": "3.5.2" 261 | } 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 1 265 | } 266 | -------------------------------------------------------------------------------- /Twitter/Twitter_Gettingpast_32K_Limit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "We will show how to use `Selenium` and `Tweepy` to get past the `32,000` Tweet limit. \n", 8 | "\n", 9 | "We use `Selenium` to open up a browser and visit Twitter's search page. 
/Twitter/Twitter_Gettingpast_32K_Limit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "We will show how to use `Selenium` and `Tweepy` to get past the `32,000` tweet limit. \n", 8 | "\n", 9 | "We use `Selenium` to open up a browser and visit Twitter's search page. From the Twitter search page we can obtain the tweet IDs for a given user, then use `tweepy` to fetch the content of each tweet.\n", 10 | "\n", 11 | "\n", 12 | "* Adapted from [Twitter Scraping](https://github.com/bpb27/twitter_scraping).\n", 13 | "* The authentication process follows [Intro_Collecting_Tweets](https://github.com/Data4Democracy/assemble/blob/master/tutorials/Intro_Collecting_Tweets.ipynb).\n", 14 | "* In order to get this working you need to install [ChromeDriver](https://sites.google.com/a/chromium.org/chromedriver/). \n", 15 | "    * For Ubuntu ([source](https://christopher.su/2015/selenium-chromedriver-ubuntu/)):\n", 16 | "\n", 17 | "```\n", 18 | "wget -N http://chromedriver.storage.googleapis.com/2.26/chromedriver_linux64.zip\n", 19 | "unzip chromedriver_linux64.zip\n", 20 | "chmod +x chromedriver\n", 21 | "\n", 22 | "sudo mv -f chromedriver /usr/local/share/chromedriver\n", 23 | "sudo ln -s /usr/local/share/chromedriver /usr/local/bin/chromedriver\n", 24 | "sudo ln -s /usr/local/share/chromedriver /usr/bin/chromedriver\n", 25 | " ```" 26 | ] 27 | },
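If you are running this on a machine without a display, ChromeDriver can usually be started headless. A sketch under that assumption (option names vary across Selenium/Chrome versions, so treat this as a starting point rather than the notebook's tested setup); the resulting `driver` could be used in place of the plain `webdriver.Chrome()` call further down:

```python
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument('--headless')               # no visible browser window
options.add_argument('--window-size=1280,1696')  # give the page room to render
driver = webdriver.Chrome(chrome_options=options)
```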
28 | { 29 | "cell_type": "code", 30 | "execution_count": 74, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import json\n", 37 | "import time\n", 38 | "import datetime\n", 39 | "\n", 40 | "from tweepy import API\n", 41 | "from tweepy import OAuthHandler\n", 42 | "\n", 43 | "from selenium import webdriver\n", 44 | "from selenium.webdriver.common.keys import Keys\n", 45 | "from selenium.common.exceptions import NoSuchElementException \n", 46 | "from selenium.common.exceptions import StaleElementReferenceException" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "# Input Parameters" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 82, 59 | "metadata": { 60 | "collapsed": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "user = 'kdnuggets'\n", 65 | "\n", 66 | "start = datetime.datetime(2017, 1, 15) \n", 67 | "end = datetime.datetime(2017, 1, 16) \n", 68 | "\n", 69 | "twitter_ids_filename = 'all_ids.json'" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Authentication" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 76, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "from config import *\n", 88 | "\n", 89 | "def get_twitter_auth():\n", 90 | "    \"\"\"Set up Twitter authentication.\n", 91 | "    \n", 92 | "    Return: tweepy.OAuthHandler object\n", 93 | "    \"\"\"\n", 94 | "    auth = OAuthHandler(consumer_key, consumer_secret)\n", 95 | "    auth.set_access_token(access_token, access_secret)\n", 96 | "    return auth\n", 97 | "    \n", 98 | "def get_twitter_client():\n", 99 | "    \"\"\"Set up the Twitter API client.\n", 100 | "    \n", 101 | "    Return: tweepy.API object\n", 102 | "    \"\"\"\n", 103 | "    auth = get_twitter_auth()\n", 104 | "    client = API(auth)\n", 105 | "    return client\n", 106 | "\n", 107 | "client = get_twitter_client()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Helper Functions" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 77, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "def twitter_url(user, start, end):\n", 126 | "    \"\"\"Form the search URL used to access tweets via Twitter's search page.\n", 127 | "    \n", 128 | "    Return: string\n", 129 | "    \"\"\"\n", 130 | "    url1 = 'https://twitter.com/search?f=tweets&q=from%3A'\n", 131 | "    url2 = user + '%20since%3A' + start.strftime('%Y-%m-%d') \n", 132 | "    url3 = '%20until%3A' + end.strftime('%Y-%m-%d') + '%20include%3Aretweets&src=typd'\n", 133 | "    return url1 + url2 + url3\n", 134 | "    \n", 135 | "def increment_day(date, i):\n", 136 | "    \"\"\"Increment a date by i days.\n", 137 | "    \n", 138 | "    Return: datetime object\n", 139 | "    \"\"\"\n", 140 | "    return date + datetime.timedelta(days=i)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "## Get Tweet IDs" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 79, 153 | "metadata": { 154 | "collapsed": false 155 | }, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "scrolling down to load more tweets\n", 162 | "scrolling down to load more tweets\n", 163 | "20 tweets found, 0 total\n", 164 | "9 tweets found, 20 total\n" 165 | ] 166 | } 167 | ], 168 | "source": [ 169 | "# Adapted from https://github.com/bpb27/twitter_scraping\n", 170 | "\n", 171 | "delay = 1  # time to wait on each page load before reading the page\n", 172 | "driver = webdriver.Chrome() \n", 173 | "\n", 174 | "tweet_selector = 'li.js-stream-item'\n", 175 | "id_selector = '.time a.tweet-timestamp'\n", 176 | "\n", 177 | "ids = list()\n", 178 | "for day in range((end - start).days + 1):\n", 179 | "    # get the Twitter search URL for this day\n", 180 | "    startDate = increment_day(start, 0)\n", 181 | "    endDate = increment_day(start, 1)\n", 182 | "    url = twitter_url(user, startDate, endDate)\n", 183 | "\n", 184 | "    driver.get(url)\n", 185 | "    time.sleep(delay)\n", 186 | "\n", 187 | "    try:\n", 188 | "        found_tweets = driver.find_elements_by_css_selector(tweet_selector)\n", 189 | "        increment = 10\n", 190 | "\n", 191 | "        # scroll through the Twitter search page\n", 192 | "        while len(found_tweets) >= increment:\n", 193 | "            print('scrolling down to load more tweets')\n", 194 | "            driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')\n", 195 | "            time.sleep(delay)\n", 196 | "            found_tweets = driver.find_elements_by_css_selector(tweet_selector)\n", 197 | "            increment += 10\n", 198 | "        print('{} tweets found, {} total'.format(len(found_tweets), len(ids)))\n", 199 | "\n", 200 | "        # get the IDs for all tweets\n", 201 | "        for tweet in found_tweets:\n", 202 | "            try:\n", 203 | "                tweet_id = tweet.find_element_by_css_selector(id_selector).get_attribute('href').split('/')[-1]\n", 204 | "                ids.append(tweet_id)\n", 205 | "            except StaleElementReferenceException:\n", 206 | "                print('lost element reference', tweet)\n", 207 | "\n", 208 | "    except NoSuchElementException:\n", 209 | "        print('no tweets on this day')\n", 210 | "\n", 211 | "    start = increment_day(start, 1)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## Save IDs" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 80, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "tweets found on this scrape: 29\n", 233 | "total tweet count: 29\n", 234 | "Tweets Scraped!\n" 235 | ] 236 | } 237 | ], 238 | "source": [ 239 | "# Adapted from https://github.com/bpb27/twitter_scraping\n", 240 | "\n", 241 | "try:\n", 242 | "    with open(twitter_ids_filename) as f:\n", 243 | "        all_ids = ids + json.load(f)\n", 244 | "        data_to_write = list(set(all_ids))\n", 245 | "        
print('tweets found on this scrape: ', len(ids))\n", 246 | " print('total tweet count: ', len(data_to_write))\n", 247 | "except FileNotFoundError:\n", 248 | " with open(twitter_ids_filename, 'w') as f:\n", 249 | " all_ids = ids\n", 250 | " data_to_write = list(set(all_ids))\n", 251 | " print('tweets found on this scrape: ', len(ids))\n", 252 | " print('total tweet count: ', len(data_to_write))\n", 253 | "\n", 254 | "with open(twitter_ids_filename, 'w') as outfile:\n", 255 | " json.dump(data_to_write, outfile)\n", 256 | "\n", 257 | "print('Tweets Scraped!')\n", 258 | "driver.close()" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "# Get Tweet Info" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 81, 271 | "metadata": { 272 | "collapsed": false 273 | }, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "820722556936351744\n", 280 | "How Can Lean Six Sigma Help #MachineLearning? #KDN https://t.co/iPQJiQQhEz\n", 281 | "820741858326409224\n", 282 | "Solid Collection of \"Top #MachineLearning Books\" https://t.co/Dw4rL7ZzVF https://t.co/TC0PYFf8yK\n", 283 | "821019653698940928\n", 284 | "#ICYMI Game Theory Reveals the Future of #DeepLearning https://t.co/Dgq1dJI96h https://t.co/pB6hcmeG1c\n", 285 | "820725725359652864\n", 286 | "How IBM Is Using #ArtificialIntelligence to Provide #Cybersecurity https://t.co/b15TylAUrH #AI https://t.co/RJhBqxFTyc\n", 287 | "820642932084637698\n", 288 | "Exclusive Interview with top #DataScientist @Jeremyphoward on #DeepLearning, @Kaggle, #DataScience, and more… https://t.co/yUwleeOu0h\n", 289 | "820767777808125952\n", 290 | "What is the Role of the Activation Function in a Neural Network? #KDN https://t.co/jxzx8QChFl\n", 291 | "821024768375869442\n", 292 | "The Best Metric to Measure Accuracy of Classification Models #KDN https://t.co/9npH84wJ4S\n", 293 | "821012639102996480\n", 294 | "Poker Play Begins in \"Brains Vs. 
AI: Upping the Ante\" | Carnegie Mellon School of Computer Science… https://t.co/xwSoyPvXki\n", 295 | "820769790860075008\n", 296 | "Stanford courseware: #DataMining for Cyber #Security https://t.co/1UWpYtlN0l https://t.co/ZyZeVmUwOt\n", 297 | "820666882877427712\n", 298 | "#ICYMI The Major Advancements in #DeepLearning in 2016 https://t.co/qqXSid2Rvs https://t.co/F1N9bM1jyo\n", 299 | "820707623892877313\n", 300 | "10 Steps to Success in #Kaggle #DataScience Competitions #KDN https://t.co/1q65aIOF1j\n", 301 | "821005348706418688\n", 302 | "#ICYMI 3 methods to deal with outliers https://t.co/dr78u2lWYy https://t.co/6zjTfutcVD\n", 303 | "820692370874388481\n", 304 | "How to Choose a Data Format #KDN https://t.co/uCkVDUK8iT\n", 305 | "820685718502670336\n", 306 | "A Concise Overview of Recent Advances in the Internet of Things (#IoT) https://t.co/2HgaQbuzn4 https://t.co/6HO48dBJYA\n", 307 | "820696573558616066\n", 308 | "#DeepLearning for Self-Driving Cars: MIT Courseware https://t.co/NirWPBqs9K https://t.co/CNbcS4TVZg\n", 309 | "820650254773133313\n", 310 | "#ICYMI Ten Myths About Machine Learning, by Pedro Domingos https://t.co/4hMm2ojYwO https://t.co/2V36Lw8VxT\n", 311 | "820998297053630464\n", 312 | "#ICYMI Generative Adversarial Networks – Hot Topic in Machine Learning https://t.co/9DgHuiPHxO https://t.co/j2ufpQsqRR\n", 313 | "820994580531343361\n", 314 | "Shortcomings of #DeepLearning #KDN https://t.co/s13yap2ep6\n", 315 | "820703318678896640\n", 316 | "Baidu launches medical #chatbot to help Chinese doctors diagnose patients https://t.co/odsJnRd9Eh https://t.co/IymHSXb0x1\n", 317 | "820781844065120256\n", 318 | "Deriving Euclidean Distance Matrices on GPU, with Theano https://t.co/KZzrv5g5VG #GPU #programming #Python https://t.co/93BD3dqdA6\n", 319 | "820734522018897920\n", 320 | "A Concise Overview of Recent Advances in Vehicle Technologies https://t.co/ngOGz2oNSs https://t.co/dxEiwXIIuJ\n", 321 | "820662180861407232\n", 322 | "10 Tips to Improve your #DataScience #Interview #KDN https://t.co/94quRjNW5C\n", 323 | "821039939987111936\n", 324 | "The #DataScience Process, Rediscovered #KDN https://t.co/UdScyRHYQi\n", 325 | "820757455101640704\n", 326 | "A Concise Overview of Recent Advances in Chatbot Technologies https://t.co/c2DkUDikkc https://t.co/F8icjFXwKZ\n", 327 | "821029291194458112\n", 328 | "Learning to reconstruct an image from pixel coordinates: #NeuralNetworks map from x,y of an images pixels to R,G,B… https://t.co/HbbRX6zqlD\n", 329 | "820657265564852224\n", 330 | "Deep Reinforcement Learning: Playing a Racing Game https://t.co/w85vYoXN7g https://t.co/C42uCJupzE\n", 331 | "820721469487513600\n", 332 | "Cluster Analysis Puzzle: Learn by Doing! 
Quick clustering overview https://t.co/qjnNBhVqV5 https://t.co/8guPUlZg4C\n", 333 | "820647115206643713\n", 334 | "Predicting Future Human Behavior with #DeepLearning #KDN https://t.co/a1gBc7MJyq\n", 335 | "821048152266567681\n", 336 | "5 Big Predictions for #AI in 2017: #Reinforcement Learning, #Adversarial nets, #China, Language learning, #Hype https://t.co/fQ2SuCwrC2\n" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "with open(twitter_ids_filename) as f:\n", 342 | "    ids = json.load(f)\n", 343 | "    for tweetId in ids:\n", 344 | "        print(tweetId)\n", 345 | "        tweet = client.get_status(tweetId)\n", 346 | "        print(tweet.text)" 347 | ] 348 | } 349 | ], 350 | "metadata": { 351 | "anaconda-cloud": {}, 352 | "kernelspec": { 353 | "display_name": "Python [default]", 354 | "language": "python", 355 | "name": "python3" 356 | }, 357 | "language_info": { 358 | "codemirror_mode": { 359 | "name": "ipython", 360 | "version": 3 361 | }, 362 | "file_extension": ".py", 363 | "mimetype": "text/x-python", 364 | "name": "python", 365 | "nbconvert_exporter": "python", 366 | "pygments_lexer": "ipython3", 367 | "version": "3.5.2" 368 | } 369 | }, 370 | "nbformat": 4, 371 | "nbformat_minor": 1 372 | } 373 | -------------------------------------------------------------------------------- /aws/AWS_Boto3_s3_intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AWS S3 using boto3\n", 8 | "This tutorial demonstrates how to interact with an S3 bucket from Python using the boto3 library.\n", 9 | "For ad hoc, one-time operations you may find the AWS [command line interface](https://aws.amazon.com/cli/) more useful." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "In order to get programmatic access to AWS you will need to provide an `AWS Access Key ID` and `AWS Secret Access Key`, even if the resource is \"public\".\n", 17 | " * The easiest way to authenticate yourself is to install the AWS CLI mentioned above and run the command `aws configure` from the command line.\n", 18 | " * When you run `aws configure` it creates a file at `~/.aws/credentials` which contains the login credentials. Boto3 will automatically recognize credentials stored in this location.\n", 19 | " * If you are trying to access a bucket owned by another team/person you will need to have them provide you with the credentials. `aws configure` allows you to set up each new credential under a named profile; the sketch below shows the resulting file layout. You can tell boto3 which credentials to use by passing the `profile_name` argument, otherwise it will pick up the `default` profile (if one exists).\n", 20 | " * Alternatively, you can type your credentials directly into the script, but this is not recommended." 21 | ] 22 | },
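For reference, the file written by `aws configure` is a simple INI file; with a named profile added it typically looks like the snippet below (placeholder values, not real keys; the `d4d_tutorial` profile name matches the one used later in this notebook):

```
[default]
aws_access_key_id = YOUR_ACCESS_KEY_ID
aws_secret_access_key = YOUR_SECRET_ACCESS_KEY

[d4d_tutorial]
aws_access_key_id = TUTORIAL_ACCESS_KEY_ID
aws_secret_access_key = TUTORIAL_SECRET_ACCESS_KEY
```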
23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "**Please read through [this](http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-set-up.html) guide to set up your credentials. For this tutorial I am using my own personal test bucket. If you'd like to actually follow the steps you will need your own credentials and a test bucket. If you'd like to use my bucket to test, contact @bstarling in Slack and I can provide you with credentials to access the `public-test-bucket-d4d` bucket used in this tutorial.**" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "import boto3" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "['d4d', 'd4d_tutorial', 'd4d_s3', 'dynro', 'default']" 52 | ] 53 | }, 54 | "execution_count": 2, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "# check that boto3 is able to pick up valid credentials\n", 61 | "# will print a list of available profiles\n", 62 | "session = boto3.Session(profile_name='d4d_tutorial')\n", 63 | "session.available_profiles" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# Tell boto3 which resource you will use\n", 75 | "s3 = session.resource('s3')" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "# Specify the AWS bucket\n", 87 | "bucket = s3.Bucket('public-test-bucket-d4d')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "s3.ObjectSummary(bucket_name='public-test-bucket-d4d', key='tutorial/')\n", 102 | "s3.ObjectSummary(bucket_name='public-test-bucket-d4d', key='tutorial/data.csv')\n", 103 | "s3.ObjectSummary(bucket_name='public-test-bucket-d4d', key='tutorial/file_one.txt')\n", 104 | "s3.ObjectSummary(bucket_name='public-test-bucket-d4d', key='tutorial/file_three.txt')\n", 105 | "s3.ObjectSummary(bucket_name='public-test-bucket-d4d', key='tutorial/file_two.txt')\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "# print all objects in the bucket\n", 111 | "for obj in bucket.objects.all():\n", 112 | "    print(obj)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 6, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "s3.Object(bucket_name='public-test-bucket-d4d', key='tutorial/data.csv')\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "# get an object by name\n", 132 | "file = bucket.Object(key='tutorial/data.csv')\n", 133 | "print(file)" 134 | ] 135 | },
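Side note: for a small CSV object like this one you can parse the body in memory instead of saving it to disk first. A hedged sketch (assumes `pandas` is installed; `file` is the object from the cell above):

```python
import io
import pandas as pd

body = file.get()['Body'].read()    # bytes, fetched once
df = pd.read_csv(io.BytesIO(body))  # parse without touching disk
```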
136 | { 137 | "cell_type": "code", 138 | "execution_count": 7, 139 | "metadata": { 140 | "collapsed": false 141 | }, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "{'AcceptRanges': 'bytes',\n", 147 | " 'Body': <botocore.response.StreamingBody object at 0x...>,\n", 148 | " 'ContentLength': 8,\n", 149 | " 'ContentType': 'text/csv',\n", 150 | " 'ETag': '\"8015171fe51e613df5dcdf8e89e94b1c\"',\n", 151 | " 'LastModified': datetime.datetime(2017, 1, 24, 14, 14, 45, tzinfo=tzutc()),\n", 152 | " 'Metadata': {},\n", 153 | " 'ResponseMetadata': {'HTTPHeaders': {'accept-ranges': 'bytes',\n", 154 | "   'content-length': '8',\n", 155 | "   'content-type': 'text/csv',\n", 156 | "   'date': 'Wed, 25 Jan 2017 12:26:49 GMT',\n", 157 | "   'etag': '\"8015171fe51e613df5dcdf8e89e94b1c\"',\n", 158 | "   'last-modified': 'Tue, 24 Jan 2017 14:14:45 GMT',\n", 159 | "   'server': 'AmazonS3',\n", 160 | "   'x-amz-id-2': 'MfNDWsYCnrUpZPsw6ccST05Tb3CK2l4GR3MP8HenBLEkaSlEmkW4ScgKQgRaBIexSBFkz5dPmWo=',\n", 161 | "   'x-amz-request-id': '4DD3D2C04D199104'},\n", 162 | "  'HTTPStatusCode': 200,\n", 163 | "  'HostId': 'MfNDWsYCnrUpZPsw6ccST05Tb3CK2l4GR3MP8HenBLEkaSlEmkW4ScgKQgRaBIexSBFkz5dPmWo=',\n", 164 | "  'RequestId': '4DD3D2C04D199104',\n", 165 | "  'RetryAttempts': 0}}" 166 | ] 167 | }, 168 | "execution_count": 7, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "# the full S3 GetObject response\n", 175 | "file.get()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 8, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "b'my data\\n'" 189 | ] 190 | }, 191 | "execution_count": 8, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "# read the file body (careful doing this with large files)\n", 198 | "file.get()['Body'].read()" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 9, 204 | "metadata": { 205 | "collapsed": false 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "# Download a file\n", 210 | "s3.meta.client.download_file('public-test-bucket-d4d', 'tutorial/data.csv', 'local_data_file.csv')" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 10, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "# or use attributes of the variables assigned earlier" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 11, 227 | "metadata": { 228 | "collapsed": false 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "s3.meta.client.download_file(bucket.name, file.key, 'local_data_new.csv')" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 12, 238 | "metadata": { 239 | "collapsed": false 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "# download all files in an S3 \"folder\" with a specific prefix:\n", 244 | "for item in bucket.objects.filter(Prefix='tutorial/file'):\n", 245 | "    s3.meta.client.download_file(bucket.name, item.key, 'local_{}'.format(item.key.split('/')[-1]))" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": { 252 | "collapsed": false 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "# Check dir contents after download\n", 257 | "%ls" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "**NOTE: unless you need to do your work inside Python, you can do a lot with the [AWS CLI](http://docs.aws.amazon.com/cli/latest/reference/s3/index.html), which uses botocore under the hood.**\n", 265 | "* **`aws s3 sync s3://bstarling-d4d/data/test ./test`** sync an S3 folder to a local directory (need read/write access)\n", 266 | "* **`aws s3 cp s3://public-test-bucket-d4d/tutorial/data.csv local_data.csv`** copy a file from S3 to a local file\n", 267 | "* **`aws s3 cp local_data.csv s3://public-test-bucket-d4d/tutorial/data.csv`** push a local file to S3\n", 268 | "* **`aws s3 ls s3://public-test-bucket-d4d/tutorial/ --profile d4d_tutorial`** list contents (using the d4d_tutorial credentials profile)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [] 279 | } 280 | ], 281 | "metadata": { 282 | "anaconda-cloud": {}, 283 | "kernelspec": { 284 | "display_name": 
"Python [conda root]", 285 | "language": "python", 286 | "name": "conda-root-py" 287 | }, 288 | "language_info": { 289 | "codemirror_mode": { 290 | "name": "ipython", 291 | "version": 3 292 | }, 293 | "file_extension": ".py", 294 | "mimetype": "text/x-python", 295 | "name": "python", 296 | "nbconvert_exporter": "python", 297 | "pygments_lexer": "ipython3", 298 | "version": "3.5.2" 299 | } 300 | }, 301 | "nbformat": 4, 302 | "nbformat_minor": 1 303 | } 304 | --------------------------------------------------------------------------------