├── .coveragerc ├── .flake8 ├── .gitignore ├── .hound.yml ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── INSTALL.md ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── Procfile ├── README.md ├── analyser.py ├── archive-analyser.ipynb ├── dev-requirements.txt ├── docs ├── gender_reply.png └── tweet_class.png ├── manage.py ├── new_tweet_subset.js ├── static └── foo ├── tasks.py ├── test_archive_2016_2017_2_months.zip ├── tweet_display ├── __init__.py ├── admin.py ├── analyse_data.py ├── apps.py ├── helper.py ├── migrations │ ├── 0001_initial.py │ ├── 0002_graph_open_humans_member.py │ └── __init__.py ├── models.py ├── read_data.py ├── static │ ├── css │ │ ├── logo.svg │ │ └── metricsgraphics.css │ ├── favicon.ico │ ├── javascripts │ │ └── leaflet.timeline.js │ └── profile.jpg ├── tasks.py ├── templates │ └── tweet_display │ │ ├── application.html │ │ ├── index.html │ │ ├── interactions.html │ │ ├── location.html │ │ └── partials │ │ ├── graph_buttons.html │ │ ├── graph_in_making.html │ │ └── graph_status.html ├── tests │ ├── __init__.py │ └── tests_data.py ├── urls.py └── views.py ├── twitteranalyser ├── __init__.py ├── apps.py ├── celery.py ├── settings.py ├── templates │ └── twitteranalyser │ │ └── about.html ├── tests │ ├── __init__.py │ └── tests_views.py ├── urls.py ├── views.py └── wsgi.py └── users ├── __init__.py ├── admin.py ├── apps.py ├── forms.py ├── migrations ├── 0001_initial.py ├── 0002_openhumansmember_public.py └── __init__.py ├── models.py ├── templates └── users │ ├── complete.html │ ├── dashboard.html │ ├── index.html │ ├── partials │ └── upload_form.html │ ├── public_data.html │ └── upload_old.html ├── tests ├── __init__.py └── tests_views.py ├── urls.py └── views.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | */__init__.py 4 | */tests/* 5 | */migration/* 6 | manage.py 7 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | .git, 4 | __pycache__, 5 | docs, 6 | migrations 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | *.pyc 3 | staticfiles 4 | .env 5 | .env.staging 6 | .ipynb_checkpoints 7 | twitter_archive/ 8 | db.sqlite3 9 | dump.rdb 10 | .DS_Store 11 | data/ 12 | -------------------------------------------------------------------------------- /.hound.yml: -------------------------------------------------------------------------------- 1 | python: 2 | enabled: true 3 | config_file: .flake8 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | services: 4 | - redis-server 5 | - postgresql 6 | 7 | 8 | python: 9 | - "3.5" 10 | - "3.6" 11 | 12 | install: 13 | - pipenv install --dev 14 | 15 | before_script: 16 | - mkdir bin 17 | - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > bin/cc-test-reporter 18 | - chmod +x bin/cc-test-reporter 19 | - bin/cc-test-reporter before-build 20 | 21 | script: 22 | - python manage.py test 23 | - coverage run --source="." 
manage.py test 24 | 25 | after_success: 26 | - coverage xml 27 | - bin/cc-test-reporter after-build -t coverage.py 28 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## 1. Purpose 4 | 5 | A primary goal of TwArχiv is to be inclusive to the largest number of contributors, with the most varied and diverse backgrounds possible. As such, we are committed to providing a friendly, safe and welcoming environment for all, regardless of gender, sexual orientation, ability, ethnicity, socioeconomic status, and religion (or lack thereof). 6 | 7 | This code of conduct outlines our expectations for all those who participate in our community, as well as the consequences for unacceptable behavior. 8 | 9 | We invite all those who participate in TwArχiv to help us create safe and positive experiences for everyone. 10 | 11 | ## 2. Open Source Citizenship 12 | 13 | A supplemental goal of this Code of Conduct is to increase open source citizenship by encouraging participants to recognize and strengthen the relationships between our actions and their effects on our community. 14 | 15 | Communities mirror the societies in which they exist and positive action is essential to counteract the many forms of inequality and abuses of power that exist in society. 16 | 17 | If you see someone who is making an extra effort to ensure our community is welcoming, friendly, and encourages all participants to contribute to the fullest extent, we want to know. 18 | 19 | ## 3. Expected Behavior 20 | 21 | The following behaviors are expected and requested of all community members: 22 | 23 | * Participate in an authentic and active way. In doing so, you contribute to the health and longevity of this community. 24 | * Exercise consideration and respect in your speech and actions. 25 | * Attempt collaboration before conflict. 26 | * Refrain from demeaning, discriminatory, or harassing behavior and speech. 27 | * Be mindful of your surroundings and of your fellow participants. Alert community leaders if you notice a dangerous situation, someone in distress, or violations of this Code of Conduct, even if they seem inconsequential. 28 | * Remember that community event venues may be shared with members of the public; please be respectful to all patrons of these locations. 29 | 30 | ## 4. Unacceptable Behavior 31 | 32 | The following behaviors are considered harassment and are unacceptable within our community: 33 | 34 | * Violence, threats of violence or violent language directed against another person. 35 | * Sexist, racist, homophobic, transphobic, ableist or otherwise discriminatory jokes and language. 36 | * Posting or displaying sexually explicit or violent material. 37 | * Posting or threatening to post other people’s personally identifying information ("doxing"). 38 | * Personal insults, particularly those related to gender, sexual orientation, race, religion, or disability. 39 | * Inappropriate photography or recording. 40 | * Inappropriate physical contact. You should have someone’s consent before touching them. 41 | * Unwelcome sexual attention. This includes sexualized comments or jokes; inappropriate touching, groping, and unwelcome sexual advances. 42 | * Deliberate intimidation, stalking or following (online or in person). 43 | * Advocating for, or encouraging, any of the above behavior.
44 | * Sustained disruption of community events, including talks and presentations. 45 | 46 | ## 5. Consequences of Unacceptable Behavior 47 | 48 | Unacceptable behavior from any community member, including sponsors and those with decision-making authority, will not be tolerated. 49 | 50 | Anyone asked to stop unacceptable behavior is expected to comply immediately. 51 | 52 | If a community member engages in unacceptable behavior, the community organizers may take any action they deem appropriate, up to and including a temporary ban or permanent expulsion from the community without warning (and without refund in the case of a paid event). 53 | 54 | ## 6. Reporting Guidelines 55 | 56 | If you are subject to or witness unacceptable behavior, or have any other concerns, please notify a community organizer as soon as possible at bgreshake@googlemail.com. 57 | 58 | 59 | 60 | Additionally, community organizers are available to help community members engage with local law enforcement or to otherwise help those experiencing unacceptable behavior feel safe. In the context of in-person events, organizers will also provide escorts as desired by the person experiencing distress. 61 | 62 | ## 7. Addressing Grievances 63 | 64 | If you feel you have been falsely or unfairly accused of violating this Code of Conduct, you should notify Bastian Greshake Tzovaras with a concise description of your grievance. Your grievance will be handled in accordance with our existing governing policies. 65 | 66 | 67 | 68 | ## 8. Scope 69 | 70 | We expect all community participants (contributors, paid or otherwise; sponsors; and other guests) to abide by this Code of Conduct in all community venues–online and in-person–as well as in all one-on-one communications pertaining to community business. 71 | 72 | This code of conduct and its related procedures also apply to unacceptable behavior occurring outside the scope of community activities when such behavior has the potential to adversely affect the safety and well-being of community members. 73 | 74 | ## 9. Contact info 75 | 76 | bgreshake@googlemail.com 77 | 78 | ## 10. License and attribution 79 | 80 | This Code of Conduct is distributed under a [Creative Commons Attribution-ShareAlike license](http://creativecommons.org/licenses/by-sa/3.0/). 81 | 82 | Portions of text derived from the [Django Code of Conduct](https://www.djangoproject.com/conduct/) and the [Geek Feminism Anti-Harassment Policy](http://geekfeminism.wikia.com/wiki/Conference_anti-harassment/Policy). 83 | 84 | Retrieved on November 22, 2016 from [http://citizencodeofconduct.org/](http://citizencodeofconduct.org/) 85 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | ![](http://i0.kym-cdn.com/photos/images/original/001/027/798/5c8.gif) 3 | 4 | 🎉 Thanks so much for your interest in contributing! You're the best! 🎈 5 | 6 | Right now we don't have a detailed plan of what we want to work on next. But here are some important things to know. 7 | 8 | ### Overview 9 | 1. We do [have a Code of Conduct](https://github.com/gedankenstuecke/twitter-analyser/blob/master/CODE_OF_CONDUCT.md) that you should read before contributing. 👍 10 | 2. Have a look at the open issues to see what could be improved. 11 | 12 | ### Missing things 13 | 1. We only have a few tests so far. If writing tests is something you enjoy: Please go ahead! 😎 (A minimal starting point is sketched right below this list.) 14 | 2. More data analysis!
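To pick up the testing item above, here is a minimal sketch of what a new test case could look like (the URL in it is an assumption; point it at a route that actually exists in the project's `urls.py` files before relying on it):

```python
from django.test import TestCase


class AboutPageTest(TestCase):
    '''Smoke test: the about page should render without errors.'''

    def test_about_returns_200(self):
        # NOTE: '/about/' is an assumed route; adjust it to whatever
        # twitteranalyser/urls.py actually maps.
        response = self.client.get('/about/')
        self.assertEqual(response.status_code, 200)
```

Such tests run via `python manage.py test`, exactly as on Travis.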
15 | 16 | 17 | ### Making some $£¥€ 18 | In case you have a great idea for how to improve this project: Open Humans is [offering $5,000 as project grants for ideas that improve/grow the Open Humans eco-system](https://www.openhumans.org/grants/). And guess what: This project would totally qualify. So if you want to work on this, you could even be paid. 😂 19 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Install 2 | [TwArχiv](http://twarxiv.org) is a Django application that interfaces with [Open Humans](https://openhumans.org) 3 | for file storage and user management, and it is designed to be deployed to *Heroku*. As such there are some dependencies and intricacies that need to be taken into account. 4 | NOTE: We recommend installing under Python 3. 5 | 6 | ## Dependencies 7 | 8 | ### Database(s) 9 | TwArχiv uses two kinds of databases for short- and long-term storage. The short-term storage (for managing tasks that are not run on the webserver) is done with `redis`, while the long-term storage is done with `postgresql`. If you are deploying to the heroku production environment you just have to enable the appropriate add-ons. 10 | 11 | For your development environment you have to install both `redis` and `postgresql` on your local machine. 12 | If you are running macOS and using `brew` (or are a user of `linuxbrew`) you can install both with the following commands: 13 | 14 | ``` 15 | brew install redis 16 | brew install postgresql 17 | ``` 18 | 19 | You can then run `redis-server` from your command line to start an instance of `redis`. 20 | The configuration of `postgres` can be a bit more involved. [Check out this blogpost for some tips](https://www.codementor.io/devops/tutorial/getting-started-postgresql-server-mac-osx). 21 | 22 | ### Python modules 23 | Django in general and TwArχiv in particular require a number of `python` libraries. The current list can be found in the `Pipfile` in this repository. At the time of writing the main requirements are the following: 24 | 25 | ``` 26 | gunicorn==19.7.1 27 | pytz==2017.2 28 | gender_guesser==0.4.0 29 | pandas==0.20.3 30 | tzwhere==3.0.3 31 | Django==1.11.3 32 | dj-database-url==0.4.2 33 | whitenoise==3.3.1 34 | psycopg2==2.7.3.1 35 | redis==2.10.6 36 | celery==4.1.0 37 | requests==2.18.4 38 | timezonefinder==2.1.2 39 | geojson==2.3.0 40 | arrow==0.12.0 41 | ``` 42 | 43 | When deploying, `heroku` takes care of installing the modules listed in the `Pipfile` automatically. Locally you can install them (including the development dependencies) with `pipenv install --dev`. 44 | 45 | ## Create a Project on Open Humans 46 | We want to interface with Open Humans for our project. For this reason we need to create a research project on openhumans.org. After creating an account go to https://www.openhumans.org/direct-sharing/projects/manage/ 47 | and generate a new _OAuth_ project. The most important parts to get right are the `enrollment URL` and the `redirect URL`. For your development environment these are the right URLs: 48 | 49 | ``` 50 | enrollment: http://127.0.0.1:5000/users/ 51 | redirect: http://127.0.0.1:5000/users/complete # no trailing slash! 52 | ``` 53 | 54 | ## Start development environment 55 | All good so far? Then we can now start developing in our local environment. 56 | We recommend using the `heroku-cli` interface to boot up both the `celery` worker and the `gunicorn` webserver.
You can install the CLI using `brew install heroku/brew/heroku` (or however that works on non-macs). If you are in the root directory of this repository and run `heroku local:run python manage.py migrate`, it will perform migrations to the database. `heroku local` will then use the `Procfile` to spawn local web & celery servers. If you've configured everything correctly you should be able to point your browser to `http://127.0.0.1:5000/` and see your very own copy of TwArχiv 57 | 58 | #### Heroku configuration 59 | `heroku` will try to read environment variables from `.env` for your local environment. Make sure you have such a file. It should contain the following keys: 60 | 61 | ``` 62 | REDIS_URL=redis:// # where is your redis server located? most likely at this url if in dev 63 | DATABASE_URL=postgres:///username # where does your postgres DB live? 64 | SECRET_KEY=foobar # the Django Secret Key 65 | ON_HEROKU=False # is our app deployed on heroku? 66 | OH_CLIENT_ID=NOT_A_KEY_EITHER # the client ID for your Open Humans project 67 | OH_CLIENT_SECRET=NOTAREALKEY # the secret key you get from Open Humans when creating a project. 68 | OH_ACTIVITY_PAGE=https://www.openhumans.org/activity/your-activity-name/ # What is your Project on Open Humans? 69 | APP_BASE_URL=http://127.0.0.1:5000/users # where is our app located? Open Humans wants to know 70 | 71 | PYTHONUNBUFFERED=true # make sure we can print to console 72 | ``` 73 | 74 | This file contains private data that would allow other parties to take over your project. So make sure that you **don't commit this file to your Git repository**. 75 | 76 | ## Deploy to `heroku` production 77 | Once it's set up it is as easy as running `git push heroku master`. For obvious reasons (see above) it won't have the `.env` file for setting the environment variables. For that reason you have to manually specify them for the production environment. The `heroku cli` makes this easy: 78 | 79 | ``` 80 | heroku config:set SECRET_KEY=foobar 81 | heroku config:set APP_BASE_URL=http://www.example.com 82 | ``` 83 | 84 | **Important:** Don't forget to set `ON_HEROKU=True`. Otherwise the automatic setup of your database environment will not work and you will wonder why the database migrations & queries won't work. 85 | 86 | **You don't have to set the `REDIS_URL` and `DATABASE_URL` in production. This will be done by heroku!** 87 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Bastian Greshake Tzovaras 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | coverage = "*" 8 | flake8 = "*" 9 | vcrpy = "*" 10 | requests-mock = "*" 11 | 12 | 13 | [packages] 14 | gunicorn = "*" 15 | pytz = "*" 16 | gender-guesser = "*" 17 | pandas = "*" 18 | tzwhere = "*" 19 | dj-database-url = "*" 20 | whitenoise = "*" 21 | psycopg2 = "*" 22 | redis = "*" 23 | celery = "*" 24 | requests = "*" 25 | timezonefinder = "*" 26 | geojson = "*" 27 | arrow = "*" 28 | kombu = "*" 29 | ijson = "*" 30 | Django = "*" 31 | 32 | [requires] 33 | python_version = "3.6" 34 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn twitteranalyser.wsgi --log-file=- 2 | worker: celery -A twitteranalyser worker --concurrency=1 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Welcome to the TwArχiv.

2 | 3 | [![Build Status](https://travis-ci.org/gedankenstuecke/twitter-analyser.svg?branch=master)](https://travis-ci.org/gedankenstuecke/twitter-analyser) 4 | [![Maintainability](https://api.codeclimate.com/v1/badges/8bb9400f864188116a91/maintainability)](https://codeclimate.com/github/gedankenstuecke/twitter-analyser/maintainability) 5 | [![Test Coverage](https://api.codeclimate.com/v1/badges/8bb9400f864188116a91/test_coverage)](https://codeclimate.com/github/gedankenstuecke/twitter-analyser/test_coverage) 6 | 7 | [![](docs/tweet_class.png)](http://twarxiv.org) 8 | [![](docs/gender_reply.png)](http://twarxiv.org) 9 | 10 | The [TwArχiv](http://twarxiv.org) is a *Twitter Archive Analyzer* that is designed to take in a complete *Twitter archive* as downloaded from Twitter.com and subsequently analyse it with respect to the number of tweets/replies etc. On the most basic level it displays how a person's tweeting frequency changes over time and how the proportion of tweets/retweets/replies changes over time. 11 | The [TwArχiv](http://twarxiv.org) additionally tries to predict the gender of the people that a given user retweets and replies to, potentially uncovering unconscious bias when it comes to online interactions. 12 | Furthermore, the [TwArχiv](http://twarxiv.org) uses geolocation to display a Tweet-location and movement profile for the user. 13 | 14 | ## Usage / Installation 15 | [TwArχiv](http://twarxiv.org) is a *Python* / *Django* application that has been designed to be deployed on *Heroku*. It is recommended to work with Python3 for installation. For the file storage and user management it interfaces with [Open Humans](https://openhumans.org) through *OAuth*. A deployed version can be seen live in action at [twarxiv.org](http://twarxiv.org). Read [the *INSTALL.md* for detailed installation instructions](https://github.com/gedankenstuecke/twitter-analyser/blob/master/INSTALL.md) 👍. 16 | 17 | ## Contributing 18 | We are always happy about new contributors! [Open an issue](https://github.com/gedankenstuecke/twitter-analyser/issues) if you find a bug or have feature ideas. If you want to contribute code please [head to our *CONTRIBUTING.md*](https://github.com/gedankenstuecke/twitter-analyser/blob/master/CONTRIBUTING.md). Also: You have a larger feature/analysis idea that's not in TwArχiv yet? [Open Humans is giving out grants of up to $5,000](https://www.openhumans.org/grants) for projects that help to grow their eco-system that are a perfect match for this! 19 | -------------------------------------------------------------------------------- /analyser.py: -------------------------------------------------------------------------------- 1 | import json 2 | import datetime 3 | import pytz 4 | import gender_guesser.detector as gender 5 | import pandas as pd 6 | from tzwhere import tzwhere 7 | gender_guesser = gender.Detector(case_sensitive=False) 8 | tzwhere_ = tzwhere.tzwhere() 9 | 10 | # READ JSON FILES FROM TWITTER ARCHIVE! 11 | 12 | 13 | def check_hashtag(single_tweet): 14 | '''check whether tweet has any hashtags''' 15 | return len(single_tweet['entities']['hashtags']) > 0 16 | 17 | 18 | def check_media(single_tweet): 19 | '''check whether tweet has any media attached''' 20 | return len(single_tweet['entities']['media']) > 0 21 | 22 | 23 | def check_url(single_tweet): 24 | '''check whether tweet has any urls attached''' 25 | return len(single_tweet['entities']['urls']) > 0 26 | 27 | 28 | def check_retweet(single_tweet): 29 | ''' 30 | check whether tweet is a RT. 
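(a retweet carries a nested 'retweeted_status' object, which is what we check for below)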
If yes: 31 | return name & user name of the RT'd user. 32 | otherwise just return nones 33 | ''' 34 | if 'retweeted_status' in single_tweet.keys(): 35 | return (single_tweet['retweeted_status']['user']['screen_name'], 36 | single_tweet['retweeted_status']['user']['name']) 37 | else: 38 | return (None, None) 39 | 40 | 41 | def check_coordinates(single_tweet): 42 | ''' 43 | check whether tweet has coordinates attached. 44 | if yes return the coordinates 45 | otherwise just return nones 46 | ''' 47 | if 'coordinates' in single_tweet['geo'].keys(): 48 | return (single_tweet['geo']['coordinates'][0], 49 | single_tweet['geo']['coordinates'][1]) 50 | else: 51 | return (None, None) 52 | 53 | 54 | def check_reply_to(single_tweet): 55 | ''' 56 | check whether tweet is a reply. If yes: 57 | return name & user name of the user that's replied to. 58 | otherwise just return nones 59 | ''' 60 | if 'in_reply_to_screen_name' in single_tweet.keys(): 61 | name = None 62 | for user in single_tweet['entities']['user_mentions']: 63 | if user['screen_name'] == single_tweet['in_reply_to_screen_name']: 64 | name = user['name'] 65 | break 66 | return (single_tweet['in_reply_to_screen_name'], name) 67 | else: 68 | return (None, None) 69 | 70 | 71 | def convert_time(coordinates, time_utc): 72 | ''' 73 | Does this tweet have a geo location? if yes 74 | we can easily convert the UTC timestamp to true local time! 75 | otherwise return nones 76 | ''' 77 | if coordinates[0] and coordinates[1]: 78 | timezone_str = tzwhere_.tzNameAt(coordinates[0], coordinates[1]) 79 | if timezone_str: 80 | timezone = pytz.timezone(timezone_str) 81 | time_obj_local = datetime.datetime.astimezone(time_utc, timezone) 82 | return time_obj_local 83 | 84 | 85 | def create_dataframe(tweets): 86 | ''' 87 | create a pandas dataframe from our tweet jsons 88 | ''' 89 | 90 | # initalize empty lists 91 | utc_time = [] 92 | longitude = [] 93 | latitude = [] 94 | local_time = [] 95 | hashtag = [] 96 | media = [] 97 | url = [] 98 | retweet_user_name = [] 99 | retweet_name = [] 100 | reply_user_name = [] 101 | reply_name = [] 102 | text = [] 103 | # iterate over all tweets and extract data 104 | for single_tweet in tweets: 105 | utc_time.append(datetime.datetime.strptime( 106 | single_tweet['created_at'], '%Y-%m-%d %H:%M:%S %z')) 107 | coordinates = check_coordinates(single_tweet) 108 | latitude.append(coordinates[0]) 109 | longitude.append(coordinates[1]) 110 | local_time.append(convert_time(coordinates, datetime.datetime.strptime( 111 | single_tweet['created_at'], '%Y-%m-%d %H:%M:%S %z'))) 112 | hashtag.append(check_hashtag(single_tweet)) 113 | media.append(check_media(single_tweet)) 114 | url.append(check_url(single_tweet)) 115 | retweet = check_retweet(single_tweet) 116 | retweet_user_name.append(retweet[0]) 117 | retweet_name.append(retweet[1]) 118 | reply = check_reply_to(single_tweet) 119 | reply_user_name.append(reply[0]) 120 | reply_name.append(reply[1]) 121 | text.append(single_tweet['text']) 122 | # convert the whole shebang into a pandas dataframe 123 | dataframe = pd.DataFrame(data={ 124 | 'utc_time': utc_time, 125 | 'local_time': local_time, 126 | 'latitude': latitude, 127 | 'longitude': longitude, 128 | 'hashtag': hashtag, 129 | 'media': media, 130 | 'url': url, 131 | 'retweet_user_name': retweet_user_name, 132 | 'retweet_name': retweet_name, 133 | 'reply_user_name': reply_user_name, 134 | 'reply_name': reply_name, 135 | 'text': text 136 | }) 137 | return dataframe 138 | 139 | 140 | def read_file_index(index_file): 141 | ''' 142 | read file 
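(the tweet_index.js that ships with the archive; see create_main_dataframe below for the default path)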
that lists all 143 | tweet-containing json files 144 | ''' 145 | with open(index_file) as f: 146 | d = f.readlines()[1:] 147 | d = "".join(d) 148 | d = "[{" + d 149 | files = json.loads(d) 150 | return files 151 | 152 | 153 | def read_single_file(fpath): 154 | ''' 155 | read in the json of a single tweet.json 156 | ''' 157 | with open(fpath) as f: 158 | d = f.readlines()[1:] 159 | d = "".join(d) 160 | tweets = json.loads(d) 161 | return tweets 162 | 163 | 164 | def read_files(file_list, base_path): 165 | ''' 166 | use the file list as generated by 167 | read_file_index() to read in the json 168 | of all tweet.json files and convert them 169 | into individual data frames. 170 | Returns them so far not concatenated 171 | ''' 172 | data_frames = [] 173 | for single_file in file_list: 174 | tweets = read_single_file(base_path + '/' + single_file['file_name']) 175 | df_tweets = create_dataframe(tweets) 176 | data_frames.append(df_tweets) 177 | return data_frames 178 | 179 | 180 | def create_main_dataframe(tweet_index='twitter_archive/data/js/tweet_index.js', 181 | base_directory='twitter_archive'): 182 | file_index = read_file_index(tweet_index) 183 | dataframes = read_files(file_index, base_directory) 184 | dataframe = pd.concat(dataframes) 185 | dataframe = dataframe.sort_values('utc_time', ascending=False) 186 | dataframe = dataframe.set_index('utc_time') 187 | dataframe = dataframe.replace(to_replace={ 188 | 'url': {False: None}, 189 | 'hashtag': {False: None}, 190 | 'media': {False: None} 191 | }) 192 | return dataframe 193 | 194 | # GENERATE JSON FOR GRAPHING ON THE WEB 195 | 196 | 197 | def create_all_tweets(dataframe, rolling_frame='180d'): 198 | dataframe_grouped = dataframe.groupby(dataframe.index.date).count() 199 | dataframe_grouped.index = pd.to_datetime(dataframe_grouped.index) 200 | dataframe_mean_week = dataframe_grouped.rolling(rolling_frame).mean() 201 | 202 | 203 | def create_hourly_stats(dataframe): 204 | def get_hour(x): return x.hour 205 | 206 | def get_weekday(x): return x.weekday() 207 | 208 | local_times = dataframe.copy() 209 | local_times = local_times.loc[dataframe['local_time'].notnull()] 210 | 211 | local_times['weekday'] = local_times['local_time'].apply(get_weekday) 212 | local_times['hour'] = local_times['local_time'].apply(get_hour) 213 | 214 | local_times = local_times.replace(to_replace={'weekday': 215 | {0: 'Weekday', 216 | 1: 'Weekday', 217 | 2: 'Weekday', 218 | 3: 'Weekday', 219 | 4: 'Weekday', 220 | 5: 'Weekend', 221 | 6: 'Weekend', 222 | } 223 | }) 224 | 225 | local_times = local_times.groupby( 226 | [local_times['hour'], local_times['weekday']]).size().reset_index() 227 | local_times['values'] = local_times[0] 228 | local_times = local_times.set_index(local_times['hour']) 229 | 230 | return local_times.pivot(columns='weekday', values='values').reset_index() 231 | 232 | 233 | def predict_gender(dataframe, column_name, rolling_frame='180d'): 234 | ''' 235 | take full dataframe w/ tweets and extract 236 | gender for a name-column where applicable 237 | returns two-column df w/ timestamp & gender 238 | ''' 239 | def splitter(x): return x.split()[0] 240 | temp = dataframe[column_name].notnull() 241 | gender_column = dataframe.loc[temp][column_name].apply( 242 | splitter).apply( 243 | gender_guesser.get_gender) 244 | 245 | gender_dataframe = pd.DataFrame(data={ 246 | 'time': list(gender_column.index), 247 | 'gender': list(gender_column) 248 | }) 249 | 250 | gender_dataframe = gender_dataframe.set_index('time') 251 | group = [gender_dataframe.index.date, 
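# group by calendar day and predicted gender; counted via .size() below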
gender_dataframe['gender']] 252 | gender_dataframe_tab = gender_dataframe.groupby(group).size().reset_index() 253 | gender_dataframe_tab['date'] = gender_dataframe_tab['level_0'] 254 | gender_dataframe_tab['count'] = gender_dataframe_tab[0] 255 | gender_dataframe_tab = gender_dataframe_tab.drop([0, 'level_0'], axis=1) 256 | gender_dataframe_tab = gender_dataframe_tab.set_index('date') 257 | gender_dataframe_tab.index = pd.to_datetime(gender_dataframe_tab.index) 258 | gdf_pivot = gender_dataframe_tab.pivot(columns='gender', values='count') 259 | gdf_pivot = gdf_pivot.rolling(rolling_frame).mean() 260 | gdf_pivot = gdf_pivot.reset_index() 261 | gdf_pivot['date'] = gdf_pivot['date'].astype(str) 262 | gdf_pivot = gdf_pivot.drop( 263 | ['mostly_male', 'mostly_female', 'andy', 'unknown'], axis=1) 264 | return gdf_pivot 265 | 266 | # DUMP JSON FOR GRAPHING 267 | 268 | 269 | def write_json_for_graph(dataframe, 270 | outfile='graph.json', 271 | format='records'): 272 | json_object = dataframe.to_json(orient=format) 273 | with open(outfile, 'w') as f: 274 | f.write(json_object) 275 | 276 | 277 | def main(): 278 | dataframe = create_main_dataframe() 279 | retweet_gender = predict_gender(dataframe, 'retweet_name', '180d') 280 | write_json_for_graph(retweet_gender, 'gender_rt.json') 281 | reply_gender = predict_gender(dataframe, 'reply_name', '180d') 282 | write_json_for_graph(reply_gender, 'gender_reply.json') 283 | 284 | if __name__ == "__main__": 285 | main() 286 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | coverage==4.5.1 2 | flake8==3.5.0 3 | vcrpy==1.11.1 4 | requests-mock==1.4 5 | -------------------------------------------------------------------------------- /docs/gender_reply.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/docs/gender_reply.png -------------------------------------------------------------------------------- /docs/tweet_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/docs/tweet_class.png -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "twitteranalyser.settings") 7 | try: 8 | from django.core.management import execute_from_command_line 9 | except ImportError: 10 | # The above import may fail for some other reason. Ensure that the 11 | # issue is really that Django is missing to avoid masking other 12 | # exceptions on Python 2. 13 | try: 14 | import django 15 | except ImportError: 16 | raise ImportError( 17 | "Couldn't import Django. Are you sure it's installed and " 18 | "available on your PYTHONPATH environment variable? Did you " 19 | "forget to activate a virtual environment?"
20 | ) 21 | raise 22 | execute_from_command_line(sys.argv) 23 | -------------------------------------------------------------------------------- /new_tweet_subset.js: -------------------------------------------------------------------------------- 1 | window.YTD.tweet.part0 = [ { 2 | "retweeted" : false, 3 | "source" : "Tweetbot for iΟS", 4 | "entities" : { 5 | "hashtags" : [ ], 6 | "symbols" : [ ], 7 | "user_mentions" : [ { 8 | "name" : "Philipp Bayer", 9 | "screen_name" : "PhilippBayer", 10 | "indices" : [ "16", "29" ], 11 | "id_str" : "121777206", 12 | "id" : "121777206" 13 | } ], 14 | "urls" : [ { 15 | "url" : "https://t.co/w8pz21j6Vv", 16 | "expanded_url" : "https://www.goodreads.com/review/show/652888163", 17 | "display_url" : "goodreads.com/review/show/65…", 18 | "indices" : [ "68", "91" ] 19 | } ] 20 | }, 21 | "display_text_range" : [ "0", "91" ], 22 | "favorite_count" : "1", 23 | "in_reply_to_status_id_str" : "1159965221924024320", 24 | "id_str" : "1159965503051485184", 25 | "in_reply_to_user_id" : "14286491", 26 | "truncated" : false, 27 | "retweet_count" : "0", 28 | "id" : "1159965503051485184", 29 | "in_reply_to_status_id" : "1159965221924024320", 30 | "possibly_sensitive" : false, 31 | "created_at" : "Fri Aug 09 23:11:41 +0000 2019", 32 | "favorited" : false, 33 | "full_text" : "Read the review @PhilippBayer wrote on the mentioned autobiography. https://t.co/w8pz21j6Vv", 34 | "lang" : "en", 35 | "in_reply_to_screen_name" : "gedankenstuecke", 36 | "in_reply_to_user_id_str" : "14286491" 37 | }, { 38 | "retweeted" : false, 39 | "source" : "Tweetbot for iΟS", 40 | "entities" : { 41 | "hashtags" : [ ], 42 | "symbols" : [ ], 43 | "user_mentions" : [ ], 44 | "urls" : [ { 45 | "url" : "https://t.co/Vqhq5TAdBV", 46 | "expanded_url" : "https://twitter.com/DavidBLowry/status/1159836767958138880", 47 | "display_url" : "twitter.com/DavidBLowry/st…", 48 | "indices" : [ "114", "137" ] 49 | } ] 50 | }, 51 | "display_text_range" : [ "0", "137" ], 52 | "favorite_count" : "2", 53 | "id_str" : "1159965221924024320", 54 | "truncated" : false, 55 | "retweet_count" : "0", 56 | "id" : "1159965221924024320", 57 | "possibly_sensitive" : false, 58 | "created_at" : "Fri Aug 09 23:10:34 +0000 2019", 59 | "favorited" : false, 60 | "full_text" : "He and the glowing raccoons are finally reunited, reading horoscopes and denying any link between HIV & AIDS. 
https://t.co/Vqhq5TAdBV", 61 | "lang" : "en" 62 | }, { 63 | "retweeted" : false, 64 | "source" : "Tweetbot for iΟS", 65 | "entities" : { 66 | "hashtags" : [ ], 67 | "symbols" : [ ], 68 | "user_mentions" : [ { 69 | "name" : "Nazeefa \uD83C\uDF40☄", 70 | "screen_name" : "NazeefaFatima", 71 | "indices" : [ "0", "14" ], 72 | "id_str" : "37054704", 73 | "id" : "37054704" 74 | } ], 75 | "urls" : [ ] 76 | }, 77 | "display_text_range" : [ "0", "57" ], 78 | "favorite_count" : "1", 79 | "in_reply_to_status_id_str" : "1159823677703282688", 80 | "id_str" : "1159852950254227461", 81 | "in_reply_to_user_id" : "37054704", 82 | "truncated" : false, 83 | "retweet_count" : "0", 84 | "id" : "1159852950254227461", 85 | "in_reply_to_status_id" : "1159823677703282688", 86 | "created_at" : "Fri Aug 09 15:44:27 +0000 2019", 87 | "favorited" : false, 88 | "full_text" : "@NazeefaFatima It was so great to finally meet in person!", 89 | "lang" : "en", 90 | "in_reply_to_screen_name" : "NazeefaFatima", 91 | "in_reply_to_user_id_str" : "37054704" 92 | }, { 93 | "retweeted" : false, 94 | "source" : "Twitter Web App", 95 | "entities" : { 96 | "user_mentions" : [ { 97 | "name" : "Philip Ellis", 98 | "screen_name" : "Philip_Ellis", 99 | "indices" : [ "3", "16" ], 100 | "id_str" : "222444337", 101 | "id" : "222444337" 102 | } ], 103 | "urls" : [ ], 104 | "symbols" : [ ], 105 | "media" : [ { 106 | "expanded_url" : "https://twitter.com/zbgolia/status/1154777994252161026/video/1", 107 | "source_status_id" : "1154777994252161026", 108 | "indices" : [ "88", "111" ], 109 | "url" : "https://t.co/kiSlwXQ9Fn", 110 | "media_url" : "http://pbs.twimg.com/ext_tw_video_thumb/1154777848395304961/pu/img/DCzgIYqzvou0VeLs.jpg", 111 | "id_str" : "1154777848395304961", 112 | "source_user_id" : "259113321", 113 | "id" : "1154777848395304961", 114 | "media_url_https" : "https://pbs.twimg.com/ext_tw_video_thumb/1154777848395304961/pu/img/DCzgIYqzvou0VeLs.jpg", 115 | "source_user_id_str" : "259113321", 116 | "sizes" : { 117 | "thumb" : { 118 | "w" : "150", 119 | "h" : "150", 120 | "resize" : "crop" 121 | }, 122 | "medium" : { 123 | "w" : "1200", 124 | "h" : "675", 125 | "resize" : "fit" 126 | }, 127 | "small" : { 128 | "w" : "680", 129 | "h" : "383", 130 | "resize" : "fit" 131 | }, 132 | "large" : { 133 | "w" : "1280", 134 | "h" : "720", 135 | "resize" : "fit" 136 | } 137 | }, 138 | "type" : "photo", 139 | "source_status_id_str" : "1154777994252161026", 140 | "display_url" : "pic.twitter.com/kiSlwXQ9Fn" 141 | } ], 142 | "hashtags" : [ ] 143 | }, 144 | "display_text_range" : [ "0", "111" ], 145 | "favorite_count" : "0", 146 | "id_str" : "1159146575622475776", 147 | "truncated" : false, 148 | "retweet_count" : "0", 149 | "id" : "1159146575622475776", 150 | "possibly_sensitive" : false, 151 | "created_at" : "Wed Aug 07 16:57:34 +0000 2019", 152 | "favorited" : false, 153 | "full_text" : "RT @Philip_Ellis: [JOB INTERVIEW]\n\nInterviewer: Do you have any questions for us?\n\nMe: https://t.co/kiSlwXQ9Fn", 154 | "lang" : "en", 155 | "extended_entities" : { 156 | "media" : [ { 157 | "expanded_url" : "https://twitter.com/zbgolia/status/1154777994252161026/video/1", 158 | "source_status_id" : "1154777994252161026", 159 | "indices" : [ "88", "111" ], 160 | "url" : "https://t.co/kiSlwXQ9Fn", 161 | "media_url" : "http://pbs.twimg.com/ext_tw_video_thumb/1154777848395304961/pu/img/DCzgIYqzvou0VeLs.jpg", 162 | "id_str" : "1154777848395304961", 163 | "video_info" : { 164 | "aspect_ratio" : [ "16", "9" ], 165 | "duration_millis" : "45412", 166 | "variants" : [ { 167 
| "bitrate" : "256000", 168 | "content_type" : "video/mp4", 169 | "url" : "https://video.twimg.com/ext_tw_video/1154777848395304961/pu/vid/480x270/9dXRMkihwUJrcedP.mp4?tag=10" 170 | }, { 171 | "bitrate" : "832000", 172 | "content_type" : "video/mp4", 173 | "url" : "https://video.twimg.com/ext_tw_video/1154777848395304961/pu/vid/640x360/XI7BsQJUDBaKeBz9.mp4?tag=10" 174 | }, { 175 | "content_type" : "application/x-mpegURL", 176 | "url" : "https://video.twimg.com/ext_tw_video/1154777848395304961/pu/pl/EzF1MVQbpeN4oXWg.m3u8?tag=10" 177 | }, { 178 | "bitrate" : "2176000", 179 | "content_type" : "video/mp4", 180 | "url" : "https://video.twimg.com/ext_tw_video/1154777848395304961/pu/vid/1280x720/g60m28vGjo1uKFrj.mp4?tag=10" 181 | } ] 182 | }, 183 | "source_user_id" : "259113321", 184 | "additional_media_info" : { 185 | "monetizable" : false 186 | }, 187 | "id" : "1154777848395304961", 188 | "media_url_https" : "https://pbs.twimg.com/ext_tw_video_thumb/1154777848395304961/pu/img/DCzgIYqzvou0VeLs.jpg", 189 | "source_user_id_str" : "259113321", 190 | "sizes" : { 191 | "thumb" : { 192 | "w" : "150", 193 | "h" : "150", 194 | "resize" : "crop" 195 | }, 196 | "medium" : { 197 | "w" : "1200", 198 | "h" : "675", 199 | "resize" : "fit" 200 | }, 201 | "small" : { 202 | "w" : "680", 203 | "h" : "383", 204 | "resize" : "fit" 205 | }, 206 | "large" : { 207 | "w" : "1280", 208 | "h" : "720", 209 | "resize" : "fit" 210 | } 211 | }, 212 | "type" : "video", 213 | "source_status_id_str" : "1154777994252161026", 214 | "display_url" : "pic.twitter.com/kiSlwXQ9Fn" 215 | } ] 216 | } 217 | }, { 218 | "retweeted" : false, 219 | "source" : "Tweetbot for iΟS", 220 | "entities" : { 221 | "hashtags" : [ ], 222 | "symbols" : [ ], 223 | "user_mentions" : [ { 224 | "name" : "liubov", 225 | "screen_name" : "luyibov", 226 | "indices" : [ "0", "8" ], 227 | "id_str" : "2889619139", 228 | "id" : "2889619139" 229 | }, { 230 | "name" : "Jake Wintermute", 231 | "screen_name" : "SynBio1", 232 | "indices" : [ "9", "17" ], 233 | "id_str" : "2570913493", 234 | "id" : "2570913493" 235 | }, { 236 | "name" : "marc santolini", 237 | "screen_name" : "msantolini", 238 | "indices" : [ "18", "29" ], 239 | "id_str" : "299603744", 240 | "id" : "299603744" 241 | }, { 242 | "name" : "Roberto Toro", 243 | "screen_name" : "R3RT0", 244 | "indices" : [ "30", "36" ], 245 | "id_str" : "2231179117", 246 | "id" : "2231179117" 247 | }, { 248 | "name" : "katja heuer", 249 | "screen_name" : "katjaQheuer", 250 | "indices" : [ "37", "49" ], 251 | "id_str" : "2981013099", 252 | "id" : "2981013099" 253 | }, { 254 | "name" : "Jon Tennant", 255 | "screen_name" : "Protohedgehog", 256 | "indices" : [ "50", "64" ], 257 | "id_str" : "352650591", 258 | "id" : "352650591" 259 | } ], 260 | "urls" : [ ] 261 | }, 262 | "display_text_range" : [ "0", "126" ], 263 | "favorite_count" : "3", 264 | "in_reply_to_status_id_str" : "1159132637795160069", 265 | "id_str" : "1159140448377724929", 266 | "in_reply_to_user_id" : "2889619139", 267 | "truncated" : false, 268 | "retweet_count" : "0", 269 | "id" : "1159140448377724929", 270 | "in_reply_to_status_id" : "1159132637795160069", 271 | "created_at" : "Wed Aug 07 16:33:13 +0000 2019", 272 | "favorited" : false, 273 | "full_text" : "@luyibov @SynBio1 @msantolini @R3RT0 @katjaQheuer @Protohedgehog I’m not an expert. But I think these are called “stripes”. 
:p", 274 | "lang" : "en", 275 | "in_reply_to_screen_name" : "luyibov", 276 | "in_reply_to_user_id_str" : "2889619139" 277 | }, { 278 | "retweeted" : false, 279 | "source" : "Tweetbot for iΟS", 280 | "entities" : { 281 | "hashtags" : [ ], 282 | "symbols" : [ ], 283 | "user_mentions" : [ { 284 | "name" : "Bastian Greshake Tzovaras", 285 | "screen_name" : "gedankenstuecke", 286 | "indices" : [ "3", "19" ], 287 | "id_str" : "14286491", 288 | "id" : "14286491" 289 | } ], 290 | "urls" : [ { 291 | "url" : "https://t.co/Xq0JEgsRhW", 292 | "expanded_url" : "https://twitter.com/Michael__Rera/status/1158374078996242432", 293 | "display_url" : "twitter.com/Michael__Rera/…", 294 | "indices" : [ "76", "99" ] 295 | } ] 296 | }, 297 | "display_text_range" : [ "0", "99" ], 298 | "favorite_count" : "0", 299 | "id_str" : "1159080024080887809", 300 | "truncated" : false, 301 | "retweet_count" : "0", 302 | "id" : "1159080024080887809", 303 | "possibly_sensitive" : false, 304 | "created_at" : "Wed Aug 07 12:33:07 +0000 2019", 305 | "favorited" : false, 306 | "full_text" : "RT @gedankenstuecke: That would be me, looking for an apartment in Paris. \uD83D\uDE00 https://t.co/Xq0JEgsRhW", 307 | "lang" : "en" 308 | }, { 309 | "retweeted" : false, 310 | "source" : "Tweetbot for iΟS", 311 | "entities" : { 312 | "hashtags" : [ ], 313 | "symbols" : [ ], 314 | "user_mentions" : [ { 315 | "name" : "marc santolini", 316 | "screen_name" : "msantolini", 317 | "indices" : [ "0", "11" ], 318 | "id_str" : "299603744", 319 | "id" : "299603744" 320 | }, { 321 | "name" : "Jon Tennant", 322 | "screen_name" : "Protohedgehog", 323 | "indices" : [ "12", "26" ], 324 | "id_str" : "352650591", 325 | "id" : "352650591" 326 | } ], 327 | "urls" : [ ] 328 | }, 329 | "display_text_range" : [ "0", "75" ], 330 | "favorite_count" : "0", 331 | "in_reply_to_status_id_str" : "1158848834556051456", 332 | "id_str" : "1158849711681523713", 333 | "in_reply_to_user_id" : "299603744", 334 | "truncated" : false, 335 | "retweet_count" : "0", 336 | "id" : "1158849711681523713", 337 | "in_reply_to_status_id" : "1158848834556051456", 338 | "created_at" : "Tue Aug 06 21:17:56 +0000 2019", 339 | "favorited" : false, 340 | "full_text" : "@msantolini @Protohedgehog I should have put the scare quotes: “Restaurant”", 341 | "lang" : "en", 342 | "in_reply_to_screen_name" : "msantolini", 343 | "in_reply_to_user_id_str" : "299603744" 344 | }, { 345 | "retweeted" : false, 346 | "source" : "Tweetbot for iΟS", 347 | "entities" : { 348 | "hashtags" : [ ], 349 | "symbols" : [ ], 350 | "user_mentions" : [ { 351 | "name" : "marc santolini", 352 | "screen_name" : "msantolini", 353 | "indices" : [ "0", "11" ], 354 | "id_str" : "299603744", 355 | "id" : "299603744" 356 | }, { 357 | "name" : "Jon Tennant", 358 | "screen_name" : "Protohedgehog", 359 | "indices" : [ "12", "26" ], 360 | "id_str" : "352650591", 361 | "id" : "352650591" 362 | } ], 363 | "urls" : [ ] 364 | }, 365 | "display_text_range" : [ "0", "107" ], 366 | "favorite_count" : "4", 367 | "in_reply_to_status_id_str" : "1158836352387100677", 368 | "id_str" : "1158836779841150978", 369 | "in_reply_to_user_id" : "299603744", 370 | "truncated" : false, 371 | "retweet_count" : "0", 372 | "id" : "1158836779841150978", 373 | "in_reply_to_status_id" : "1158836352387100677", 374 | "created_at" : "Tue Aug 06 20:26:33 +0000 2019", 375 | "favorited" : false, 376 | "full_text" : "@msantolini @Protohedgehog Yep, traditionally the only vegetarian thing you can eat in German restaurants \uD83D\uDE02", 377 | "lang" : "en", 378 | 
"in_reply_to_screen_name" : "msantolini", 379 | "in_reply_to_user_id_str" : "299603744" 380 | }, { 381 | "retweeted" : false, 382 | "source" : "Tweetbot for iΟS", 383 | "entities" : { 384 | "hashtags" : [ ], 385 | "symbols" : [ ], 386 | "user_mentions" : [ { 387 | "name" : "Michael Rera", 388 | "screen_name" : "Michael__Rera", 389 | "indices" : [ "0", "14" ], 390 | "id_str" : "4075485214", 391 | "id" : "4075485214" 392 | }, { 393 | "name" : "KIEZ Bistro Allemand", 394 | "screen_name" : "KIEZ_Bistro", 395 | "indices" : [ "15", "27" ], 396 | "id_str" : "2514706255", 397 | "id" : "2514706255" 398 | } ], 399 | "urls" : [ ] 400 | }, 401 | "display_text_range" : [ "0", "177" ], 402 | "favorite_count" : "1", 403 | "in_reply_to_status_id_str" : "1158835594744799239", 404 | "id_str" : "1158836067337940992", 405 | "in_reply_to_user_id" : "4075485214", 406 | "truncated" : false, 407 | "retweet_count" : "1", 408 | "id" : "1158836067337940992", 409 | "in_reply_to_status_id" : "1158835594744799239", 410 | "created_at" : "Tue Aug 06 20:23:43 +0000 2019", 411 | "favorited" : false, 412 | "full_text" : "@Michael__Rera @KIEZ_Bistro In good German tradition we were called ‘boring people’ (Langweiler) all evening for eating the vegetarian version of Käsespätzle. Would go again! :D", 413 | "lang" : "en", 414 | "in_reply_to_screen_name" : "Michael__Rera", 415 | "in_reply_to_user_id_str" : "4075485214" 416 | }, { 417 | "retweeted" : false, 418 | "source" : "Tweetbot for iΟS", 419 | "entities" : { 420 | "user_mentions" : [ { 421 | "name" : "Michael Rera", 422 | "screen_name" : "Michael__Rera", 423 | "indices" : [ "5", "19" ], 424 | "id_str" : "4075485214", 425 | "id" : "4075485214" 426 | } ], 427 | "urls" : [ ], 428 | "symbols" : [ ], 429 | "media" : [ { 430 | "expanded_url" : "https://twitter.com/gedankenstuecke/status/1158806049874436096/photo/1", 431 | "indices" : [ "100", "123" ], 432 | "url" : "https://t.co/UYfuNLdOmD", 433 | "media_url" : "http://pbs.twimg.com/media/EBTn72vWsAA9L4H.jpg", 434 | "id_str" : "1158806019633491968", 435 | "id" : "1158806019633491968", 436 | "media_url_https" : "https://pbs.twimg.com/media/EBTn72vWsAA9L4H.jpg", 437 | "sizes" : { 438 | "thumb" : { 439 | "w" : "150", 440 | "h" : "150", 441 | "resize" : "crop" 442 | }, 443 | "medium" : { 444 | "w" : "1200", 445 | "h" : "900", 446 | "resize" : "fit" 447 | }, 448 | "small" : { 449 | "w" : "680", 450 | "h" : "510", 451 | "resize" : "fit" 452 | }, 453 | "large" : { 454 | "w" : "2048", 455 | "h" : "1536", 456 | "resize" : "fit" 457 | } 458 | }, 459 | "type" : "photo", 460 | "display_url" : "pic.twitter.com/UYfuNLdOmD" 461 | } ], 462 | "hashtags" : [ ] 463 | }, 464 | "display_text_range" : [ "0", "123" ], 465 | "favorite_count" : "4", 466 | "id_str" : "1158806049874436096", 467 | "truncated" : false, 468 | "retweet_count" : "1", 469 | "id" : "1158806049874436096", 470 | "possibly_sensitive" : false, 471 | "created_at" : "Tue Aug 06 18:24:26 +0000 2019", 472 | "favorited" : false, 473 | "full_text" : "When @Michael__Rera picks where you should go for dinner and he directs you to a German beer place. 
https://t.co/UYfuNLdOmD", 474 | "lang" : "en", 475 | "extended_entities" : { 476 | "media" : [ { 477 | "expanded_url" : "https://twitter.com/gedankenstuecke/status/1158806049874436096/photo/1", 478 | "indices" : [ "100", "123" ], 479 | "url" : "https://t.co/UYfuNLdOmD", 480 | "media_url" : "http://pbs.twimg.com/media/EBTn72vWsAA9L4H.jpg", 481 | "id_str" : "1158806019633491968", 482 | "id" : "1158806019633491968", 483 | "media_url_https" : "https://pbs.twimg.com/media/EBTn72vWsAA9L4H.jpg", 484 | "sizes" : { 485 | "thumb" : { 486 | "w" : "150", 487 | "h" : "150", 488 | "resize" : "crop" 489 | }, 490 | "medium" : { 491 | "w" : "1200", 492 | "h" : "900", 493 | "resize" : "fit" 494 | }, 495 | "small" : { 496 | "w" : "680", 497 | "h" : "510", 498 | "resize" : "fit" 499 | }, 500 | "large" : { 501 | "w" : "2048", 502 | "h" : "1536", 503 | "resize" : "fit" 504 | } 505 | }, 506 | "type" : "photo", 507 | "display_url" : "pic.twitter.com/UYfuNLdOmD" 508 | } ] 509 | } 510 | }, { 511 | "retweeted" : false, 512 | "source" : "Tweetbot for Mac", 513 | "entities" : { 514 | "hashtags" : [ ], 515 | "symbols" : [ ], 516 | "user_mentions" : [ { 517 | "name" : "Hervé Ménager", 518 | "screen_name" : "rvmngr", 519 | "indices" : [ "0", "7" ], 520 | "id_str" : "632591287", 521 | "id" : "632591287" 522 | } ], 523 | "urls" : [ ] 524 | }, 525 | "display_text_range" : [ "0", "32" ], 526 | "favorite_count" : "1", 527 | "in_reply_to_status_id_str" : "1158467245313581056", 528 | "id_str" : "1158520189102825475", 529 | "in_reply_to_user_id" : "632591287", 530 | "truncated" : false, 531 | "retweet_count" : "0", 532 | "id" : "1158520189102825475", 533 | "in_reply_to_status_id" : "1158467245313581056", 534 | "created_at" : "Mon Aug 05 23:28:32 +0000 2019", 535 | "favorited" : false, 536 | "full_text" : "@rvmngr Awesome, thanks so much!", 537 | "lang" : "en", 538 | "in_reply_to_screen_name" : "rvmngr", 539 | "in_reply_to_user_id_str" : "632591287" 540 | }] 541 | -------------------------------------------------------------------------------- /static/foo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/static/foo -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | import celery 2 | import os 3 | app = celery.Celery('example') 4 | app.conf.update(BROKER_URL=os.environ['REDIS_URL'], 5 | CELERY_RESULT_BACKEND=os.environ['REDIS_URL']) 6 | 7 | 8 | @app.task 9 | def add(x, y): 10 | return x + y 11 | -------------------------------------------------------------------------------- /test_archive_2016_2017_2_months.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/test_archive_2016_2017_2_months.zip -------------------------------------------------------------------------------- /tweet_display/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/tweet_display/__init__.py -------------------------------------------------------------------------------- /tweet_display/admin.py: -------------------------------------------------------------------------------- 1 | # Register your models here. 
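# A possible registration sketch (not part of the original file): making
# the Graph model from tweet_display/models.py editable in Django's admin
# would only take:
#
#     from django.contrib import admin
#     from .models import Graph
#
#     admin.site.register(Graph)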
2 | -------------------------------------------------------------------------------- /tweet_display/analyse_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import gender_guesser.detector as gender 3 | import geojson 4 | gender_guesser = gender.Detector(case_sensitive=False) 5 | 6 | 7 | def predict_gender(dataframe, column_name, rolling_frame='180d'): 8 | ''' 9 | take full dataframe w/ tweets and extract 10 | gender for a name-column where applicable 11 | returns two-column df w/ timestamp & gender 12 | ''' 13 | def splitter(x): return ''.join(x.split()[:1]) 14 | temp = dataframe[column_name].notnull() 15 | gender_column = dataframe.loc[temp][column_name].apply( 16 | splitter).apply( 17 | gender_guesser.get_gender) 18 | 19 | gender_dataframe = pd.DataFrame(data={ 20 | 'time': list(gender_column.index), 21 | 'gender': list(gender_column) 22 | }) 23 | 24 | gender_dataframe = gender_dataframe.set_index('time') 25 | group = [gender_dataframe.index.date, gender_dataframe['gender']] 26 | gender_dataframe_tab = gender_dataframe.groupby(group).size().reset_index() 27 | gender_dataframe_tab['date'] = gender_dataframe_tab['level_0'] 28 | gender_dataframe_tab['count'] = gender_dataframe_tab[0] 29 | gender_dataframe_tab = gender_dataframe_tab.drop([0, 'level_0'], axis=1) 30 | gender_dataframe_tab = gender_dataframe_tab.set_index('date') 31 | gender_dataframe_tab.index = pd.to_datetime(gender_dataframe_tab.index) 32 | gdf_pivot = gender_dataframe_tab.pivot(columns='gender', values='count') 33 | gdf_pivot = gdf_pivot.rolling(rolling_frame).mean() 34 | gdf_pivot = gdf_pivot.reset_index() 35 | gdf_pivot['date'] = gdf_pivot['date'].astype(str) 36 | gdf_pivot = gdf_pivot.drop( 37 | ['mostly_male', 'mostly_female', 'andy', 'unknown'], axis=1) 38 | return gdf_pivot 39 | 40 | 41 | def create_hourly_stats(dataframe): 42 | def get_hour(x): return x.hour 43 | 44 | def get_weekday(x): return x.weekday() 45 | 46 | local_times = dataframe.copy() 47 | local_times = local_times.loc[dataframe['local_time'].notnull()] 48 | 49 | local_times['weekday'] = local_times['local_time'].apply(get_weekday) 50 | local_times['hour'] = local_times['local_time'].apply(get_hour) 51 | 52 | local_times = local_times.replace(to_replace={'weekday': 53 | {0: 'Weekday', 54 | 1: 'Weekday', 55 | 2: 'Weekday', 56 | 3: 'Weekday', 57 | 4: 'Weekday', 58 | 5: 'Weekend', 59 | 6: 'Weekend', 60 | } 61 | }) 62 | 63 | local_times = local_times.groupby( 64 | [local_times['hour'], local_times['weekday']]).size().reset_index() 65 | local_times['values'] = local_times[0] 66 | local_times = local_times.set_index(local_times['hour']) 67 | 68 | local_times = local_times.pivot( 69 | columns='weekday', values='values').reset_index() 70 | local_times['Weekday'] = local_times['Weekday'] / 5 71 | local_times['Weekend'] = local_times['Weekend'] / 2 72 | 73 | return local_times.reset_index() 74 | 75 | 76 | def create_tweet_types(dataframe): 77 | dataframe_grouped = dataframe.groupby(dataframe.index.date).count() 78 | dataframe_grouped.index = pd.to_datetime(dataframe_grouped.index) 79 | 80 | dataframe_mean_week = dataframe_grouped.rolling('180d').mean() 81 | dataframe_mean_week['p_url'] = ( 82 | dataframe_mean_week['url'] / dataframe_mean_week['text']) * 100 83 | dataframe_mean_week['p_media'] = ( 84 | dataframe_mean_week['media'] / dataframe_mean_week['text']) * 100 85 | dataframe_mean_week['p_reply'] = ( 86 | dataframe_mean_week['reply_name'] / dataframe_mean_week['text']) * 100 87 | 
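# each p_* column expresses one tweet feature as a percentage of all
# tweets in the rolling window; 'text' doubles as the total tweet count,
# since every tweet has a text field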
dataframe_mean_week['p_rt'] = ( 88 | dataframe_mean_week['retweet_name'] / dataframe_mean_week['text']) * 100 89 | dataframe_mean_week['p_hash'] = ( 90 | dataframe_mean_week['hashtag'] / dataframe_mean_week['text']) * 100 91 | dataframe_mean_week['p_other'] = 100 - \ 92 | (dataframe_mean_week['p_reply'] + dataframe_mean_week['p_rt']) 93 | 94 | dataframe_mean_week = dataframe_mean_week.reset_index() 95 | dataframe_mean_week['date'] = dataframe_mean_week['index'].astype(str) 96 | dataframe_mean_week = dataframe_mean_week.drop(['reply_user_name', 97 | 'retweet_user_name', 98 | 'latitude', 99 | 'longitude', 100 | 'local_time', 101 | 'url', 102 | 'media', 103 | 'reply_name', 104 | 'retweet_name', 105 | 'hashtag', 106 | 'index', 107 | ], 108 | axis=1) 109 | 110 | return dataframe_mean_week.reset_index() 111 | 112 | 113 | def create_top_replies(dataframe): 114 | top_replies = dataframe[dataframe['reply_user_name'].isin( 115 | list(dataframe['reply_user_name'].value_counts()[:5].reset_index()['index']))] 116 | top_replies = top_replies.reset_index()[['reply_user_name', 'utc_time']] 117 | top_replies['utc_time'] = top_replies['utc_time'].dt.date 118 | top_replies = top_replies.groupby(["utc_time", "reply_user_name"]).size() 119 | top_replies = top_replies.reset_index() 120 | top_replies['date'] = top_replies['utc_time'].astype(str) 121 | top_replies['value'] = top_replies[0] 122 | top_replies = top_replies.drop([0, 'utc_time'], axis=1) 123 | top_replies['date'] = pd.to_datetime(top_replies['date']) 124 | group = ['reply_user_name', pd.Grouper(key='date', freq='QS')] 125 | top_replies = top_replies.groupby( 126 | group)['value'].sum().reset_index().sort_values('date') 127 | top_replies['date'] = top_replies['date'].astype(str) 128 | return top_replies.reset_index().pivot(index='date', columns='reply_user_name', values='value').fillna(value=0).reset_index() 129 | 130 | 131 | def create_heatmap(dataframe): 132 | latitudes = dataframe['latitude'].notnull() 133 | return dataframe[latitudes][['latitude', 'longitude']] 134 | 135 | 136 | def create_overall(dataframe): 137 | dataframe_grouped = dataframe.groupby(dataframe.index.date).count()['text'] 138 | dataframe_grouped.index = pd.to_datetime(dataframe_grouped.index) 139 | dataframe_mean_week = dataframe_grouped.rolling('180d').mean() 140 | dataframe_mean_week = dataframe_mean_week.reset_index() 141 | dataframe_mean_week['tweets'] = dataframe_mean_week['text'] 142 | dataframe_mean_week['date'] = dataframe_mean_week['index'].dt.date.astype( 143 | str) 144 | return dataframe_mean_week[['date', 'tweets']] 145 | 146 | 147 | def create_timeline(dataframe): 148 | latitudes = dataframe['latitude'].notnull() 149 | timeline = dataframe[latitudes][['latitude', 'longitude']] 150 | timeline['start'] = timeline.index.date 151 | timeline['end'] = pd.Series(index=timeline.index).tshift( 152 | periods=28, freq='D').index.date 153 | features = [] 154 | timeline.apply(lambda X: features.append( 155 | geojson.Feature(geometry=geojson.Point((float(X["longitude"]), 156 | float(X["latitude"]),)), 157 | properties=dict(start=str(X["start"]), 158 | end=str(X["end"]))) 159 | ), axis=1) 160 | 161 | return geojson.dumps(geojson.FeatureCollection(features)) 162 | -------------------------------------------------------------------------------- /tweet_display/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class TweetDisplayConfig(AppConfig): 5 | name = 'tweet_display' 6 | 
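# NOTE: Django picks up this AppConfig when the app is listed in
# INSTALLED_APPS (presumably as 'tweet_display' in twitteranalyser/settings.py).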
-------------------------------------------------------------------------------- /tweet_display/helper.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from users.models import OpenHumansMember 3 | from .models import Graph 4 | 5 | 6 | def grant_access(request, oh_id): 7 | if oh_id is None: 8 | if request.user.is_authenticated: 9 | return request.user.openhumansmember.oh_id 10 | else: 11 | if (OpenHumansMember.objects.get(oh_id=oh_id).public or 12 | (request.user.is_authenticated and 13 | request.user.openhumansmember.oh_id == oh_id)): 14 | return oh_id 15 | return False 16 | 17 | 18 | def get_file_url(oh_id): 19 | oh_member = OpenHumansMember.objects.get(oh_id=oh_id) 20 | token = oh_member.get_access_token() 21 | req = requests.get( 22 | 'https://www.openhumans.org/api/direct-sharing/' 23 | 'project/exchange-member/', params={'access_token': token}) 24 | if req.status_code == 200 and 'data' in req.json(): 25 | data = req.json()['data'] 26 | # WARNING! This assumes the first file encountered is what you want! 27 | if len(data) > 0: 28 | return data[0]['download_url'] 29 | return None 30 | 31 | 32 | def get_current_user(request): 33 | if request.user.is_authenticated: 34 | return request.user.openhumansmember.oh_id 35 | return None 36 | 37 | 38 | def check_graphs(graph_types, oh_id): 39 | graphs_ready = [] 40 | for graph in graph_types: 41 | found = Graph.objects.filter(graph_type__exact=graph, 42 | open_humans_member__oh_id=oh_id) 43 | if found: 44 | graphs_ready.append(graph) 45 | return graphs_ready 46 | 47 | 48 | def message_success(oh_user): 49 | print('trying to send message for {}'.format(oh_user.oh_id)) 50 | subject = 'Your graphs are ready!' 51 | message = 'Dear TwArxiv user,\nthe graphs generated from your Twitter \ 52 | archive are now ready for you.\nGo over to \ 53 | https://twtr-analyser.herokuapp.com/tweet_display/index/{} to \ 54 | view them'.format(oh_user.oh_id) 55 | message_url = 'https://www.openhumans.org/api/direct-sharing/project/message/?access_token={}'.format( 56 | oh_user.get_access_token()) 57 | response = requests.post(message_url, data={'subject': subject, 58 | 'message': message}) 59 | print(response) 60 | print(response.json()) 61 | -------------------------------------------------------------------------------- /tweet_display/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11.3 on 2017-11-27 23:57 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | initial = True 11 | 12 | dependencies = [ 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='Graph', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('graph_type', models.CharField(max_length=200)), 21 | ('graph_description', models.CharField(max_length=200)), 22 | ('graph_data', models.TextField()), 23 | ], 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /tweet_display/migrations/0002_graph_open_humans_member.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11.3 on 2017-12-12 23:11 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import 
django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('users', '0001_initial'), 13 | ('tweet_display', '0001_initial'), 14 | ] 15 | 16 | operations = [ 17 | migrations.AddField( 18 | model_name='graph', 19 | name='open_humans_member', 20 | field=models.ForeignKey(blank=True, 21 | null=True, 22 | on_delete=django.db.models.deletion.CASCADE, 23 | to='users.OpenHumansMember'), 24 | preserve_default=False, 25 | ), 26 | ] 27 | -------------------------------------------------------------------------------- /tweet_display/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/tweet_display/migrations/__init__.py -------------------------------------------------------------------------------- /tweet_display/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | from users.models import OpenHumansMember 3 | 4 | 5 | class Graph(models.Model): 6 | graph_type = models.CharField(max_length=200) 7 | graph_description = models.CharField(max_length=200) 8 | graph_data = models.TextField() 9 | open_humans_member = models.ForeignKey(OpenHumansMember, 10 | blank=True, null=True, 11 | on_delete=models.CASCADE) 12 | 13 | def __str__(self): 14 | return self.graph_type + ': ' + self.graph_description 15 | -------------------------------------------------------------------------------- /tweet_display/read_data.py: -------------------------------------------------------------------------------- 1 | from timezonefinder import TimezoneFinder 2 | import tempfile 3 | import zipfile 4 | import json 5 | import datetime 6 | import pytz 7 | import ijson 8 | import io 9 | import pandas as pd 10 | import requests 11 | import os 12 | 13 | # tzwhere_ = tzwhere.tzwhere() 14 | tzf = TimezoneFinder() 15 | 16 | 17 | # READ JSON FILES FROM TWITTER ARCHIVE! 18 | 19 | def check_hashtag(single_tweet): 20 | '''check whether tweet has any hashtags''' 21 | return len(single_tweet['entities']['hashtags']) > 0 22 | 23 | 24 | def check_media(single_tweet): 25 | '''check whether tweet has any media attached''' 26 | if 'media' in single_tweet['entities'].keys(): 27 | return len(single_tweet['entities']['media']) > 0 28 | else: 29 | return False 30 | 31 | 32 | def check_url(single_tweet): 33 | '''check whether tweet has any urls attached''' 34 | return len(single_tweet['entities']['urls']) > 0 35 | 36 | 37 | def check_retweet(single_tweet): 38 | ''' 39 | check whether tweet is a RT. If yes: 40 | return name & user name of the RT'd user. 41 | otherwise just return nones 42 | ''' 43 | if 'full_text' in single_tweet.keys(): 44 | if single_tweet['full_text'].startswith("RT @"): 45 | if len(single_tweet['entities']['user_mentions']) > 0: 46 | return ( 47 | single_tweet['entities']['user_mentions'][0]['screen_name'], 48 | single_tweet['entities']['user_mentions'][0]['name']) 49 | if 'retweeted_status' in single_tweet.keys(): 50 | return (single_tweet['retweeted_status']['user']['screen_name'], 51 | single_tweet['retweeted_status']['user']['name']) 52 | return (None, None) 53 | 54 | 55 | def check_coordinates(single_tweet): 56 | ''' 57 | check whether tweet has coordinates attached. 
58 | if yes return the coordinates 59 | otherwise just return nones 60 | ''' 61 | if 'geo' in single_tweet.keys(): 62 | if 'coordinates' in single_tweet['geo'].keys(): 63 | return (float(single_tweet['geo']['coordinates'][0]), 64 | float(single_tweet['geo']['coordinates'][1])) 65 | else: 66 | return (None, None) 67 | else: 68 | return (None, None) 69 | 70 | 71 | def check_reply_to(single_tweet): 72 | ''' 73 | check whether tweet is a reply. If yes: 74 | return name & user name of the user that's replied to. 75 | otherwise just return nones 76 | ''' 77 | if 'in_reply_to_screen_name' in single_tweet.keys(): 78 | name = None 79 | for user in single_tweet['entities']['user_mentions']: 80 | if user['screen_name'] == single_tweet['in_reply_to_screen_name']: 81 | name = user['name'] 82 | break 83 | return (single_tweet['in_reply_to_screen_name'], name) 84 | else: 85 | return (None, None) 86 | 87 | 88 | def convert_time(coordinates, time_utc): 89 | ''' 90 | Does this tweet have a geo location? if yes 91 | we can easily convert the UTC timestamp to true local time! 92 | otherwise return nones 93 | ''' 94 | if coordinates[0] and coordinates[1]: 95 | timezone_str = tzf.timezone_at(lat=coordinates[0], lng=coordinates[1]) 96 | if timezone_str: 97 | timezone = pytz.timezone(timezone_str) 98 | time_obj_local = datetime.datetime.astimezone(time_utc, timezone) 99 | return time_obj_local 100 | 101 | 102 | def create_dataframe(tweets): 103 | ''' 104 | create a pandas dataframe from our tweet jsons 105 | ''' 106 | 107 | # initialize empty lists 108 | utc_time = [] 109 | longitude = [] 110 | latitude = [] 111 | local_time = [] 112 | hashtag = [] 113 | media = [] 114 | url = [] 115 | retweet_user_name = [] 116 | retweet_name = [] 117 | reply_user_name = [] 118 | reply_name = [] 119 | text = [] 120 | # iterate over all tweets and extract data 121 | for single_tweet in tweets: 122 | try: 123 | utc_time.append( 124 | datetime.datetime.strptime( 125 | single_tweet['tweet']['created_at'], 126 | '%a %b %d %H:%M:%S %z %Y')) 127 | except ValueError: 128 | utc_time.append( 129 | datetime.datetime.strptime( 130 | single_tweet['tweet']['created_at'], 131 | '%Y-%m-%d %H:%M:%S %z')) 132 | coordinates = check_coordinates(single_tweet['tweet']) 133 | latitude.append(coordinates[0]) 134 | longitude.append(coordinates[1]) 135 | try: 136 | creation_time = datetime.datetime.strptime( 137 | single_tweet['tweet']['created_at'], 138 | '%a %b %d %H:%M:%S %z %Y') 139 | except ValueError: 140 | creation_time = datetime.datetime.strptime( 141 | single_tweet['tweet']['created_at'], 142 | '%Y-%m-%d %H:%M:%S %z') 143 | converted_time = convert_time(coordinates, creation_time) 144 | local_time.append(converted_time) 145 | hashtag.append(check_hashtag(single_tweet['tweet'])) 146 | media.append(check_media(single_tweet['tweet'])) 147 | url.append(check_url(single_tweet['tweet'])) 148 | retweet = check_retweet(single_tweet['tweet']) 149 | retweet_user_name.append(retweet[0]) 150 | retweet_name.append(retweet[1]) 151 | reply = check_reply_to(single_tweet['tweet']) 152 | reply_user_name.append(reply[0]) 153 | reply_name.append(reply[1]) 154 | if 'full_text' in single_tweet['tweet'].keys(): 155 | text.append(single_tweet['tweet']['full_text']) 156 | else: 157 | text.append(single_tweet['tweet']['text']) 158 | # convert the whole shebang into a pandas dataframe 159 | dataframe = pd.DataFrame(data={ 160 | 'utc_time': utc_time, 161 | 'local_time': local_time, 162 | 'latitude': latitude, 163 | 'longitude': longitude, 164 | 'hashtag': hashtag, 165 |
'media': media, 166 | 'url': url, 167 | 'retweet_user_name': retweet_user_name, 168 | 'retweet_name': retweet_name, 169 | 'reply_user_name': reply_user_name, 170 | 'reply_name': reply_name, 171 | 'text': text, 172 | }) 173 | return dataframe 174 | 175 | 176 | def fetch_zip_file(zip_url): 177 | tf = tempfile.NamedTemporaryFile() 178 | print('downloading files') 179 | tf.write(requests.get(zip_url).content) 180 | tf.flush() 181 | if zipfile.is_zipfile(tf.name): 182 | return (zipfile.ZipFile(tf.name), 'zipped') 183 | else: 184 | return (open(tf.name, 'r'), 'json') 185 | 186 | 187 | def read_old_zip_archive(zf): 188 | with zf.open('data/js/tweet_index.js', 'r') as f: 189 | f = io.TextIOWrapper(f) 190 | d = f.readlines()[1:] 191 | d = "[{" + "".join(d) 192 | json_files = json.loads(d) 193 | data_frames = [] 194 | print('iterate over individual files') 195 | for single_file in json_files: 196 | print('read ' + single_file['file_name']) 197 | with zf.open(single_file['file_name']) as f: 198 | f = io.TextIOWrapper(f) 199 | d = f.readlines()[1:] 200 | d = "".join(d) 201 | tweets = json.loads(d) 202 | df_tweets = create_dataframe(tweets) 203 | data_frames.append(df_tweets) 204 | return data_frames 205 | 206 | 207 | def read_files(zf, filetype): 208 | if filetype == 'zipped': 209 | if 'data/js/tweet_index.js' in zf.namelist(): 210 | print('reading index') 211 | data_frames = read_old_zip_archive(zf) 212 | return data_frames 213 | elif 'tweet.js' in zf.namelist(): 214 | with zf.open('tweet.js') as f: 215 | f = io.TextIOWrapper(f) 216 | tweet_string = f.readlines() 217 | tweet_string = "".join([i.strip() for i in tweet_string]) 218 | tweet_string = tweet_string[25:] 219 | 220 | elif filetype == 'json': 221 | tweet_string = zf.readlines() 222 | tweet_string = "".join([i.strip() for i in tweet_string]) 223 | tweet_string = tweet_string[25:] 224 | correct_json = tempfile.NamedTemporaryFile(mode='w') 225 | correct_json.write(tweet_string) 226 | correct_json.flush() 227 | tweets = ijson.items(open(correct_json.name, 'r'), 'item') 228 | data_frame = create_dataframe(tweets) 229 | return [data_frame] 230 | 231 | 232 | def create_main_dataframe(zip_url='http://ruleofthirds.de/test_archive.zip'): 233 | if zip_url.startswith('http'): 234 | print('reading zip file from web') 235 | zip_file, filetype = fetch_zip_file(zip_url) 236 | elif os.path.isfile(zip_url): 237 | print('reading zip file from disk') 238 | zip_file = zipfile.ZipFile(zip_url) 239 | filetype = 'zipped' 240 | else: 241 | raise ValueError('zip_url is neither a URL nor a file on disk') 242 | 243 | dataframes = read_files(zip_file, filetype) 244 | print('concatenating...') 245 | dataframe = pd.concat(dataframes) 246 | dataframe = dataframe.sort_values('utc_time', ascending=False) 247 | dataframe = dataframe.set_index('utc_time') 248 | dataframe = dataframe.replace(to_replace={ 249 | 'url': {False: None}, 250 | 'hashtag': {False: None}, 251 | 'media': {False: None} 252 | }) 253 | return dataframe 254 | -------------------------------------------------------------------------------- /tweet_display/static/css/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 23 | 26 | 33 | 34 | 35 | 55 | 57 | 58 | 60 | image/svg+xml 61 | 63 | 64 | 65 | 66 | 67 | 71 | 76 | 86 | 96 | 106 | 117 | 126 | TwArχiv 141 | 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /tweet_display/static/css/metricsgraphics.css:
-------------------------------------------------------------------------------- 1 | .mg-active-datapoint { 2 | fill: black; 3 | font-size: 1.3rem; 4 | font-weight: 400; 5 | opacity: 0.8; 6 | } 7 | 8 | .mg-area1-color { 9 | fill: #0000ff; 10 | } 11 | 12 | .mg-area2-color { 13 | fill: #05b378; 14 | } 15 | 16 | .mg-area3-color { 17 | fill: #db4437; 18 | } 19 | 20 | .mg-area4-color { 21 | fill: #f8b128; 22 | } 23 | 24 | .mg-area5-color { 25 | fill: #5c5c5c; 26 | } 27 | 28 | text.mg-barplot-group-label { 29 | font-weight:900; 30 | } 31 | 32 | .mg-barplot rect.mg-bar { 33 | shape-rendering: auto; 34 | } 35 | 36 | .mg-barplot rect.mg-bar.default-bar { 37 | fill: #b6b6fc; 38 | } 39 | 40 | .mg-barplot rect.mg-bar.default-active { 41 | fill: #9e9efc; 42 | } 43 | 44 | .mg-barplot .mg-bar-prediction { 45 | fill: #5b5b5b; 46 | } 47 | 48 | .mg-barplot .mg-bar-baseline { 49 | stroke: #5b5b5b; 50 | stroke-width: 2; 51 | } 52 | 53 | .mg-bar-target-element { 54 | font-size:11px; 55 | padding-left:5px; 56 | padding-right:5px; 57 | font-weight:300; 58 | } 59 | 60 | .mg-baselines line { 61 | opacity: 1; 62 | shape-rendering: auto; 63 | stroke: #b3b2b2; 64 | stroke-width: 1px; 65 | } 66 | 67 | .mg-baselines text { 68 | fill: black; 69 | font-size: 0.9rem; 70 | opacity: 0.6; 71 | stroke: none; 72 | } 73 | 74 | .mg-baselines-small text { 75 | font-size: 0.6rem; 76 | } 77 | 78 | .mg-category-guides line { 79 | stroke: #b3b2b2; 80 | } 81 | 82 | .mg-header { 83 | cursor: default; 84 | font-size: 1.6rem; 85 | } 86 | 87 | .mg-header .mg-chart-description { 88 | fill: #ccc; 89 | font-family: FontAwesome; 90 | font-size: 1.6rem; 91 | } 92 | 93 | .mg-header .mg-warning { 94 | fill: #ccc; 95 | font-family: FontAwesome; 96 | font-size: 1.2rem; 97 | } 98 | 99 | .mg-points circle { 100 | opacity: 0.65; 101 | } 102 | 103 | .mg-popover { 104 | font-size: 1.3rem; 105 | } 106 | 107 | .mg-popover-content { 108 | cursor: auto; 109 | line-height: 17px; 110 | } 111 | 112 | .mg-data-table { 113 | margin-top: 30px; 114 | } 115 | 116 | .mg-data-table thead tr th { 117 | border-bottom: 1px solid darkgray; 118 | cursor: default; 119 | font-size: 1.1rem; 120 | font-weight: normal; 121 | padding: 5px 5px 8px 5px; 122 | text-align: right; 123 | } 124 | 125 | .mg-data-table thead tr th .fa { 126 | color: #ccc; 127 | padding-left: 4px; 128 | } 129 | 130 | .mg-data-table thead tr th .popover { 131 | font-size: 1.4rem; 132 | font-weight: normal; 133 | } 134 | 135 | .mg-data-table .secondary-title { 136 | color: darkgray; 137 | } 138 | 139 | .mg-data-table tbody tr td { 140 | margin: 2px; 141 | padding: 5px; 142 | vertical-align: top; 143 | } 144 | 145 | .mg-data-table tbody tr td.table-text { 146 | opacity: 0.8; 147 | padding-left: 30px; 148 | } 149 | 150 | .mg-y-axis line.mg-extended-yax-ticks { 151 | opacity: 0.4; 152 | } 153 | 154 | .mg-x-axis line.mg-extended-xax-ticks { 155 | opacity: 0.4; 156 | } 157 | 158 | .mg-histogram .axis path, 159 | .mg-histogram .axis line { 160 | fill: none; 161 | opacity: 0.7; 162 | shape-rendering: auto; 163 | stroke: #ccc; 164 | } 165 | 166 | tspan.hist-symbol { 167 | fill: #9e9efc; 168 | } 169 | 170 | .mg-histogram .mg-bar rect { 171 | fill: #b6b6fc; 172 | shape-rendering: auto; 173 | } 174 | 175 | .mg-histogram .mg-bar rect.active { 176 | fill: #9e9efc; 177 | } 178 | 179 | .mg-least-squares-line { 180 | stroke: red; 181 | stroke-width: 1px; 182 | } 183 | 184 | .mg-lowess-line { 185 | fill: none; 186 | stroke: red; 187 | } 188 | 189 | .mg-line1-color { 190 | stroke: #4040e8; 191 | } 192 | 193 | 
.mg-hover-line1-color { 194 | fill: #4040e8; 195 | } 196 | 197 | .mg-line2-color { 198 | stroke: #05b378; 199 | } 200 | 201 | .mg-hover-line2-color { 202 | fill: #05b378; 203 | } 204 | 205 | .mg-line3-color { 206 | stroke: #db4437; 207 | } 208 | 209 | .mg-hover-line3-color { 210 | fill: #db4437; 211 | } 212 | 213 | .mg-line4-color { 214 | stroke: #f8b128; 215 | } 216 | 217 | .mg-hover-line4-color { 218 | fill: #f8b128; 219 | } 220 | 221 | .mg-line5-color { 222 | stroke: #5c5c5c; 223 | } 224 | 225 | .mg-hover-line5-color { 226 | fill: #5c5c5c; 227 | } 228 | 229 | .mg-line-legend text { 230 | font-size: 1.5rem; 231 | font-weight: 300; 232 | stroke: none; 233 | } 234 | 235 | .mg-line1-legend-color { 236 | color: #4040e8; 237 | fill: #4040e8; 238 | } 239 | 240 | .mg-line2-legend-color { 241 | color: #05b378; 242 | fill: #05b378; 243 | } 244 | 245 | .mg-line3-legend-color { 246 | color: #db4437; 247 | fill: #db4437; 248 | } 249 | 250 | .mg-line4-legend-color { 251 | color: #f8b128; 252 | fill: #f8b128; 253 | } 254 | 255 | .mg-line5-legend-color { 256 | color: #5c5c5c; 257 | fill: #5c5c5c; 258 | } 259 | 260 | .mg-main-area-solid svg .mg-main-area { 261 | fill: #ccccff; 262 | opacity: 1; 263 | } 264 | 265 | .mg-markers line { 266 | opacity: 1; 267 | shape-rendering: auto; 268 | stroke: #b3b2b2; 269 | stroke-width: 1px; 270 | } 271 | 272 | .mg-markers text { 273 | fill: black; 274 | font-size: 1.2rem; 275 | opacity: 0.6; 276 | } 277 | 278 | .mg-missing-text { 279 | opacity: 0.9; 280 | } 281 | 282 | .mg-missing-background { 283 | stroke: blue; 284 | fill: none; 285 | stroke-dasharray: 10,5; 286 | stroke-opacity: 0.05; 287 | stroke-width: 2; 288 | } 289 | 290 | .mg-missing .mg-main-line { 291 | opacity: 0.1; 292 | } 293 | 294 | .mg-missing .mg-main-area { 295 | opacity: 0.03; 296 | } 297 | 298 | path.mg-main-area { 299 | opacity: 0.2; 300 | stroke: none; 301 | } 302 | 303 | path.mg-confidence-band { 304 | fill: #ccc; 305 | opacity: 0.4; 306 | stroke: none; 307 | } 308 | 309 | path.mg-main-line { 310 | fill: none; 311 | opacity: 0.8; 312 | stroke-width: 1.5px; 313 | } 314 | 315 | .mg-points circle { 316 | fill-opacity: 0.4; 317 | stroke-opacity: 1; 318 | } 319 | 320 | circle.mg-points-mono { 321 | fill: #0000ff; 322 | stroke: #0000ff; 323 | } 324 | 325 | tspan.mg-points-mono { 326 | fill: #0000ff; 327 | stroke: #0000ff; 328 | } 329 | 330 | /* a selected point in a scatterplot */ 331 | .mg-points circle.selected { 332 | fill-opacity: 1; 333 | stroke-opacity: 1; 334 | } 335 | 336 | .mg-voronoi path { 337 | fill: none; 338 | pointer-events: all; 339 | stroke: none; 340 | stroke-opacity: 0.1; 341 | } 342 | 343 | .mg-x-rug-mono, 344 | .mg-y-rug-mono { 345 | stroke: black; 346 | } 347 | 348 | .mg-x-axis line, 349 | .mg-y-axis line { 350 | opacity: 1; 351 | shape-rendering: auto; 352 | stroke: #b3b2b2; 353 | stroke-width: 2px; 354 | } 355 | 356 | .mg-x-axis text, 357 | .mg-y-axis text, 358 | .mg-histogram .axis text { 359 | fill: black; 360 | font-size: 1.2rem; 361 | opacity: 0.6; 362 | } 363 | 364 | .mg-x-axis .label, 365 | .mg-y-axis .label, 366 | .mg-axis .label { 367 | font-size: 1.3rem; 368 | text-transform: uppercase; 369 | font-weight: 400; 370 | } 371 | 372 | .mg-x-axis-small text, 373 | .mg-y-axis-small text, 374 | .mg-active-datapoint-small { 375 | font-size: 0.6rem; 376 | } 377 | 378 | .mg-x-axis-small .label, 379 | .mg-y-axis-small .label { 380 | font-size: 0.65rem; 381 | } 382 | 383 | .mg-european-hours { 384 | } 385 | 386 | .mg-year-marker text { 387 | fill: black; 388 | font-size: 0.7rem; 389 
| opacity: 0.6; 390 | } 391 | 392 | .mg-year-marker line { 393 | opacity: 1; 394 | shape-rendering: auto; 395 | stroke: #b3b2b2; 396 | stroke-width: 1.2px; 397 | } 398 | 399 | .mg-year-marker-small text { 400 | font-size: 0.6rem; 401 | } 402 | -------------------------------------------------------------------------------- /tweet_display/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/tweet_display/static/favicon.ico -------------------------------------------------------------------------------- /tweet_display/static/javascripts/leaflet.timeline.js: -------------------------------------------------------------------------------- 1 | !function(t,e){if("object"==typeof exports&&"object"==typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var n=e();for(var i in n)("object"==typeof exports?exports:t)[i]=n[i]}}(this,function(){return function(t){function e(i){if(n[i])return n[i].exports;var r=n[i]={i:i,l:!1,exports:{}};return t[i].call(r.exports,r,r.exports,e),r.l=!0,r.exports}var n={};return e.m=t,e.c=n,e.i=function(t){return t},e.d=function(t,n,i){e.o(t,n)||Object.defineProperty(t,n,{configurable:!1,enumerable:!0,get:i})},e.n=function(t){var n=t&&t.__esModule?function(){return t.default}:function(){return t};return e.d(n,"a",n),n},e.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},e.p="",e(e.s=7)}([function(t,e,n){"use strict";function i(t){return t&&t.__esModule?t:{default:t}}var r=n(3),o=i(r);L.Timeline=L.GeoJSON.extend({times:null,ranges:null,initialize:function(t){var e=this,n=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};this.times=[],this.ranges=new o.default;var i={drawOnSetTime:!0};L.GeoJSON.prototype.initialize.call(this,null,n),L.Util.setOptions(this,i),L.Util.setOptions(this,n),this.options.getInterval&&(this._getInterval=function(){var t;return(t=e.options).getInterval.apply(t,arguments)}),t&&this._process(t)},_getInterval:function(t){var e="start"in t.properties,n="end"in t.properties;return!(!e||!n)&&{start:new Date(t.properties.start).getTime(),end:new Date(t.properties.end).getTime()}},_process:function(t){var e=this,n=1/0,i=-(1/0);t.features.forEach(function(t){var r=e._getInterval(t);r&&(e.ranges.insert(r.start,r.end,t),e.times.push(r.start),e.times.push(r.end),n=Math.min(n,r.start),i=Math.max(i,r.end))}),this.start=this.options.start||n,this.end=this.options.end||i,this.time=this.start,0!==this.times.length&&(this.times.sort(function(t,e){return t-e}),this.times=this.times.reduce(function(t,e,n){if(0===n)return t;var i=t[t.length-1];return i!==e&&t.push(e),t},[this.times[0]]))},setTime:function(t){this.time="number"==typeof t?t:new Date(t).getTime(),this.options.drawOnSetTime&&this.updateDisplayedLayers(),this.fire("change")},updateDisplayedLayers:function(){for(var t=this,e=this.ranges.lookup(this.time),n=0;n0&&void 0!==arguments[0]?arguments[0]:{},e={duration:1e4,enableKeyboardControls:!1,enablePlayback:!0,formatOutput:function(t){return""+(t||"")},showTicks:!0,waitToUpdateMap:!1,position:"bottomleft",steps:1e3};this.timelines=[],L.Util.setOptions(this,e),L.Util.setOptions(this,t),"undefined"!=typeof t.start&&(this.start=t.start),"undefined"!=typeof t.end&&(this.end=t.end)},_getTimes:function(){var t=this,e=[];if(this.timelines.forEach(function(n){var r=n.times.filter(function(e){return 
e>=t.start&&e<=t.end});e.push.apply(e,i(r))}),e.length){e.sort(function(t,e){return t-e});var n=[e[0]];return e.reduce(function(t,e){return t!==e&&n.push(e),e}),n}return e},_recalculate:function(){var t="undefined"!=typeof this.options.start,e="undefined"!=typeof this.options.end,n=this.options.duration,i=1/0,r=-(1/0);this.timelines.forEach(function(t){t.startr&&(r=t.end)}),t||(this.start=i,this._timeSlider.min=i===1/0?0:i,this._timeSlider.value=this._timeSlider.min),e||(this.end=r,this._timeSlider.max=r===-(1/0)?0:r),this._stepSize=Math.max(1,(this.end-this.start)/this.options.steps),this._stepDuration=Math.max(1,n/this.options.steps)},_nearestEventTime:function(t){for(var e=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0,n=this._getTimes(),i=!1,r=n[0],o=1;o=t){if(e===-1)return r;if(a!==t)return a;i=!0}r=a}return r},_createDOM:function(){var t=["leaflet-control-layers","leaflet-control-layers-expanded","leaflet-timeline-control"],e=L.DomUtil.create("div",t.join(" "));if(this.container=e,this.options.enablePlayback){var n=L.DomUtil.create("div","sldr-ctrl-container",e),i=L.DomUtil.create("div","button-container",n);this._makeButtons(i),this.options.enableKeyboardControls&&this._addKeyListeners(),this._makeOutput(n)}this._makeSlider(e),this.options.showTicks&&this._buildDataList(e)},_addKeyListeners:function(){var t=this;this._listener=function(){return t._onKeydown.apply(t,arguments)},document.addEventListener("keydown",this._listener)},_removeKeyListeners:function(){document.removeEventListener("keydown",this._listener)},_buildDataList:function(t){this._datalist=L.DomUtil.create("datalist","",t);var e=Math.floor(1e6*Math.random());this._datalist.id="timeline-datalist-"+e,this._timeSlider.setAttribute("list",this._datalist.id),this._rebuildDataList()},_rebuildDataList:function(){for(var t=this._datalist;t.firstChild;)t.removeChild(t.firstChild);var e=L.DomUtil.create("select","",this._datalist);this._getTimes().forEach(function(t){L.DomUtil.create("option","",e).value=t})},_makeButton:function(t,e){var n=this,i=L.DomUtil.create("button",e,t);i.addEventListener("click",function(){return n[e]()}),L.DomEvent.disableClickPropagation(i)},_makeButtons:function(t){this._makeButton(t,"prev"),this._makeButton(t,"play"),this._makeButton(t,"pause"),this._makeButton(t,"next")},_makeSlider:function(t){var e=this,n=L.DomUtil.create("input","time-slider",t);n.type="range",n.min=this.start||0,n.max=this.end||0,n.value=this.start||0,n.addEventListener("change",function(t){return e._sliderChanged(t)}),n.addEventListener("input",function(t){return e._sliderChanged(t)}),n.addEventListener("pointerdown",function(){return e.map.dragging.disable()}),document.addEventListener("pointerup",function(){return e.map.dragging.enable()}),this._timeSlider=n},_makeOutput:function(t){this._output=L.DomUtil.create("output","time-text",t),this._output.innerHTML=this.options.formatOutput(this.start)},_onKeydown:function(t){switch(t.keyCode||t.which){case 37:this.prev();break;case 39:this.next();break;case 32:this.toggle();break;default:return}t.preventDefault()},_sliderChanged:function(t){var e=parseFloat(t.target.value,10);this.time=e,this.options.waitToUpdateMap&&"change"!==t.type||this.timelines.forEach(function(t){return t.setTime(e)}),this._output&&(this._output.innerHTML=this.options.formatOutput(e))},_resetIfTimelinesChanged:function(t){this.timelines.length!==t&&(this._recalculate(),this.options.showTicks&&this._rebuildDataList(),this.setTime(this.start))},addTimelines:function(){var 
t=this;this.pause();for(var e=this.timelines.length,n=arguments.length,i=Array(n),r=0;r=t&&e.push(n.data),e.push.apply(e,i(this.lookup(t,n.right)))),e)}}]),t}();e.default=s},function(t,e,n){e=t.exports=n(5)(),e.push([t.i,'.leaflet-control.leaflet-timeline-control{width:96%;box-sizing:border-box;margin:2%;margin-bottom:20px;text-align:center}.leaflet-control.leaflet-timeline-control *{vertical-align:middle}.leaflet-control.leaflet-timeline-control input[type=range]{width:80%}.leaflet-control.leaflet-timeline-control .sldr-ctrl-container{float:left;width:15%;box-sizing:border-box}.leaflet-control.leaflet-timeline-control .button-container button{position:relative;width:20%;height:20px}.leaflet-control.leaflet-timeline-control .button-container button:after,.leaflet-control.leaflet-timeline-control .button-container button:before{content:"";position:absolute}.leaflet-control.leaflet-timeline-control .button-container button.play:before{border:7px solid transparent;border-width:7px 0 7px 10px;border-left-color:#000;margin-top:-7px;background:transparent;margin-left:-5px}.leaflet-control.leaflet-timeline-control .button-container button.pause{display:none}.leaflet-control.leaflet-timeline-control .button-container button.pause:before{width:4px;height:14px;border:4px solid #000;border-width:0 4px;margin-top:-7px;margin-left:-6px;background:transparent}.leaflet-control.leaflet-timeline-control .button-container button.prev:after,.leaflet-control.leaflet-timeline-control .button-container button.prev:before{margin:-8px 0 0;background:#000}.leaflet-control.leaflet-timeline-control .button-container button.prev:before{width:2px;height:14px;margin-top:-7px;margin-left:-7px}.leaflet-control.leaflet-timeline-control .button-container button.prev:after{border:7px solid transparent;border-width:7px 10px 7px 0;border-right-color:#000;margin-top:-7px;margin-left:-5px;background:transparent}.leaflet-control.leaflet-timeline-control .button-container button.next:after,.leaflet-control.leaflet-timeline-control .button-container button.next:before{margin:-8px 0 0;background:#000}.leaflet-control.leaflet-timeline-control .button-container button.next:before{width:2px;height:14px;margin-top:-7px;margin-left:5px}.leaflet-control.leaflet-timeline-control .button-container button.next:after{border:7px solid transparent;border-width:7px 0 7px 10px;border-left-color:#000;margin-top:-7px;margin-left:-5px;background:transparent}.leaflet-control.leaflet-timeline-control.playing button.pause{display:inline-block}.leaflet-control.leaflet-timeline-control.playing button.play{display:none}',""])},function(t,e){t.exports=function(){var t=[];return t.toString=function(){for(var t=[],e=0;e=0&&b.splice(e,1)}function a(t){var e=document.createElement("style");return e.type="text/css",r(t,e),e}function s(t){var e=document.createElement("link");return e.rel="stylesheet",r(t,e),e}function l(t,e){var n,i,r;if(e.singleton){var l=g++;n=v||(v=a(e)),i=u.bind(null,n,l,!1),r=u.bind(null,n,l,!0)}else t.sourceMap&&"function"==typeof URL&&"function"==typeof URL.createObjectURL&&"function"==typeof URL.revokeObjectURL&&"function"==typeof Blob&&"function"==typeof btoa?(n=s(e),i=f.bind(null,n),r=function(){o(n),n.href&&URL.revokeObjectURL(n.href)}):(n=a(e),i=h.bind(null,n),r=function(){o(n)});return i(t),function(e){if(e){if(e.css===t.css&&e.media===t.media&&e.sourceMap===t.sourceMap)return;i(t=e)}else r()}}function u(t,e,n,i){var r=n?"":i.css;if(t.styleSheet)t.styleSheet.cssText=y(e,r);else{var 
o=document.createTextNode(r),a=t.childNodes;a[e]&&t.removeChild(a[e]),a.length?t.insertBefore(o,a[e]):t.appendChild(o)}}function h(t,e){var n=e.css,i=e.media;if(i&&t.setAttribute("media",i),t.styleSheet)t.styleSheet.cssText=n;else{for(;t.firstChild;)t.removeChild(t.firstChild);t.appendChild(document.createTextNode(n))}}function f(t,e){var n=e.css,i=e.sourceMap;i&&(n+="\n/*# sourceMappingURL=data:application/json;base64,"+btoa(unescape(encodeURIComponent(JSON.stringify(i))))+" */");var r=new Blob([n],{type:"text/css"}),o=t.href;t.href=URL.createObjectURL(r),o&&URL.revokeObjectURL(o)}var c={},p=function(t){var e;return function(){return"undefined"==typeof e&&(e=t.apply(this,arguments)),e}},d=p(function(){return/msie [6-9]\b/.test(window.navigator.userAgent.toLowerCase())}),m=p(function(){return document.head||document.getElementsByTagName("head")[0]}),v=null,g=0,b=[];t.exports=function(t,e){if("undefined"!=typeof DEBUG&&DEBUG&&"object"!=typeof document)throw new Error("The style-loader cannot be used in a non-browser environment");e=e||{},"undefined"==typeof e.singleton&&(e.singleton=d()),"undefined"==typeof e.insertAt&&(e.insertAt="bottom");var r=i(t);return n(r,e),function(t){for(var o=[],a=0;a 2 | 3 | 4 | 5 | 6 | 7 | 8 | TwArχiv 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | {% load static %} 27 | 29 | 30 | 31 | 34 | 35 | 38 | 39 | 40 | 41 | 42 | 43 | 79 | 80 | 81 | 82 | 83 |
84 | 85 | 86 | 137 | 138 | {% block content %} 139 | {% endblock %} 140 | 141 |
142 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /tweet_display/templates/tweet_display/index.html: -------------------------------------------------------------------------------- 1 | {% extends 'tweet_display/application.html' %} 2 | 3 | {% block content %} 4 | 5 | {% if oh_id == current_user_oh_id %} 6 | 7 | {% include 'tweet_display/partials/graph_status.html' %} 8 | 9 | {%endif%} 10 |
11 |
12 | {%if 'overall_tweets' in graphs%} 13 |
14 | {%else%} 15 | {% include 'tweet_display/partials/graph_in_making.html' %} 16 | {%endif%} 17 |
18 |
19 |
20 |
21 |

Tweets per day

22 |
23 |
24 |

25 | The amount we tweet per day can change over time. This data can be 26 | really noisy: on some days we might hammer out one tweet after the other, 27 | while on other days we don't get around to doing much tweeting at all. 28 | There might be vacations, days when you're sick in bed, etc. This can make 29 | it very hard to see long-term trends. 

31 |

32 | For this reason the data here was 33 | averaged to remove the noise. This was done by applying a rolling average 34 | over a 180-day window. This should make it much easier to see how one's 35 | behaviour changed over longer time frames (in pandas this 36 | rolling average is created by 37 | 38 | dataframe.rolling('180d').mean() 39 | ). 

41 |

42 | In our graph to the left the x-axis gives us the time since 43 | one signed up for Twitter and the y-axis gives the 44 | smoothed number of tweets sent on a given day. 

46 |
47 |
48 |
49 |
50 |
51 |
52 | {%if 'tweet_types' in graphs%} 53 |
54 | {%else%} 55 | {% include 'tweet_display/partials/graph_in_making.html' %} 56 | {%endif%} 57 |
58 |
59 |
60 |
61 |

Different types of tweets over time

62 |
63 |
64 |

65 | There are different types of tweets one can send on Twitter. Some tweets 66 | are replies to tweets by other users, while other tweets are 67 | retweets of tweets by other users. 68 | Last but not least, there are the good old regular tweets, 69 | which are neither a reply to anyone nor a retweet of a tweet. 

71 |

72 | Again, this data might be very noisy due to short-term changes. 73 | For this reason the data was averaged to remove the noise as well. 74 | This was done by applying the same rolling average over a 180-day window as above. 75 | This should make it much easier to see how one's 76 | behaviour changed over longer time frames (in pandas this 77 | rolling average is created by 78 | 79 | dataframe.rolling('180d').mean() 80 | ). 

82 |

83 | In our graph to the left the x-axis gives us the time since 84 | one signed up for Twitter and the y-axis gives the 85 | percentage of the overall tweet volume for the different tweet types. 86 |

87 |
88 |
89 |
90 |
91 |
92 |
93 | {%if 'hourly_tweets' in graphs%} 94 |
95 | {%else%} 96 | {% include 'tweet_display/partials/graph_in_making.html' %} 97 | {%endif%} 98 |
99 |
100 |
101 |
102 |

When are tweets posted?

103 |
104 |
105 |

106 | Different people have different styles in how they use Twitter. Some of us 107 | are owls and tweet deep into the night, while others are larks 108 | and send our first tweets out while the sun rises. And of course, there 109 | might be differences between workdays and weekends. 

111 |

112 | Twitter does not save the local time at which a tweet was sent. Instead, 113 | it gives all dates and times in UTC. To get the correct local time 114 | it is thus necessary to know the latitude/longitude from which a given 115 | tweet was sent. Correspondingly, our graph to the left is based purely on those 116 | tweets for which a geo location is known. 
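As a concrete illustration of this UTC-to-local conversion, here is a small self-contained sketch mirroring convert_time() in tweet_display/read_data.py; the timestamp and coordinates are made up:

import datetime

import pytz
from timezonefinder import TimezoneFinder

tzf = TimezoneFinder()

# A hypothetical geotagged tweet: a UTC timestamp plus latitude/longitude.
time_utc = datetime.datetime(2017, 1, 1, 12, 0, tzinfo=pytz.utc)
lat, lng = 48.2082, 16.3738  # made-up location (Vienna)

timezone_str = tzf.timezone_at(lat=lat, lng=lng)  # e.g. 'Europe/Vienna'
local = time_utc.astimezone(pytz.timezone(timezone_str))
print(local)  # 2017-01-01 13:00:00+01:00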

118 |

119 | In our graph to the left the x-axis gives us the hour of the 120 | day in which tweets were sent and the y-axis gives us the 121 | number of (geotagged) tweets sent at that hour, scaled per single workday or weekend day so the two are comparable. 
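A toy sketch of that weekday/weekend scaling, mirroring the final steps of create_hourly_stats() in tweet_display/analyse_data.py (the tallies are invented for illustration):

import pandas as pd

# Invented tweet tallies per (hour, day type) over a whole archive.
tally = pd.DataFrame({'hour': [9, 9, 22],
                      'weekday': ['Weekday', 'Weekend', 'Weekday'],
                      'values': [50, 14, 20]})
pivot = tally.set_index('hour').pivot(columns='weekday', values='values')

# A week has five workdays and two weekend days, hence the divisors.
pivot['Weekday'] = pivot['Weekday'] / 5
pivot['Weekend'] = pivot['Weekend'] / 2
print(pivot)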

123 |
124 |
125 |
126 |
127 | {% include 'tweet_display/partials/graph_buttons.html' %} 128 | 129 | 192 | 193 | {% endblock %} 194 | -------------------------------------------------------------------------------- /tweet_display/templates/tweet_display/interactions.html: -------------------------------------------------------------------------------- 1 | {% extends 'tweet_display/application.html' %} 2 | 3 | {% block content %} 4 | 5 | {% if oh_id == current_user_oh_id %} 6 | 7 | {% include 'tweet_display/partials/graph_status.html' %} 8 | 9 | {%endif%} 10 | 11 |
12 |
13 | {%if 'top_replies' in graphs%} 14 |
15 | {%else%} 16 | {% include 'tweet_display/partials/graph_in_making.html' %} 17 | {%endif%} 18 |
19 |
20 |
21 |
22 |

The most interactions

23 |
24 |
25 |

26 | Replying to someone's tweets is the main way of interacting on Twitter. 27 | The people we interact with can stay the same over large spans of time, 28 | or they can change - reflecting different social networks and circles of 29 | friends we move in. 30 |

31 |

32 | The graph here shows you the Top Five people you have replied to 33 | most, summed up for your total time on Twitter. To make the graph less 34 | prone to noise, the replies for each user are not given per day but are 35 | summed up per quarter. 
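For reference, a minimal pandas sketch of this per-quarter summing, using the same pd.Grouper call as create_top_replies() in tweet_display/analyse_data.py (the user name and counts are made up):

import pandas as pd

# Toy daily reply counts to one user.
replies = pd.DataFrame({
    'date': pd.to_datetime(['2017-01-03', '2017-02-11', '2017-04-02']),
    'reply_user_name': ['alice', 'alice', 'alice'],
    'value': [2, 3, 5],
})

# Sum per quarter ('QS' = quarter start) per user, as in create_top_replies.
quarterly = (replies
             .groupby(['reply_user_name', pd.Grouper(key='date', freq='QS')])
             ['value'].sum().reset_index())
print(quarterly)  # one row per (user, quarter): 2017-01-01 -> 5, 2017-04-01 -> 5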

37 |

38 | The x-axis thus lists individual 39 | quarters since signing up for Twitter. The y-axis gives the 40 | sum of replies for the individual users per quarter. The Top Five 41 | people you replied to are color-coded and are also listed on the graph 42 | when hovering over a given data point. 

44 |
45 |
46 |
47 |
48 | 49 |
50 |
51 | {%if 'gender_reply' in graphs%} 52 |
53 | {%else%} 54 | {% include 'tweet_display/partials/graph_in_making.html' %} 55 | {%endif%} 56 |
57 |
58 |
59 |
60 |

The gender breakdown for replies

61 |
62 |
63 |

64 | Twitter can be a highly gendered experience and there is 65 | 66 | plenty of research showing that such biases overwhelmingly favor men 67 | . Other tools, like 68 | 69 | Twitterlytic 70 | , allow you to find out what the gender breakdown is amongst the people 71 | you follow and the people that follow you. By looking at a whole Twitter 72 | archive we can have a look at whether interactions - replies and retweets - 73 | are gender-balanced as well. 

75 |

76 | The graph shows you the number of replies to Twitter users 77 | that are classified as either male or female. The 78 | classifications are predictions based on users' first names as 79 | given in their Twitter accounts. The predictions themselves are performed by the Python 80 | package 81 | 82 | gender_guesser 83 | . It uses name/gender frequencies from a large text corpus. 84 | mostly male, mostly female, andy 85 | and unknown classifications are ignored. 86 | To decrease the noise, the daily values have been smoothed with a rolling 87 | average over a 180-day window (dataframe.rolling('180d').mean()). 
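For illustration, a minimal use of the gender_guesser package (the names are made up; the detector works on first names only, which is why predict_gender() first splits off everything after the first word):

import gender_guesser.detector as gender

detector = gender.Detector(case_sensitive=False)

# Possible return values: male, female, mostly_male, mostly_female,
# andy (androgynous) and unknown.
for name in ['Alice', 'Bob', 'Sascha', 'Xyzzy']:
    print(name, detector.get_gender(name))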

89 |

90 | Ideally these graphs would 91 | 92 | include non-binary folks. Doing this is a bit trickier. It is thus a 93 | work in progress. 94 | 95 |

96 |
97 |
98 |
99 |
100 | 101 |
102 |
103 | {%if 'gender_rt' in graphs%} 104 |
105 | {%else%} 106 | {% include 'tweet_display/partials/graph_in_making.html' %} 107 | {%endif%} 108 |
109 |
110 |
111 |
112 |

The gender breakdown for retweets

113 |
114 |
115 |

116 | Even more interesting than whether replies to people might be gendered 117 | 118 | is the question of which voices are being amplified 119 | . 120 | On Twitter, retweets are a good indicator of amplification. These can 121 | be gender-balanced or show biases, similar to the replies to other users. 

123 |

124 | The graph shows you the number of retweets of Twitter users 125 | that are classified as either male or female. The 126 | classifications are again predictions made by the Python 127 | package 128 | 129 | gender_guesser 130 | . 131 | To decrease the noise, the daily values have again been smoothed with a rolling 132 | average over a 180-day window (dataframe.rolling('180d').mean()). 

134 |

135 | Ideally these graphs would 136 | 137 | include non-binary folks. Doing this is a bit trickier. It is thus a 138 | work in progress. 139 | 140 |

141 |
142 |
143 |
144 |
145 | {% include 'tweet_display/partials/graph_buttons.html' %} 146 | 147 | 209 | 210 | {% endblock %} 211 | -------------------------------------------------------------------------------- /tweet_display/templates/tweet_display/location.html: -------------------------------------------------------------------------------- 1 | {% extends 'tweet_display/application.html' %} 2 | 3 | {% block content %} 4 | 5 | {% if oh_id == current_user_oh_id %} 6 | 7 | {% include 'tweet_display/partials/graph_status.html' %} 8 | 9 | {%endif%} 10 | 11 | 30 | 31 |
32 |
33 | {%if 'heatmap' in graphs%} 34 |
35 | {%else%} 36 | {% include 'tweet_display/partials/graph_in_making.html' %} 37 | {%endif%} 38 |
39 |
40 |
41 |
42 |

Where have I been tweeting?

43 |
44 |
45 |

46 | Tweets come with a lot of metadata these days. Amongst the most powerful 47 | and interesting kinds is the location data that many of us attach 48 | to our tweets. Twitter and third-party Twitter clients allow 49 | the use of an exact latitude/longitude as the location data that is 50 | associated with tweets. Thanks to this a Twitter archive can give 51 | a good overview of where all those tweets have been sent from - be it 52 | tweets from our sofa, from the office, 53 | or from our travels and holiday snapshots. 

55 |

56 | This map visualizes all tweets in the Twitter archive that have a latitude and longitude 57 | attached to them. It converts them into a simple heatmap. Regions with many tweets 58 | are shown in red, while regions with fewer tweets are shown in blue. The exact scale 59 | depends on the zoom level. Zoom in to see exactly where you sent all 60 | these posts from. Can you identify the main places you post from? 

62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 | {%if 'timeline' in graphs%} 70 |
71 | {%else%} 72 | {% include 'tweet_display/partials/graph_in_making.html' %} 73 | {%endif%} 74 |
75 |
76 |
77 |
78 |

Tweet locations over time

79 |
80 |
81 |

82 | Not all tweets come with geo location enabled. But all geotagged 83 | tweets come with a timestamp of when they were posted. This allows us 84 | to add another dimension to these maps: time, turning 85 | them into a movie that can serve as a proxy for movement across the globe. 

87 |

88 | Instead of converting the geotagged tweets into a heatmap, this map shows the 89 | movement profile by turning a Twitter archive into an interactive map 90 | visualization. Press play to start the movement. Each geotagged tweet will appear as a 91 | pink blip on the map on the day it was posted. To make them more visible, each tweet stays 92 | on the map for 28 days. 
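A small sketch of how a single geotagged tweet becomes a timed GeoJSON feature with a 28-day lifetime, mirroring create_timeline() in tweet_display/analyse_data.py (the coordinates and dates are made up):

import geojson

# One hypothetical geotagged tweet, kept on the map for 28 days.
lon, lat = 16.37, 48.21
feature = geojson.Feature(
    geometry=geojson.Point((lon, lat)),  # GeoJSON expects (longitude, latitude)
    properties=dict(start='2017-01-01', end='2017-01-29'),
)
print(geojson.dumps(geojson.FeatureCollection([feature])))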

94 |
95 |
96 |
97 |

98 | {% include 'tweet_display/partials/graph_buttons.html' %} 99 | 100 | 101 | 102 | 113 | 114 | 115 | 116 | 152 | {% endblock %} 153 | -------------------------------------------------------------------------------- /tweet_display/templates/tweet_display/partials/graph_buttons.html: -------------------------------------------------------------------------------- 1 |
2 | 7 |
8 | -------------------------------------------------------------------------------- /tweet_display/templates/tweet_display/partials/graph_in_making.html: -------------------------------------------------------------------------------- 1 | 17 | -------------------------------------------------------------------------------- /tweet_display/templates/tweet_display/partials/graph_status.html: -------------------------------------------------------------------------------- 1 | 12 | -------------------------------------------------------------------------------- /tweet_display/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/tweet_display/tests/__init__.py -------------------------------------------------------------------------------- /tweet_display/tests/tests_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from django.test import TestCase 4 | 5 | from ..read_data import create_main_dataframe, read_files 6 | from ..analyse_data import create_heatmap, \ 7 | create_hourly_stats, \ 8 | create_overall, \ 9 | create_timeline, \ 10 | create_top_replies, \ 11 | create_tweet_types 12 | 13 | 14 | class DataTestCase(TestCase): 15 | """ 16 | Test cases for data loading and analysis. Uses only two months, 17 | December 2016 and January 2017, from test_archive.zip. 18 | """ 19 | 20 | def setUp(self): 21 | file_name = 'test_archive_2016_2017_2_months.zip' 22 | current_dir = os.path.dirname(os.path.realpath('__file__')) 23 | test_file = os.path.join(current_dir, file_name) 24 | 25 | new_filename = 'new_tweet_subset.js' 26 | new_test_file = os.path.join(current_dir, new_filename) 27 | 28 | self.df = create_main_dataframe(test_file) 29 | self.new_df = read_files(open(new_test_file, 'r'), 'json')[0] 30 | self.new_df = self.new_df.sort_values('utc_time', ascending=False) 31 | self.new_df = self.new_df.set_index('utc_time') 32 | self.new_df = self.new_df.replace(to_replace={ 33 | 'url': {False: None}, 34 | 'hashtag': {False: None}, 35 | 'media': {False: None} 36 | }) 37 | 38 | def test_create_hourly_stats(self): 39 | stats = create_hourly_stats(self.df) 40 | self.assertEqual(stats.shape, (20, 4)) 41 | 42 | 43 | def test_create_heatmap(self): 44 | heatmap = create_heatmap(self.df) 45 | self.assertEqual(heatmap.shape, (469, 2)) 46 | new_heatmap = create_heatmap(self.new_df) 47 | self.assertEqual(new_heatmap.shape, (0, 2)) 48 | 49 | 50 | def test_create_overall(self): 51 | overall = create_overall(self.df) 52 | self.assertEqual(overall.shape, (62, 2)) 53 | new_overall = create_overall(self.new_df) 54 | self.assertEqual(new_overall.shape, (4, 2)) 55 | 56 | def test_create_timeline(self): 57 | timeline = create_timeline(self.df) 58 | self.assertEqual(len(timeline), 78186) 59 | 60 | def test_top_replies(self): 61 | replies = create_top_replies(self.df) 62 | self.assertEqual(replies.shape, (2, 6)) 63 | 64 | def test_top_types(self): 65 | types = create_tweet_types(self.df) 66 | self.assertEqual(types.shape, (62, 9)) 67 | -------------------------------------------------------------------------------- /tweet_display/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url 2 | from django.views.generic.base import RedirectView 3 | from .
import views 4 | 5 | 6 | urlpatterns = [ 7 | url(r'^index/$', views.index, name='index'), 8 | url(r'^index/(?P<oh_id>\w+)/$', 9 | views.index, 10 | name='index_w_id'), 11 | url(r'^location/$', views.location, name='location'), 12 | url(r'^location/(?P<oh_id>\w+)/$', views.location, name='location_w_id'), 13 | url(r'^interactions/$', views.interactions, name='interactions'), 14 | url(r'^interactions/(?P<oh_id>\w+)/$', 15 | views.interactions, 16 | name='interactions_w_id'), 17 | url(r'^gender_rt/(?P<oh_id>\w+)/$', views.gender_rt, name='gender_rt'), 18 | url(r'^gender_reply/(?P<oh_id>\w+)/$', 19 | views.gender_reply, 20 | name='gender_reply'), 21 | url(r'^hourly_tweets/(?P<oh_id>\w+)/$', 22 | views.hourly_tweets, 23 | name='hourly_tweets'), 24 | url(r'^tweet_types/(?P<oh_id>\w+)/$', 25 | views.tweet_types, 26 | name='tweet_types'), 27 | url(r'^top_replies/(?P<oh_id>\w+)/$', 28 | views.top_replies, 29 | name='top_replies'), 30 | url(r'^heatmap/(?P<oh_id>\w+)/$', views.heatmap, name='heatmap'), 31 | url(r'^timeline/(?P<oh_id>\w+)/$', views.timeline, name='timeline'), 32 | url(r'^overall_tweets/(?P<oh_id>\w+)/$', 33 | views.overall_tweets, 34 | name='overall_tweets'), 35 | url(r'^$', RedirectView.as_view(pattern_name='index', permanent=False)) 36 | ] 37 | -------------------------------------------------------------------------------- /tweet_display/views.py: -------------------------------------------------------------------------------- 1 | from django.http import HttpResponse 2 | from django.shortcuts import render, redirect 3 | 4 | from .models import Graph 5 | from .helper import grant_access, get_current_user, check_graphs 6 | 7 | # Create your views here. 8 | 9 | 10 | def index(request, oh_id=None): 11 | context = {'section': 'general', 'graph_section': True} 12 | if oh_id is not None: 13 | context['link_target'] = oh_id 14 | if grant_access(request, oh_id): 15 | context['oh_id'] = grant_access(request, oh_id) 16 | context['current_user_oh_id'] = get_current_user(request) 17 | graphs = ['overall_tweets', 'hourly_tweets', 'tweet_types'] 18 | context['graphs'] = check_graphs(graphs, context['oh_id']) 19 | return render(request, 'tweet_display/index.html', context) 20 | else: 21 | return redirect('/users/') 22 | 23 | 24 | def location(request, oh_id=None): 25 | context = {'section': 'location', 'graph_section': True} 26 | if oh_id is not None: 27 | context['link_target'] = oh_id 28 | if grant_access(request, oh_id): 29 | context['oh_id'] = grant_access(request, oh_id) 30 | context['current_user_oh_id'] = get_current_user(request) 31 | graphs = ['timeline', 'heatmap'] 32 | context['graphs'] = check_graphs(graphs, context['oh_id']) 33 | return render(request, 'tweet_display/location.html', context) 34 | else: 35 | return redirect('/users/') 36 | 37 | 38 | def interactions(request, oh_id=None): 39 | context = {'section': 'interactions', 'graph_section': True} 40 | if oh_id is not None: 41 | context['link_target'] = oh_id 42 | if grant_access(request, oh_id): 43 | context['oh_id'] = grant_access(request, oh_id) 44 | context['current_user_oh_id'] = get_current_user(request) 45 | graphs = ['gender_rt', 'gender_reply', 'top_replies'] 46 | context['graphs'] = check_graphs(graphs, context['oh_id']) 47 | return render(request, 'tweet_display/interactions.html', context) 48 | else: 49 | return redirect('/users/') 50 | 51 | 52 | def gender_rt(request, oh_id): 53 | if grant_access(request, oh_id): 54 | graph = Graph.objects.get(graph_type__exact='gender_rt', 55 | open_humans_member__oh_id=oh_id) 56 | return HttpResponse(graph.graph_data, content_type='application/json') 57 |
else: 58 | return redirect('/users/') 59 | 60 | 61 | def gender_reply(request, oh_id): 62 | if grant_access(request, oh_id): 63 | graph = Graph.objects.get(graph_type__exact='gender_reply', 64 | open_humans_member__oh_id=oh_id) 65 | return HttpResponse(graph.graph_data, content_type='application/json') 66 | else: 67 | return redirect('/users') 68 | 69 | 70 | def hourly_tweets(request, oh_id): 71 | if grant_access(request, oh_id): 72 | graph = Graph.objects.get(graph_type__exact='hourly_tweets', 73 | open_humans_member__oh_id=oh_id) 74 | return HttpResponse(graph.graph_data, content_type='application/json') 75 | else: 76 | return redirect('/users/') 77 | 78 | 79 | def tweet_types(request, oh_id): 80 | if grant_access(request, oh_id): 81 | graph = Graph.objects.get(graph_type__exact='tweet_types', 82 | open_humans_member__oh_id=oh_id) 83 | return HttpResponse(graph.graph_data, content_type='application/json') 84 | else: 85 | return redirect('/users/') 86 | 87 | 88 | def top_replies(request, oh_id): 89 | if grant_access(request, oh_id): 90 | graph = Graph.objects.get(graph_type__exact='top_replies', 91 | open_humans_member__oh_id=oh_id) 92 | return HttpResponse(graph.graph_data, content_type='application/json') 93 | else: 94 | return redirect('/users/') 95 | 96 | 97 | def heatmap(request, oh_id): 98 | if grant_access(request, oh_id): 99 | graph = Graph.objects.get(graph_type__exact='heatmap', 100 | open_humans_member__oh_id=oh_id) 101 | return HttpResponse(graph.graph_data, content_type='application/json') 102 | else: 103 | return redirect('/users/') 104 | 105 | 106 | def timeline(request, oh_id): 107 | if grant_access(request, oh_id): 108 | graph = Graph.objects.get(graph_type__exact='timeline', 109 | open_humans_member__oh_id=oh_id) 110 | return HttpResponse(graph.graph_data, content_type='application/json') 111 | else: 112 | return redirect('/users/') 113 | 114 | 115 | def overall_tweets(request, oh_id): 116 | if grant_access(request, oh_id): 117 | graph = Graph.objects.get(graph_type__exact='overall_tweets', 118 | open_humans_member__oh_id=oh_id) 119 | return HttpResponse(graph.graph_data, content_type='application/json') 120 | else: 121 | return redirect('/users/') 122 | -------------------------------------------------------------------------------- /twitteranalyser/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, unicode_literals 2 | 3 | # This will make sure the app is always imported when 4 | # Django starts so that shared_task will use this app. 5 | from .celery import app as celery_app 6 | 7 | __all__ = ['celery_app'] 8 | -------------------------------------------------------------------------------- /twitteranalyser/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class TwitteranalyserConfig(AppConfig): 5 | name = 'twitteranalyser' 6 | -------------------------------------------------------------------------------- /twitteranalyser/celery.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, unicode_literals 2 | import os 3 | from celery import Celery 4 | 5 | 6 | # set the default Django settings module for the 'celery' program. 
7 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'twitteranalyser.settings') 8 | 9 | app = Celery('proj') 10 | 11 | # Using a string here means the worker doesn't have to serialize 12 | # the configuration object to child processes. 13 | # - namespace='CELERY' means all celery-related configuration keys 14 | # should have a `CELERY_` prefix. 15 | app.config_from_object('django.conf:settings', namespace='CELERY') 16 | app.conf.update(CELERY_BROKER_URL=os.environ['REDIS_URL'], 17 | CELERY_RESULT_BACKEND=os.environ['REDIS_URL']) 18 | 19 | # Load task modules from all registered Django app configs. 20 | app.autodiscover_tasks() 21 | # app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) 22 | 23 | 24 | @app.task(bind=True) 25 | def debug_task(self): 26 | print('Request: {0!r}'.format(self.request)) 27 | -------------------------------------------------------------------------------- /twitteranalyser/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for twitteranalyser project. 3 | 4 | Generated by 'django-admin startproject' using Django 1.11.3. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.11/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/1.11/ref/settings/ 11 | """ 12 | 13 | import os 14 | import dj_database_url 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | # Open Humans configuration 19 | OH_CLIENT_ID = os.getenv('OH_CLIENT_ID') 20 | OH_CLIENT_SECRET = os.getenv('OH_CLIENT_SECRET') 21 | OH_ACTIVITY_PAGE = os.getenv('OH_ACTIVITY_PAGE') 22 | OH_REDIRECT_URI = os.getenv('OH_REDIRECT_URI', '') 23 | OH_BASE_URL = 'https://www.openhumans.org' 24 | # OH_BASE_URL = 'https://staging.openhumans.org' 25 | APP_BASE_URL = os.getenv('APP_BASE_URL', 'http://127.0.0.1:5000/users') 26 | ON_HEROKU = os.getenv('ON_HEROKU', False) 27 | INACTIVE_PROJECT = True if os.getenv('INACTIVE_PROJECT', "True").lower() == 'true' else False 28 | 29 | 30 | # Quick-start development settings - unsuitable for production 31 | # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ 32 | 33 | # SECURITY WARNING: keep the secret key used in production secret! 34 | SECRET_KEY = os.getenv('SECRET_KEY', 'foo') 35 | # SECRET_KEY = ')r46i@!_#_j!!+xe)2+kbj#gxmrzwj&g24^2ke&e1o1nh=9zof' 36 | 37 | # SECURITY WARNING: don't run with debug turned on in production! 
38 | DEBUG = False if os.getenv('DEBUG', '').lower() == 'false' else True 39 | 40 | ALLOWED_HOSTS = ['*'] 41 | 42 | 43 | # Application definition 44 | 45 | INSTALLED_APPS = [ 46 | 'django.contrib.admin', 47 | 'django.contrib.auth', 48 | 'django.contrib.contenttypes', 49 | 'django.contrib.sessions', 50 | 'django.contrib.messages', 51 | 'django.contrib.staticfiles', 52 | 'tweet_display.apps.TweetDisplayConfig', 53 | 'users.apps.UsersConfig', 54 | 'twitteranalyser.apps.TwitteranalyserConfig' 55 | ] 56 | 57 | MIDDLEWARE = [ 58 | 'django.middleware.security.SecurityMiddleware', 59 | 'whitenoise.middleware.WhiteNoiseMiddleware', 60 | 'django.contrib.sessions.middleware.SessionMiddleware', 61 | 'django.middleware.common.CommonMiddleware', 62 | 'django.middleware.csrf.CsrfViewMiddleware', 63 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 64 | 'django.contrib.messages.middleware.MessageMiddleware', 65 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 66 | ] 67 | 68 | ROOT_URLCONF = 'twitteranalyser.urls' 69 | 70 | TEMPLATES = [ 71 | { 72 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 73 | 'DIRS': [], 74 | 'APP_DIRS': True, 75 | 'OPTIONS': { 76 | 'context_processors': [ 77 | 'django.template.context_processors.debug', 78 | 'django.template.context_processors.request', 79 | 'django.contrib.auth.context_processors.auth', 80 | 'django.contrib.messages.context_processors.messages', 81 | ], 82 | }, 83 | }, 84 | ] 85 | 86 | WSGI_APPLICATION = 'twitteranalyser.wsgi.application' 87 | 88 | 89 | # Database 90 | # https://docs.djangoproject.com/en/1.11/ref/settings/#databases 91 | 92 | 93 | DATABASES = { 94 | 'default': { 95 | 'ENGINE': 'django.db.backends.sqlite3', 96 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 97 | } 98 | } 99 | 100 | if ON_HEROKU: 101 | db_from_env = dj_database_url.config(conn_max_age=500) 102 | DATABASES = {'default': db_from_env} 103 | 104 | # Password validation 105 | # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators 106 | 107 | AUTH_PASSWORD_VALIDATORS = [ 108 | { 109 | 'NAME': 'django.contrib.auth.password_validation.' 110 | 'UserAttributeSimilarityValidator', 111 | }, 112 | { 113 | 'NAME': 'django.contrib.auth.password_validation.' 114 | 'MinimumLengthValidator', 115 | }, 116 | { 117 | 'NAME': 'django.contrib.auth.password_validation.' 118 | 'CommonPasswordValidator', 119 | }, 120 | { 121 | 'NAME': 'django.contrib.auth.password_validation.' 122 | 'NumericPasswordValidator', 123 | }, 124 | ] 125 | 126 | 127 | # Internationalization 128 | # https://docs.djangoproject.com/en/1.11/topics/i18n/ 129 | 130 | LANGUAGE_CODE = 'en-us' 131 | 132 | TIME_ZONE = 'UTC' 133 | 134 | USE_I18N = True 135 | 136 | USE_L10N = True 137 | 138 | USE_TZ = True 139 | 140 | # Static files (CSS, JavaScript, Images) 141 | # https://docs.djangoproject.com/en/1.9/howto/static-files/ 142 | 143 | 144 | STATIC_URL = '/static/' 145 | 146 | STATICFILES_DIRS = [ 147 | os.path.join(BASE_DIR, 'static'), 148 | ] 149 | 150 | STATIC_ROOT = os.path.join(BASE_DIR, 'staticfiles') 151 | 152 | # Simplified static file serving. 
153 | # https://warehouse.python.org/project/whitenoise/ 154 | 155 | STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage' 156 | -------------------------------------------------------------------------------- /twitteranalyser/templates/twitteranalyser/about.html: -------------------------------------------------------------------------------- 1 | {% extends 'tweet_display/application.html' %} 2 | 3 | {% block content %} 4 |
5 |
6 |
7 |
8 |

Who's running this?

9 |
10 |
11 |
12 |
13 | {% load static %} 14 | 15 |
16 |
17 |

18 | The site is hosted and run by 19 | Bastian Greshake Tzovaras. When he is not diving into the data of Twitter archives 20 | he tweets as 21 | @gedankenstuecke, 22 | is busy as Director of Research of 23 | Open Humans, 24 | and is a co-founder of 25 | openSNP. 26 |

27 |

28 | If you want to get in touch with Bastian you can reach him at bastian@openhumans.org 29 |

30 |
31 |
32 |
33 |
34 |
35 |
36 |

You did this all alone?

37 |
38 |
39 |

40 | Good point. I really didn't. Many people directly and indirectly helped in making these visualizations. 41 | Thanks go to: 42 |

43 |

44 | Madeleine Price Ball, Executive 45 | Director of Open Humans, helped with implementing the file-upload code that pushes the uploaded 46 | data to Open Humans. 47 |

48 |

49 | Much of the map-based JavaScript implemented here is a remnant of 50 | a great workshop that Joey Lee gave 51 | at Mozfest 2016! 52 |

53 |
54 |
55 |
56 |
57 |

You missed a cool visualization!

58 |
59 |
60 |

61 | I'm pretty sure that I missed something cool that could be done with Twitter archives. 62 | You can always let me know what I missed. But even better: this whole thing is open source, so you can 63 | open an issue on GitHub (thanks for that) or even directly contribute a pull request that adds the data 64 | visualization that you're missing! 65 |

66 |

67 | Better still: If you have a great idea that would substantially extend TwArχiv 68 | you can even be paid for implementing it: 69 | Open Humans offers grants of $5,000 for the creation of projects. 70 | What I would personally love to see: a tweet-based sentiment analysis over time! 71 |
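For anyone tempted by that: a minimal sketch of where such a sentiment-over-time analysis could start, assuming `tweets` is a list of dicts with `created_at` and `text` fields (as parsed from a tweet.js file) and using the third-party pandas and TextBlob packages; this is an illustration, not part of the app:

```python
# Hypothetical starting point for the sentiment-over-time idea above.
# Assumes each tweet dict carries 'created_at' and 'text' fields.
import pandas as pd
from textblob import TextBlob


def monthly_sentiment(tweets):
    """Average TextBlob polarity (-1 negative .. +1 positive) per month."""
    df = pd.DataFrame({
        'created_at': pd.to_datetime([t['created_at'] for t in tweets]),
        'polarity': [TextBlob(t['text']).sentiment.polarity
                     for t in tweets],
    })
    return df.set_index('created_at')['polarity'].resample('M').mean()
```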

72 |
73 |
74 |
75 |
76 |

FAQ

77 |
78 |
79 |

Will my Twitter archive be public if I use TwArχiv?

80 |
81 |
82 |

83 | The short answer: No. TwArχiv uses Open Humans 84 | to store the data. By default Open Humans will not make your data publicly available 85 | and will not share the data with anyone. 86 |

87 |

88 | Having said this: Open Humans gives you the 89 | option to make your data publicly available. To do this you can head over to 90 | Open Humans. On that page you will find the option 91 | to make your data fully public. 92 |

93 |
94 |
95 |
96 |
97 |

Will my visualizations that TwArχiv generates be public?

98 |
99 |
100 |

101 | The same short answer: No. By default no one but you can access the graphs you see after your 102 | login. But if you want to share them with your closest 3,000 friends on Twitter you can do so. 103 | Just head to your settings and make them publicly available. 104 |

105 |

106 | If you do this, your 107 | graphs will be public visualizations 108 | right here on the website. You can also choose to make the 109 | graphs private again at any point later on. Making your graphs on TwArχiv 110 | public or private is independent of your archive sharing settings on Open Humans. 111 |

112 |
113 |
114 |
115 |
116 |

Why does TwArχiv not generate my graphs?!

117 |
118 |
119 |

120 | Reading in all of your archive might take a while, depending on how prolific a Twitter user you are. 121 | Bastian's archive contains around 100,000 tweets and it can easily take ~15 minutes to read in all that data 122 | and generate the graphs. Just have some patience and come back after sending some more tweets. 123 |
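Behind the scenes this parsing runs as a Celery background job rather than in the web request; a minimal sketch of the hand-off, using the `import_data` task that users/views.py (further below) dispatches — the wrapper function here is hypothetical:

```python
# import_data is the shared Celery task from tweet_display/tasks.py;
# .delay() enqueues the (potentially ~15 minute) import on the Redis
# broker instead of blocking the web request.
from tweet_display.tasks import import_data


def queue_archive_import(oh_id):  # hypothetical helper
    # oh_id: the Open Humans member whose uploaded archive to parse.
    import_data.delay(oh_id)
```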

124 |
125 |
126 |
127 |
128 |

I waited like, a really long time. But there are still no graphs!

129 |
130 |
131 |

132 | Sometimes things go wrong when reading in the files, sorry for that (also, did you follow the instructions to the letter?). 133 | If you suspect that this is the case please head to your settings and trigger a 134 | reprocessing of your data. 135 |

136 |

137 | That didn't help either? Please email Bastian and you can jointly investigate what's going on. 138 |

139 |
140 |
141 |
142 |
143 |

I tweeted a lot since the data upload. Can I update my data?

144 |
145 |
146 |

147 | Absolutely. If you want to replace your archive with a newer version you can just head to 148 | your settings and upload a newer copy. We will automatically 149 | delete the old files and replace them with the new upload. 150 |

151 |
152 |
153 |
154 |
155 | 156 | {% endblock %} 157 | -------------------------------------------------------------------------------- /twitteranalyser/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/twitteranalyser/tests/__init__.py -------------------------------------------------------------------------------- /twitteranalyser/tests/tests_views.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase, Client 2 | from django.conf import settings 3 | 4 | 5 | class AboutTestCase(TestCase): 6 | """ 7 | Test cases for the about view function. 8 | """ 9 | 10 | def setUp(self): 11 | """ 12 | Set up the app for following test. 13 | """ 14 | settings.DEBUG = True 15 | settings.OH_ACTIVITY_PAGE = 'foobar.com' 16 | 17 | def test_about(self): 18 | """ 19 | Tests the about view function. 20 | """ 21 | c = Client() 22 | response = c.get('/about/') 23 | self.assertEqual(response.status_code, 200) 24 | self.assertEqual(response.context['section'], 'about') 25 | self.assertEqual(response.context['oh_proj_page'], 'foobar.com') 26 | self.assertTemplateUsed(response, 'twitteranalyser/about.html') 27 | -------------------------------------------------------------------------------- /twitteranalyser/urls.py: -------------------------------------------------------------------------------- 1 | """twitteranalyser URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.11/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import include, url 17 | from django.contrib import admin 18 | from django.views.generic.base import RedirectView 19 | from . import views 20 | 21 | urlpatterns = [ 22 | url(r'^tweet_display/', include('tweet_display.urls')), 23 | url(r'^users/', include('users.urls')), 24 | url(r'^admin/', admin.site.urls), 25 | url(r'^$', RedirectView.as_view(pattern_name='user_home', 26 | permanent=False)), 27 | url(r'^about/$', views.about, name='about'), 28 | 29 | ] 30 | -------------------------------------------------------------------------------- /twitteranalyser/views.py: -------------------------------------------------------------------------------- 1 | # from django.http import HttpResponse 2 | from django.shortcuts import render # redirect 3 | from django.conf import settings 4 | 5 | 6 | def about(request): 7 | context = {'section': 'about', 8 | 'oh_proj_page': settings.OH_ACTIVITY_PAGE} 9 | return render(request, 'twitteranalyser/about.html', context) 10 | -------------------------------------------------------------------------------- /twitteranalyser/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for twitteranalyser project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "twitteranalyser.settings") 14 | 15 | application = get_wsgi_application() 16 | -------------------------------------------------------------------------------- /users/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/users/__init__.py -------------------------------------------------------------------------------- /users/admin.py: -------------------------------------------------------------------------------- 1 | # Register your models here. 2 | -------------------------------------------------------------------------------- /users/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class UsersConfig(AppConfig): 5 | name = 'users' 6 | -------------------------------------------------------------------------------- /users/forms.py: -------------------------------------------------------------------------------- 1 | from django import forms 2 | 3 | 4 | class UploadFileForm(forms.Form): 5 | file = forms.FileField() 6 | -------------------------------------------------------------------------------- /users/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11.3 on 2017-12-12 21:50 3 | from __future__ import unicode_literals 4 | 5 | from django.conf import settings 6 | from django.db import migrations, models 7 | import django.db.models.deletion 8 | 9 | 10 | class Migration(migrations.Migration): 11 | 12 | initial = True 13 | 14 | dependencies = [ 15 | migrations.swappable_dependency(settings.AUTH_USER_MODEL), 16 | ] 17 | 18 | operations = [ 19 | migrations.CreateModel( 20 | name='OpenHumansMember', 21 | fields=[ 22 | ('oh_id', models.CharField(max_length=16, primary_key=True, serialize=False, unique=True)), 23 | ('access_token', models.CharField(max_length=256)), 24 | ('refresh_token', models.CharField(max_length=256)), 25 | ('token_expires', models.DateTimeField()), 26 | ('user', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), 27 | ], 28 | ), 29 | ] 30 | -------------------------------------------------------------------------------- /users/migrations/0002_openhumansmember_public.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11.3 on 2017-12-12 23:59 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('users', '0001_initial'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='openhumansmember', 17 | name='public', 18 | field=models.BooleanField(default=False), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /users/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/users/migrations/__init__.py 
-------------------------------------------------------------------------------- /users/models.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | import os 3 | 4 | import arrow 5 | from django.conf import settings 6 | from django.contrib.auth.models import User 7 | from django.db import models 8 | import requests 9 | 10 | OH_CLIENT_ID = os.getenv('OH_CLIENT_ID', '') 11 | OH_CLIENT_SECRET = os.getenv('OH_CLIENT_SECRET', '') 12 | 13 | 14 | def make_unique_username(base): 15 | """ 16 | Ensure a unique username. Probably this never actually gets used. 17 | """ 18 | try: 19 | User.objects.get(username=base) 20 | except User.DoesNotExist: 21 | return base 22 | n = 2 23 | while True: 24 | name = base + str(n) 25 | try: 26 | User.objects.get(username=name) 27 | n += 1 28 | except User.DoesNotExist: 29 | return name 30 | 31 | 32 | class OpenHumansMember(models.Model): 33 | """ 34 | Store OAuth2 data for Open Humans member. 35 | A User account is created for this Open Humans member. 36 | """ 37 | user = models.OneToOneField(User, on_delete=models.CASCADE) 38 | oh_id = models.CharField(max_length=16, primary_key=True, unique=True) 39 | access_token = models.CharField(max_length=256) 40 | refresh_token = models.CharField(max_length=256) 41 | token_expires = models.DateTimeField() 42 | public = models.BooleanField(default=False) 43 | 44 | @staticmethod 45 | def get_expiration(expires_in): 46 | return (arrow.now() + timedelta(seconds=expires_in)).format() 47 | 48 | @classmethod 49 | def create(cls, oh_id, access_token, refresh_token, expires_in): 50 | new_username = make_unique_username( 51 | base='{}_openhumans'.format(oh_id)) 52 | new_user = User(username=new_username) 53 | new_user.save() 54 | oh_member = cls( 55 | user=new_user, 56 | oh_id=oh_id, 57 | access_token=access_token, 58 | refresh_token=refresh_token, 59 | token_expires=cls.get_expiration(expires_in)) 60 | return oh_member 61 | 62 | def __str__(self): 63 | return "".format( 64 | self.oh_id) 65 | 66 | def get_access_token(self): 67 | """ 68 | Return access token. Refresh first if necessary. 69 | """ 70 | # Also refresh if nearly expired (less than 60s remaining). 71 | delta = timedelta(seconds=60) 72 | if arrow.get(self.token_expires) - delta < arrow.now(): 73 | self._refresh_tokens() 74 | return self.access_token 75 | 76 | def _refresh_tokens(self): 77 | """ 78 | Refresh access token. 79 | """ 80 | response = requests.post( 81 | 'https://www.openhumans.org/oauth2/token/', 82 | data={ 83 | 'grant_type': 'refresh_token', 84 | 'refresh_token': self.refresh_token}, 85 | auth=requests.auth.HTTPBasicAuth( 86 | settings.OH_CLIENT_ID, settings.OH_CLIENT_SECRET)) 87 | if response.status_code == 200: 88 | data = response.json() 89 | self.access_token = data['access_token'] 90 | self.refresh_token = data['refresh_token'] 91 | self.token_expires = self.get_expiration(data['expires_in']) 92 | self.save() 93 | -------------------------------------------------------------------------------- /users/templates/users/complete.html: -------------------------------------------------------------------------------- 1 | {% extends 'tweet_display/application.html' %} 2 | 3 | {% block content %} 4 | 5 |

One last step: Uploading your data

6 |

7 | Thank you! We have your authorization from Open Humans. 8 |

9 |
10 |
11 |

12 | To get your Twitter archive … 13 |

14 |
  1. Go to your Twitter Data page
  2. Enter your password and request your data
  3. Check your email – you should get a download link for your data!
19 |

20 | To upload your tweets… 21 |

22 |
  1. Download the archive you requested as described above.
  2. Open this zipped archive and extract the tweet.js file from it.
  3. Optionally: if your tweet.js file is very big, you can make a new zip file containing only that file.
  4. You can now upload the tweet.js file to your Open Humans account using the form below.
28 | After all of this, your data will be analyzed! Depending on your tweet volume and the resulting archive size, the upload might take a while. 29 |

30 | What's in the tweet.js file I'm supposed to upload? 31 |

32 | The file contains all of your tweets along with metadata like geolocation, 33 | whether it was a reply and to whom, links etc. 34 | It does not contain your direct messages, videos, images or anything else. 35 |
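For the technically curious: tweet.js is a JSON array wrapped in a JavaScript assignment. A minimal sketch of reading such a file, assuming the wrapper is a single `... = [...]` assignment (the exact prefix varies between archive generations; the app's own parsing lives in tweet_display/read_data.py):

```python
import json


def load_tweets(path='tweet.js'):
    """Parse a Twitter-archive tweet.js file into a list of tweet dicts."""
    with open(path, encoding='utf-8') as f:
        raw = f.read()
    # The file starts with an assignment such as
    # 'window.YTD.tweet.part0 = [ ... ]' (assumed format); everything
    # from the first '[' onwards is plain JSON.
    return json.loads(raw[raw.index('['):])
```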
36 |
37 |
38 |
39 | {% include 'users/partials/upload_form.html' %} 40 |
41 |
42 |

43 | You can also visit this project's page on Open Humans 44 | to see any data you've previously uploaded. 45 |

46 |
47 |
48 | 49 | {% endblock %} 50 | -------------------------------------------------------------------------------- /users/templates/users/dashboard.html: -------------------------------------------------------------------------------- 1 | {% extends 'tweet_display/application.html' %} 2 | 3 | {% block content %} 4 | 5 |

Welcome to the TwArχiv Analyzer

6 | 7 | 8 |
9 |
10 |

Your Graphs

11 | 12 |
13 |
14 |

General 📈

15 |
16 |
17 | General graphs 18 | show how the daily volume of tweets (and their type) changes over time, 19 | as well as at which time of day tweets are sent. 20 |
21 |
22 | 23 |
24 |
25 |

Interactions 🤝

26 |
27 |
28 | Look at your interactions 29 | on Twitter. To whom do you reply the most? And how is your gender balance when it 30 | comes to replies and retweets? 31 |
32 |
33 | 34 |
35 |
36 |

Locations 🗺

37 |
38 |
39 | Look at the locations 40 | from which you have tweeted. From where on the globe do you tweet most? 41 | Your sofa? Your office? Some obscure mountain top? 42 |
43 |
44 | 45 |
46 | 47 |
48 |

Your Settings

49 |
50 |
51 |

Public Graphs 52 |

53 |
54 |
55 | {% if oh_member.public %} 56 |
57 | 58 | Private 59 |
60 |

61 | Right now your graphs are Public 🔗. 62 | This means you can share 63 | the links to your graphs with friends and they can look at them. 64 | You can change this setting at any time. By the way: Making the 65 | graphs public does not make your Twitter archive public! 66 |

67 | {% else %} 68 |
69 | Public 70 | 71 |
72 |

73 | Right now your graphs are Private 🚫. 74 | This means you can't share these links with others. You can change this setting at any time. 75 | By the way: Making the graphs public does not make your Twitter archive public! 76 |

77 | {%endif%} 78 |
79 |
80 | 81 |
82 |
83 |

Upload{% if has_data %} new{% endif %}

84 |
85 |
86 |

87 | {% if has_data %} 88 | Do you want to replace the tweets that are stored on Open Humans with a newer copy? 89 | {% else %} 90 | We first need a copy of your tweets loaded on Open Humans to do our analysis. 91 | {% endif %} 92 | Just upload the tweet.js file from your Twitter data export with the form below and you're good to go. 93 |

94 | {% include 'users/partials/upload_form.html' %} 95 |
96 |
97 | 98 | {% if has_data %} 99 |
100 |
101 |

Regenerate graphs 102 |

103 |
104 |
105 |
106 | {% csrf_token %} 107 | 108 |
109 |

110 | Sometimes the generation of the graphs fails (sorry for that). Press the button above 111 | and we will use your currently uploaded archive to generate the graphs anew, hopefully 112 | fixing the problem. 113 |

114 |
115 |
116 | {% endif %} 117 | 118 |
119 |
120 |

Delete all graphs

121 |
122 |
123 | Delete graphs 124 |

125 | You'd rather not have all of these graphs stored on our website? Fair enough, 126 | press the button above and we'll remove your TwArχiv account 127 | along with all graphs. (A reminder: this does not delete your Open Humans 128 | account or the data stored there.) 129 |

130 |
131 |
132 |
133 |
134 | 135 | {%endblock%} 136 | -------------------------------------------------------------------------------- /users/templates/users/index.html: -------------------------------------------------------------------------------- 1 | {% extends 'tweet_display/application.html' %} 2 | 3 | {% block content %} 4 | 5 |
6 |

Welcome to the TwArχiv.

7 |

8 | Upload your Twitter archive to Open Humans and 9 | we will analyze it so that you can learn about 10 | your tweet behaviour. 11 |

12 |
13 | 14 |
15 |
16 |

How it works

17 |
  1. Request your Twitter archive from the Twitter website
     Visit the Twitter data page and enter your password to request your data. Generating the data will take Twitter some time. Once it is ready you will get an email from Twitter with a link where you can download the data.
  2. Log in or create an Open Humans account
     You can upload your Twitter archive into this account once Twitter has created it.
  3. Authorize the TwArχiv in Open Humans
     This authorizes us to deposit your archive into your Open Humans account (see the OAuth2 sketch below this list).
  4. Extract your tweets from the archive
     Open the zipped archive file that you downloaded from Twitter and extract the tweet.js file from it.
  5. Upload your tweets
     You will be redirected back to the TwArχiv and can then upload your archive data. You'll be able to access & manage your data on Open Humans.
44 | {%if inactive_project%} 45 | 51 | {%else%} 52 | 54 | Get started / Login 55 | 56 | {%endif%} 57 |
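The "Get started / Login" button sends the member into the Open Humans OAuth2 flow; a minimal sketch of the authorize URL it points at (the direct-sharing authorize path is an assumption based on standard Open Humans OAuth2 projects, not quoted from this template):

```python
# client_id and redirect_uri correspond to the OH_CLIENT_ID and
# OH_REDIRECT_URI settings passed into this template's context.
from urllib.parse import urlencode

OH_BASE_URL = 'https://www.openhumans.org'


def authorize_url(client_id, redirect_uri):
    # Assumed standard Open Humans direct-sharing authorize endpoint.
    params = urlencode({'client_id': client_id,
                        'response_type': 'code',
                        'redirect_uri': redirect_uri})
    return '{}/direct-sharing/projects/oauth2/authorize/?{}'.format(
        OH_BASE_URL, params)
```

Open Humans then redirects back to the app's /users/complete/ endpoint with a `?code=` parameter, which `oh_code_to_member()` in users/views.py exchanges for the member's tokens.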
58 |
59 |

FAQ

60 |
61 |
62 |

Can I see an example of what I'll get?

63 |
64 |
65 |

66 | You bet you can! Head to our already public visualizations 67 | to see how the archives of other users have been visualized. 68 | You will get a general analysis of your tweet behavior, 69 | a breakdown of your interactions over time as well as maps that show from where 70 | you tweet. 71 |

72 |
73 |
74 |
75 |
76 |

Will my archive data be public?

77 |
78 |
79 |

80 | The short answer: No. The longer answer: No, unless you decide that 81 | you want to make this data public. If that is the case then you can head over to 82 | Open Humans 83 | and make the full archive data publicly available. 84 |

85 |
86 |
87 |
88 |
89 |

Will my visualizations be publicly available?

90 |
91 |
92 |

93 | The same short answer: No. The longer answer: No, unless you decide that 94 | you want to make this data public. If you do so, the 95 | graphs will be public visualizations 96 | right here on the website. 97 | You will also get URLs that allow you to share the visualizations with others. 98 |

99 |

100 | You can also choose to make the 101 | graphs private again at any point later on. By the way: You can make your graphs publicly 102 | available without publicly sharing your full archive. 103 |

104 |
105 |
106 |
107 |
108 | 109 | 110 | {%endblock%} 111 | -------------------------------------------------------------------------------- /users/templates/users/partials/upload_form.html: -------------------------------------------------------------------------------- 1 | 67 | {% csrf_token %} 68 |
69 |
70 | {% if form.errors %} 71 |
72 | Errors
73 | {{ form.errors }} 74 |
75 | {% endif %} 76 | {% csrf_token %} 77 |

78 | Upload your tweet.js file
79 | (either uncompressed or as a zip file)
80 | 81 |

82 |

83 |

84 | The analysis after the upload can take a while. We will message you through 85 | the Open Humans system once your graphs are ready. 86 |

87 |
88 |

89 | 90 | The JavaScript uploader has failed for some users in the past. You could try another browser 91 | or use the simpler upload form. 92 |

94 |
95 | -------------------------------------------------------------------------------- /users/templates/users/public_data.html: -------------------------------------------------------------------------------- 1 | {% extends 'tweet_display/application.html' %} 2 | 3 | {% block content %} 4 |
5 |
6 |

Public visualizations

7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | {% for public_user in public_users %} 18 | 19 | 20 | 21 | 22 | 23 | 24 | {% endfor %} 25 | 26 |
User | Graphs
{{ public_user.oh_id|upper }} | General 📈 | Interactions 🤝 | Locations 🗺
27 | 28 | 41 |
42 |
43 |

44 | These users have made their visualizations publicly accessible (thank you for that! 🎉). Click through them and see how 45 | differently individual users use Twitter! 🐦 Interested in giving it a try 46 | as well? Get started now. 47 |

48 |
49 |
50 | 51 | {%endblock%} 52 | -------------------------------------------------------------------------------- /users/templates/users/upload_old.html: -------------------------------------------------------------------------------- 1 | {% extends 'tweet_display/application.html' %} 2 | 3 | {% block content %} 4 | 5 |
6 |
7 |

Upload Twitter archive file

8 | 9 |
10 | {% if form.errors %} 11 |
12 | Errors
13 | {{ form.errors }} 14 |
15 | {% endif %} 16 | {% csrf_token %} 17 |

18 | (complete zip-file as downloaded)
19 | 20 |

21 | 22 |
23 | 24 |
25 | 26 |
27 |

28 | The analysis after the upload can take a while. We will message you through 29 | the Open Humans system once your graphs are ready. (Please check your 30 | spam folder too.) 31 |

32 |

33 | A word of warning: This uploader does not work well for large Twitter archives. 34 | If an archive takes longer than 30 seconds to upload, the website 35 | will time out. In that case, prefer the JavaScript uploader, 36 | as it does not have this limitation. 37 |

38 | 39 |
40 | 41 | {% endblock %} 42 | -------------------------------------------------------------------------------- /users/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gedankenstuecke/twitter-analyser/91e3172bed6f786b34c237548ab81c347fd5b146/users/tests/__init__.py -------------------------------------------------------------------------------- /users/tests/tests_views.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase, Client 2 | from django.conf import settings 3 | from users.models import OpenHumansMember 4 | import requests_mock 5 | from unittest.mock import mock_open, patch 6 | from urllib.error import HTTPError 7 | from users.views import upload_file_to_oh 8 | 9 | OH_BASE_URL = settings.OH_BASE_URL 10 | OH_API_BASE = OH_BASE_URL + '/api/direct-sharing' 11 | OH_DIRECT_UPLOAD = OH_API_BASE + '/project/files/upload/direct/' 12 | OH_DIRECT_UPLOAD_COMPLETE = OH_API_BASE + '/project/files/upload/complete/' 13 | 14 | 15 | class IndexTestCase(TestCase): 16 | """ 17 | Test cases for the index view function. 18 | """ 19 | 20 | def setUp(self): 21 | """ 22 | Set up the app for following test. 23 | """ 24 | settings.DEBUG = True 25 | settings.OH_CLIENT_ID = 'foo' 26 | settings.OH_ACTIVITY_PAGE = 'foobar.com' 27 | settings.OH_REDIRECT_URI = 'foo.com' 28 | self.oh_member = OpenHumansMember.create(oh_id='1234567890abcdef', 29 | access_token='foo', 30 | refresh_token='bar', 31 | expires_in=2000) 32 | self.oh_member.save() 33 | self.user = self.oh_member.user 34 | self.user.set_password('foobar') 35 | self.user.save() 36 | 37 | def test_index(self): 38 | """ 39 | Tests the index view function. 40 | """ 41 | c = Client() 42 | response = c.get('/users/') 43 | self.assertEqual(response.status_code, 200) 44 | self.assertTemplateUsed(response, 'users/index.html') 45 | self.assertEqual(response.context['client_id'], 'foo') 46 | self.assertEqual(response.context['oh_proj_page'], 'foobar.com') 47 | self.assertEqual(response.context['redirect_uri'], 'foo.com') 48 | 49 | def test_index_when_authenticated(self): 50 | """ 51 | Tests the index view function when authenticated. 52 | """ 53 | c = Client() 54 | c.login(username=self.user.username, password='foobar') 55 | response = c.get('/users/') 56 | self.assertRedirects(response, '/users/dashboard/', 57 | status_code=302, target_status_code=200) 58 | 59 | 60 | class DeleteTestCase(TestCase): 61 | """ 62 | Test cases for the delete_account view function. 63 | """ 64 | 65 | def setUp(self): 66 | """ 67 | Set up the app for following test. 68 | """ 69 | settings.DEBUG = True 70 | self.oh_member = OpenHumansMember.create(oh_id='1234567890abcdef', 71 | access_token='foo', 72 | refresh_token='bar', 73 | expires_in=2000) 74 | self.oh_member.save() 75 | self.user = self.oh_member.user 76 | self.user.set_password('foobar') 77 | self.user.save() 78 | 79 | def test_delete(self): 80 | """ 81 | Tests the delete_account view function. 82 | """ 83 | c = Client() 84 | response = c.get('/users/delete/') 85 | self.assertRedirects(response, '/', 86 | status_code=302, target_status_code=302) 87 | 88 | def test_delete_when_authenticated(self): 89 | """ 90 | Tests the delete_account view function when authenticated. 
91 | """ 92 | c = Client() 93 | c.login(username=self.user.username, password='foobar') 94 | response = c.get('/users/delete/') 95 | self.assertRedirects(response, '/', 96 | status_code=302, target_status_code=302) 97 | self.assertEqual( 98 | c.login(username=self.user.username, password='foobar'), 99 | False) 100 | 101 | 102 | class DashboardTestCase(TestCase): 103 | """ 104 | Test cases for the dashboard view function. 105 | """ 106 | 107 | def setUp(self): 108 | """ 109 | Set up the app for following test. 110 | """ 111 | settings.DEBUG = True 112 | settings.OH_ACTIVITY_PAGE = 'foobar.com' 113 | settings.OH_CLIENT_ID = 'foo' 114 | self.oh_member = OpenHumansMember.create(oh_id='1234567890abcdef', 115 | access_token='foo', 116 | refresh_token='bar', 117 | expires_in=2000) 118 | self.oh_member.save() 119 | self.user = self.oh_member.user 120 | self.user.set_password('foobar') 121 | self.user.save() 122 | 123 | def test_dashboard(self): 124 | """ 125 | Tests the dashboard view function 126 | """ 127 | c = Client() 128 | response = c.get('/users/dashboard/') 129 | self.assertRedirects(response, '/', 130 | status_code=302, target_status_code=302) 131 | 132 | def test_dashboard_when_authenticated(self): 133 | """ 134 | Tests the dashboard view function when authenticated 135 | """ 136 | c = Client() 137 | c.login(username=self.user.username, password='foobar') 138 | response = c.get('/users/dashboard/') 139 | self.assertTemplateUsed(response, 'users/dashboard.html') 140 | self.assertEqual(response.status_code, 200) 141 | self.assertEqual(response.context['section'], 'home') 142 | self.assertEqual(response.context['oh_proj_page'], 'foobar.com') 143 | self.assertEqual(response.context['oh_member'], self.oh_member) 144 | self.assertEqual(response.context['has_data'], False) 145 | self.assertEqual(response.context['client_id'], 'foo') 146 | 147 | 148 | class AccessSwitchTestCase(TestCase): 149 | """ 150 | Test cases for the access_switch view function. 151 | """ 152 | 153 | def setUp(self): 154 | """ 155 | Set up the app for following test. 156 | """ 157 | settings.DEBUG = True 158 | self.oh_member = OpenHumansMember.create(oh_id='1234567890abcdef', 159 | access_token='foo', 160 | refresh_token='bar', 161 | expires_in=2000,) 162 | self.oh_member.save() 163 | self.user = self.oh_member.user 164 | self.user.set_password('foobar') 165 | self.user.save() 166 | 167 | def test_access_switch_unauthenticated(self): 168 | """ 169 | Tests the access_switch view function 170 | when unauthenticated. 171 | """ 172 | c = Client() 173 | response = c.get('/users/access_switch/') 174 | self.assertRedirects(response, '/users/dashboard/', 175 | status_code=302, target_status_code=302) 176 | 177 | def test_access_switch(self): 178 | """ 179 | Tests the access_switch view function. 180 | """ 181 | c = Client() 182 | c.login(username=self.user.username, password='foobar') 183 | retrieved = OpenHumansMember.objects.get( 184 | oh_id=self.user.openhumansmember.oh_id) 185 | self.assertEqual(retrieved.public, False) 186 | c.get('/users/access_switch/') 187 | retrieved = OpenHumansMember.objects.get( 188 | oh_id=self.user.openhumansmember.oh_id) 189 | self.assertEqual(retrieved.public, True) 190 | 191 | 192 | class UploadTestCase(TestCase): 193 | """ 194 | Tests for upload_file_to_oh and upload_old. 
195 | """ 196 | 197 | def setUp(self): 198 | """ 199 | Set up the app for following tests 200 | """ 201 | settings.DEBUG = True 202 | self.oh_member = OpenHumansMember.create(oh_id='1234567890abcdef', 203 | access_token='foo', 204 | refresh_token='bar', 205 | expires_in=2000) 206 | self.oh_member.save() 207 | self.user = self.oh_member.user 208 | self.user.set_password('foobar') 209 | self.user.save() 210 | 211 | def test_upload_old(self): 212 | """ 213 | Tests the upload_old function. 214 | """ 215 | c = Client() 216 | response = c.get('/users/upload_simple/') 217 | self.assertRedirects(response, '/users/dashboard/', 218 | status_code=302, target_status_code=302) 219 | 220 | def test_upload_old_authenticated(self): 221 | """ 222 | Tests the upload_old function when authenticated. 223 | """ 224 | c = Client() 225 | c.login(username=self.user.username, password='foobar') 226 | response = c.get('/users/upload_simple/') 227 | self.assertTemplateUsed(response, 'users/upload_old.html') 228 | 229 | def test_upload_function(self): 230 | """ 231 | Tests upload feature 232 | """ 233 | with requests_mock.Mocker() as m: 234 | # API-upload-URL 235 | upload_url = '{}?access_token={}'.format( 236 | OH_DIRECT_UPLOAD, self.oh_member.access_token) 237 | # mock delete-API call 238 | m.register_uri('POST', 239 | OH_API_BASE + "/project/files/delete/", 240 | status_code=200) 241 | # mock request 1 to initiate upload, get AWS link 242 | m.register_uri('POST', 243 | upload_url, 244 | json={'url': 245 | 'http://example.com/upload', 246 | 'id': 1234}, 247 | status_code=201) 248 | # mock AWS link 249 | m.register_uri('PUT', 250 | 'http://example.com/upload', 251 | status_code=200) 252 | # mock completed link 253 | m.register_uri('POST', 254 | OH_DIRECT_UPLOAD_COMPLETE, 255 | status_code=200) 256 | with patch('builtins.open', 257 | mock_open(read_data='foobar'), 258 | create=True): 259 | fake_file = open('foo') 260 | upload_file_to_oh(self.oh_member, 261 | fake_file, 262 | {'tags': '["foo"]'}) 263 | 264 | def test_upload_function_first_fail(self): 265 | """ 266 | Tests upload feature. 
267 | """ 268 | with requests_mock.Mocker() as m: 269 | # API-upload-URL 270 | upload_url = '{}?access_token={}'.format( 271 | OH_DIRECT_UPLOAD, self.oh_member.access_token) 272 | # mock delete-API call 273 | m.register_uri('POST', 274 | OH_API_BASE + "/project/files/delete/", 275 | status_code=200) 276 | # mock request 1 to initiate upload, get AWS link 277 | m.register_uri('POST', 278 | upload_url, 279 | json={'url': 280 | 'http://example.com/upload', 281 | 'id': 1234}, 282 | status_code=404) 283 | # mock AWS link 284 | m.register_uri('PUT', 285 | 'http://example.com/upload', 286 | status_code=200) 287 | # mock completed link 288 | m.register_uri('POST', 289 | OH_DIRECT_UPLOAD_COMPLETE, 290 | status_code=200) 291 | with patch('builtins.open', 292 | mock_open(read_data='foobar'), 293 | create=True): 294 | fake_file = open('foo') 295 | self.assertRaises(HTTPError, upload_file_to_oh, 296 | self.oh_member, fake_file, 297 | {'tags': '["foo"]'}) 298 | 299 | def test_upload_function_second_fail(self): 300 | """ 301 | Tests upload feature 302 | """ 303 | with requests_mock.Mocker() as m: 304 | # API-upload-URL 305 | upload_url = '{}?access_token={}'.format( 306 | OH_DIRECT_UPLOAD, self.oh_member.access_token) 307 | # mock delete-API call 308 | m.register_uri('POST', 309 | OH_API_BASE + "/project/files/delete/", 310 | status_code=200) 311 | # mock request 1 to initiate upload, get AWS link 312 | m.register_uri('POST', 313 | upload_url, 314 | json={'url': 315 | 'http://example.com/upload', 316 | 'id': 1234}, 317 | status_code=201) 318 | # mock AWS link 319 | m.register_uri('PUT', 320 | 'http://example.com/upload', 321 | status_code=404) 322 | # mock completed link 323 | m.register_uri('POST', 324 | OH_DIRECT_UPLOAD_COMPLETE, 325 | status_code=200) 326 | with patch('builtins.open', 327 | mock_open(read_data='foobar'), 328 | create=True): 329 | fake_file = open('foo') 330 | self.assertRaises(HTTPError, upload_file_to_oh, 331 | self.oh_member, fake_file, 332 | {'tags': '["foo"]'}) 333 | 334 | def test_upload_function_third_fail(self): 335 | """ 336 | Tests upload feature 337 | """ 338 | with requests_mock.Mocker() as m: 339 | # API-upload-URL 340 | upload_url = '{}?access_token={}'.format( 341 | OH_DIRECT_UPLOAD, self.oh_member.access_token) 342 | # mock delete-API call 343 | m.register_uri('POST', 344 | OH_API_BASE + "/project/files/delete/", 345 | status_code=200) 346 | # mock request 1 to initiate upload, get AWS link 347 | m.register_uri('POST', 348 | upload_url, 349 | json={'url': 350 | 'http://example.com/upload', 351 | 'id': 1234}, 352 | status_code=201) 353 | # mock AWS link 354 | m.register_uri('PUT', 355 | 'http://example.com/upload', 356 | status_code=200) 357 | # mock completed link 358 | m.register_uri('POST', 359 | OH_DIRECT_UPLOAD_COMPLETE, 360 | status_code=404) 361 | with patch('builtins.open', 362 | mock_open(read_data='foobar'), 363 | create=True): 364 | fake_file = open('foo') 365 | self.assertRaises(HTTPError, upload_file_to_oh, 366 | self.oh_member, fake_file, 367 | {'tags': '["foo"]'}) 368 | 369 | 370 | class CompleteTestCase(TestCase): 371 | """ 372 | Tests for complete function. 
373 | """ 374 | 375 | def setUp(self): 376 | """ 377 | Set up the app for following tests 378 | """ 379 | settings.DEBUG = True 380 | self.oh_member = OpenHumansMember.create(oh_id='1234567890abcdef', 381 | access_token='foo', 382 | refresh_token='bar', 383 | expires_in=2000) 384 | self.oh_member.save() 385 | self.user = self.oh_member.user 386 | self.user.set_password('foobar') 387 | self.user.save() 388 | 389 | def test_complete_unauthenticated(self): 390 | """ 391 | Tests making a get request to complete 392 | when not authenticated. 393 | """ 394 | with self.assertLogs(logger='users.views', level='DEBUG') as log: 395 | c = Client() 396 | response = c.get("/users/complete/", {'code': 'mytestcode'}) 397 | self.assertIn( 398 | "Invalid code exchange. User returned to start page.", 399 | log.output[len(log.output)-1]) 400 | self.assertEqual(response.status_code, 302) 401 | self.assertRedirects(response, '/', 402 | status_code=302, target_status_code=302) 403 | 404 | 405 | class RegenerategraphsTestCase(TestCase): 406 | """ 407 | Tests regenerate_graphs. 408 | """ 409 | 410 | def setUp(self): 411 | """ 412 | Set up the app for following tests 413 | """ 414 | settings.DEBUG = True 415 | self.oh_member = OpenHumansMember.create(oh_id='1234567890abcdef', 416 | access_token='foo', 417 | refresh_token='bar', 418 | expires_in=2000) 419 | self.oh_member.save() 420 | self.user = self.oh_member.user 421 | self.user.set_password('foobar') 422 | self.user.save() 423 | 424 | def test_regenerate_graphs_get_unauthenticated(self): 425 | """ 426 | Test making a get request to regenerate_graphs 427 | when not authenticated. 428 | """ 429 | c = Client() 430 | response = c.get('/users/regenerate/') 431 | self.assertRedirects(response, '/users/dashboard/', 432 | status_code=302, target_status_code=302) 433 | 434 | def test_regenerate_graphs_get_authenticated(self): 435 | """ 436 | Test making a get request to regenerate_graphs 437 | when authenticated. 438 | """ 439 | c = Client() 440 | c.login(username=self.user.username, password='foobar') 441 | response = c.get('/users/regenerate/') 442 | self.assertRedirects(response, '/users/dashboard/', 443 | status_code=302, target_status_code=200) 444 | -------------------------------------------------------------------------------- /users/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url 2 | 3 | from . 
import views 4 | 5 | urlpatterns = [ 6 | url(r'^$', views.index, name='user_home'), 7 | url(r'^dashboard/$', views.dashboard, name='dashboard'), 8 | url(r'complete/?$', views.complete, name='complete'), 9 | url(r'delete/?$', views.delete_account, name='user.delete'), 10 | url(r'access_switch/?$', views.access_switch, name='user.access'), 11 | url(r'regenerate/?$', views.regenerate_graphs, name='regenerate'), 12 | url(r'public_data/?$', views.public_data, name='public_data'), 13 | url(r'^upload_simple/?$', views.upload_old, name='upload_old'), 14 | ] 15 | -------------------------------------------------------------------------------- /users/views.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | try: 5 | from urllib2 import HTTPError 6 | except ImportError: 7 | from urllib.error import HTTPError 8 | 9 | from django.conf import settings 10 | from django.contrib.auth import login 11 | from django.shortcuts import redirect, render 12 | from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger 13 | 14 | import requests 15 | 16 | from tweet_display.helper import get_file_url 17 | from tweet_display.tasks import import_data 18 | 19 | from .models import OpenHumansMember 20 | from .forms import UploadFileForm 21 | 22 | # Open Humans settings 23 | OH_BASE_URL = settings.OH_BASE_URL 24 | OH_API_BASE = OH_BASE_URL + '/api/direct-sharing' 25 | OH_DELETE_FILES = OH_API_BASE + '/project/files/delete/' 26 | OH_DIRECT_UPLOAD = OH_API_BASE + '/project/files/upload/direct/' 27 | OH_DIRECT_UPLOAD_COMPLETE = OH_API_BASE + '/project/files/upload/complete/' 28 | 29 | APP_BASE_URL = os.getenv('APP_BASE_URL', 'http://127.0.0.1:5000/users') 30 | APP_PROJ_PAGE = 'https://www.openhumans.org/activity/twitter-archive-analyzer/' 31 | 32 | # Set up logging. 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | def oh_get_member_data(token): 37 | """ 38 | Exchange OAuth2 token for member data. 39 | """ 40 | req = requests.get( 41 | '{}/api/direct-sharing/project/exchange-member/'.format(OH_BASE_URL), 42 | params={'access_token': token}) 43 | if req.status_code == 200: 44 | return req.json() 45 | raise Exception('Status code {}'.format(req.status_code)) 46 | return None 47 | 48 | 49 | def oh_code_to_member(code): 50 | """ 51 | Exchange code for token, use this to create and return OpenHumansMember. 52 | If a matching OpenHumansMember already exists in db, update and return it. 
53 | """ 54 | if settings.OH_CLIENT_SECRET and settings.OH_CLIENT_ID and code: 55 | data = { 56 | 'grant_type': 'authorization_code', 57 | 'redirect_uri': '{}/complete'.format(APP_BASE_URL), 58 | 'code': code, 59 | } 60 | req = requests.post( 61 | '{}/oauth2/token/'.format(OH_BASE_URL), 62 | data=data, 63 | auth=requests.auth.HTTPBasicAuth( 64 | settings.OH_CLIENT_ID, 65 | settings.OH_CLIENT_SECRET 66 | )) 67 | data = req.json() 68 | if 'access_token' in data: 69 | oh_id = oh_get_member_data( 70 | data['access_token'])['project_member_id'] 71 | try: 72 | oh_member = OpenHumansMember.objects.get(oh_id=oh_id) 73 | logger.debug('Member {} re-authorized.'.format(oh_id)) 74 | oh_member.access_token = data['access_token'] 75 | oh_member.refresh_token = data['refresh_token'] 76 | oh_member.token_expires = OpenHumansMember.get_expiration( 77 | data['expires_in']) 78 | except OpenHumansMember.DoesNotExist: 79 | oh_member = OpenHumansMember.create( 80 | oh_id=oh_id, 81 | access_token=data['access_token'], 82 | refresh_token=data['refresh_token'], 83 | expires_in=data['expires_in']) 84 | logger.debug('Member {} created.'.format(oh_id)) 85 | oh_member.save() 86 | 87 | return oh_member 88 | elif 'error' in req.json(): 89 | logger.debug('Error in token exchange: {}'.format(req.json())) 90 | else: 91 | logger.warning('Neither token nor error info in OH response!') 92 | else: 93 | logger.error('OH_CLIENT_SECRET or code are unavailable') 94 | return None 95 | 96 | 97 | def delete_all_oh_files(oh_member): 98 | """ 99 | Delete all current project files in Open Humans for this project member. 100 | """ 101 | requests.post( 102 | OH_DELETE_FILES, 103 | params={'access_token': oh_member.get_access_token()}, 104 | data={'project_member_id': oh_member.oh_id, 105 | 'all_files': True}) 106 | 107 | 108 | def upload_file_to_oh(oh_member, filehandle, metadata): 109 | """ 110 | This demonstrates using the Open Humans "large file" upload process. 111 | The small file upload process is simpler, but it can time out. This 112 | alternate approach is required for large files, and still appropriate 113 | for small files. 114 | This process is "direct to S3" using three steps: 1. get S3 target URL from 115 | Open Humans, 2. Perform the upload, 3. Notify Open Humans when complete. 116 | """ 117 | # Remove any previous file - replace with this one. 118 | delete_all_oh_files(oh_member) 119 | 120 | # Get the S3 target from Open Humans. 121 | upload_url = '{}?access_token={}'.format( 122 | OH_DIRECT_UPLOAD, oh_member.get_access_token()) 123 | req1 = requests.post( 124 | upload_url, 125 | data={'project_member_id': oh_member.oh_id, 126 | 'filename': filehandle.name, 127 | 'metadata': json.dumps(metadata)}) 128 | if req1.status_code != 201: 129 | raise raise_http_error(upload_url, req1, 130 | 'Bad response when starting file upload.') 131 | 132 | # Upload to S3 target. 133 | req2 = requests.put(url=req1.json()['url'], data=filehandle) 134 | if req2.status_code != 200: 135 | raise raise_http_error(req1.json()['url'], req2, 136 | 'Bad response when uploading to target.') 137 | 138 | # Report completed upload to Open Humans. 
139 | complete_url = ('{}?access_token={}'.format( 140 | OH_DIRECT_UPLOAD_COMPLETE, oh_member.get_access_token())) 141 | req3 = requests.post( 142 | complete_url, 143 | data={'project_member_id': oh_member.oh_id, 144 | 'file_id': req1.json()['id']}) 145 | if req3.status_code != 200: 146 | raise raise_http_error(complete_url, req2, 147 | 'Bad response when completing upload.') 148 | 149 | # print('Upload done: "{}" for member {}.'.format( 150 | # os.path.basename(filehandle.name), oh_member.oh_id)) 151 | 152 | 153 | def raise_http_error(url, response, message): 154 | raise HTTPError(url, response.status_code, message, hdrs=None, fp=None) 155 | 156 | 157 | def index(request): 158 | """ 159 | Starting page for app. 160 | """ 161 | context = {'client_id': settings.OH_CLIENT_ID, 162 | 'oh_proj_page': settings.OH_ACTIVITY_PAGE, 163 | 'inactive_project': settings.INACTIVE_PROJECT, 164 | 'redirect_uri': settings.OH_REDIRECT_URI} 165 | if request.user.is_authenticated: 166 | return redirect('dashboard') 167 | return render(request, 'users/index.html', context=context) 168 | 169 | 170 | def complete(request): 171 | """ 172 | Receive user from Open Humans. Store data, start data upload task. 173 | """ 174 | logger.debug("Received user returning from Open Humans.") 175 | 176 | form = None 177 | 178 | if request.method == 'GET': 179 | # Exchange code for token. 180 | # This creates an OpenHumansMember and associated User account. 181 | code = request.GET.get('code', '') 182 | oh_member = oh_code_to_member(code=code) 183 | if oh_member: 184 | # Log in the user. 185 | user = oh_member.user 186 | login(request, user, 187 | backend='django.contrib.auth.backends.ModelBackend') 188 | elif not request.user.is_authenticated: 189 | logger.debug('Invalid code exchange. User returned to start page.') 190 | return redirect('/') 191 | else: 192 | oh_member = request.user.openhumansmember 193 | 194 | if get_file_url(oh_member.oh_id) is not None: 195 | return redirect('dashboard') 196 | 197 | form = UploadFileForm() 198 | context = {'oh_id': oh_member.oh_id, 199 | 'oh_member': oh_member, 200 | 'oh_proj_page': settings.OH_ACTIVITY_PAGE, 201 | 'form': form} 202 | return render(request, 'users/complete.html', 203 | context=context) 204 | 205 | elif request.method == 'POST': 206 | form = UploadFileForm(request.POST, request.FILES) 207 | if form.is_valid(): 208 | metadata = {'tags': ['twitter', 'twitter-archive'], 209 | 'description': 'Twitter achive file.'} 210 | upload_file_to_oh( 211 | request.user.openhumansmember, 212 | request.FILES['file'], 213 | metadata) 214 | else: 215 | logger.debug('INVALID FORM') 216 | import_data.delay(request.user.openhumansmember.oh_id) 217 | return redirect('dashboard') 218 | 219 | 220 | def public_data(request): 221 | public_user_list = OpenHumansMember.objects.filter( 222 | public=True).order_by( 223 | 'oh_id') 224 | paginator = Paginator(public_user_list, 20) # Show 20 contacts per page 225 | page = request.GET.get('page') 226 | try: 227 | public_users = paginator.page(page) 228 | except PageNotAnInteger: 229 | # If page is not an integer, deliver first page. 230 | public_users = paginator.page(1) 231 | except EmptyPage: 232 | # If page is out of range (e.g. 9999), deliver last page of results. 
233 | public_users = paginator.page(paginator.num_pages) 234 | return render(request, 'users/public_data.html', 235 | {'public_users': public_users, 236 | 'section': 'public_data'}) 237 | 238 | 239 | def dashboard(request): 240 | """ 241 | Give options to delete account, make data public/private, 242 | reupload archive, trigger new parsing of archive. 243 | """ 244 | if request.user.is_authenticated: 245 | oh_member = request.user.openhumansmember 246 | has_data = bool(get_file_url(oh_member.oh_id)) 247 | context = {'client_id': settings.OH_CLIENT_ID, 248 | 'oh_proj_page': settings.OH_ACTIVITY_PAGE, 249 | 'oh_member': oh_member, 250 | 'has_data': has_data, 251 | 'section': 'home'} 252 | 253 | return render(request, 'users/dashboard.html', context=context) 254 | return redirect("/") 255 | 256 | 257 | def delete_account(request): 258 | if request.user.is_authenticated: 259 | oh_member = request.user.openhumansmember 260 | oh_member.delete() 261 | request.user.delete() 262 | return redirect("/") 263 | 264 | 265 | def access_switch(request): 266 | if request.user.is_authenticated: 267 | oh_member = request.user.openhumansmember 268 | if oh_member.public: 269 | oh_member.public = False 270 | else: 271 | oh_member.public = True 272 | oh_member.save() 273 | return redirect('dashboard') 274 | 275 | 276 | def regenerate_graphs(request): 277 | if request.method == 'POST' and request.user.is_authenticated: 278 | import_data.delay(request.user.openhumansmember.oh_id) 279 | return redirect('dashboard') 280 | 281 | 282 | def upload_old(request): 283 | if request.user.is_authenticated: 284 | return render(request, 'users/upload_old.html') 285 | return redirect('dashboard') 286 | --------------------------------------------------------------------------------
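For reference, a minimal sketch of driving the three-step direct-to-S3 upload from users/views.py above outside a request cycle (e.g. from a Django management command or shell); the member id and file path are hypothetical:

```python
# Hypothetical driver for upload_file_to_oh() defined above. Assumes an
# existing OpenHumansMember row and a local tweet.js file, and that
# Django settings are configured (e.g. via `python manage.py shell`).
from users.models import OpenHumansMember
from users.views import upload_file_to_oh

member = OpenHumansMember.objects.get(oh_id='1234567890abcdef')
with open('tweet.js', 'rb') as archive:
    # Step 1 fetches an S3 target URL from Open Humans, step 2 PUTs the
    # file there, step 3 reports completion back to Open Humans.
    upload_file_to_oh(member, archive,
                      {'tags': ['twitter', 'twitter-archive'],
                       'description': 'Twitter archive file.'})
```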