├── twitter_stream ├── __init__.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── stream_from_file.py │ │ └── stream.py ├── migrations │ ├── __init__.py │ ├── 0004_auto__del_field_tweet_analyzed_by.py │ ├── 0006_auto__chg_field_tweet_id.py │ ├── 0002_auto__add_index_tweet_analyzed_by__add_index_tweet_created_at.py │ ├── 0003_auto__add_field_streamprocess_memory_usage.py │ ├── 0005_auto__del_field_apikey_name__add_field_apikey_user_name__add_field_api.py │ └── 0001_initial.py ├── tests │ ├── __init__.py │ ├── test_stream_process.py │ └── test_tweet.py ├── utils │ ├── __init__.py │ ├── file_stream.py │ └── streaming.py ├── admin.py ├── urls.py ├── static │ └── twitter_stream │ │ ├── status.css │ │ └── status.js ├── settings.py ├── templates │ └── twitter_stream │ │ ├── status.html │ │ └── status_display.html ├── fields.py ├── views.py └── models.py ├── manage.py ├── .gitignore ├── test_settings.py ├── LICENSE ├── setup.py └── README.md /twitter_stream/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /twitter_stream/management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /twitter_stream/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /twitter_stream/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /twitter_stream/tests/__init__.py: -------------------------------------------------------------------------------- 1 | from .test_tweet import * 2 | from .test_stream_process import * 3 | 
-------------------------------------------------------------------------------- /twitter_stream/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .file_stream import FakeTwitterStream, FakeTermChecker 2 | from .streaming import FeelsTermChecker, QueueStreamListener 3 | -------------------------------------------------------------------------------- /twitter_stream/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | from . import models 4 | 5 | admin.site.register(models.FilterTerm) 6 | admin.site.register(models.ApiKey) 7 | -------------------------------------------------------------------------------- /twitter_stream/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import patterns, url 2 | 3 | urlpatterns = patterns('twitter_stream.views', 4 | url(r'^$', 'status', name='status'), 5 | url(r'^update/', 'json_status', name='update'), 6 | ) 7 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "test_settings") 7 | 8 | from django.core.management import execute_from_command_line 9 | 10 | execute_from_command_line(sys.argv) 11 | -------------------------------------------------------------------------------- /twitter_stream/tests/test_stream_process.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | from twitter_stream import settings 3 | from twitter_stream.models import StreamProcess 4 | 5 | class StreamProcessTest(TestCase): 6 | 7 | def test_get_memory_usage(self): 8 | import os 9 | 10 | process = StreamProcess() 11 | usage 
= process.get_memory_usage() 12 | if os.name == 'nt': 13 | self.assertEqual(usage, "Unknown") 14 | else: 15 | self.assertRegexpMatches(usage, r"\d+.\d+ MB") 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.pot 3 | *.pyc 4 | 5 | *.py[cod] 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Packages 11 | *.egg 12 | *.egg-info 13 | dist 14 | build 15 | eggs 16 | parts 17 | bin 18 | var 19 | sdist 20 | develop-eggs 21 | .installed.cfg 22 | lib 23 | lib64 24 | __pycache__ 25 | 26 | # Installer logs 27 | pip-log.txt 28 | 29 | # Unit test / coverage reports 30 | .coverage 31 | .tox 32 | nosetests.xml 33 | coverage.xml 34 | pep8.txt 35 | 36 | # Translations 37 | *.mo 38 | 39 | # Mr Developer 40 | .mr.developer.cfg 41 | .project 42 | .pydevproject 43 | 44 | /venv 45 | /.idea 46 | /*.db -------------------------------------------------------------------------------- /test_settings.py: -------------------------------------------------------------------------------- 1 | from os.path import abspath, dirname, join, normpath 2 | 3 | # Absolute filesystem path to the Django project directory: 4 | DJANGO_ROOT = dirname(dirname(abspath(__file__))) 5 | 6 | 7 | SECRET_KEY = 'secret' 8 | 9 | DATABASES = { 10 | 'default': { 11 | 'ENGINE': 'django.db.backends.sqlite3', 12 | 'NAME': 'test_database.db', 13 | } 14 | } 15 | 16 | STATICFILES_FINDERS = ( 17 | 'django.contrib.staticfiles.finders.FileSystemFinder', 18 | 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 19 | ) 20 | 21 | TEMPLATE_LOADERS = ( 22 | 'django.template.loaders.filesystem.Loader', 23 | 'django.template.loaders.app_directories.Loader', 24 | ) 25 | 26 | TEMPLATE_DIRS = ( 27 | normpath(join(DJANGO_ROOT, 'templates')), 28 | ) 29 | 30 | INSTALLED_APPS = ( 31 | 'django.contrib.humanize', 32 | 'django.contrib.staticfiles', 33 | 'bootstrap3', 34 | 'twitter_stream', 35 | 'south', 36 | 
) -------------------------------------------------------------------------------- /twitter_stream/static/twitter_stream/status.css: -------------------------------------------------------------------------------- 1 | svg { 2 | font-size: 12px; 3 | } 4 | .axis path, 5 | .axis line { 6 | fill: none; 7 | stroke: #888; 8 | shape-rendering: crispEdges; 9 | } 10 | 11 | .axis text { 12 | fill: #888; 13 | } 14 | 15 | .x.axis path { 16 | display: none; 17 | } 18 | 19 | .bars rect { 20 | fill: steelblue; 21 | stroke:none; 22 | shape-rendering: crispEdges; 23 | } 24 | 25 | .bars text { 26 | text-anchor: middle; 27 | fill: #fff; 28 | } 29 | 30 | .bars .filling rect { 31 | fill: #bcd5e8; 32 | } 33 | .bars .filling text { 34 | fill: #333; 35 | } 36 | 37 | .content { 38 | position: relative; 39 | } 40 | 41 | h4 { 42 | margin-bottom: 0; 43 | margin-top: 15px; 44 | } 45 | 46 | .status-label { 47 | position: absolute; 48 | top: 0; 49 | left: 260px; 50 | margin: 20px 0 10px 0; 51 | color: #dd0000; 52 | } 53 | 54 | .status-label.running { 55 | color: #009800; 56 | } 57 | 58 | .status-label span { 59 | font-size: 36px; 60 | } -------------------------------------------------------------------------------- /twitter_stream/settings.py: -------------------------------------------------------------------------------- 1 | from django.conf import settings 2 | 3 | DEBUG = getattr(settings, 'DEBUG', False) 4 | USE_TZ = getattr(settings, 'USE_TZ', True) 5 | 6 | _stream_settings = getattr(settings, 'TWITTER_STREAM_SETTINGS', {}) 7 | 8 | # If true, the embedded retweeted_status tweets will be captured 9 | CAPTURE_EMBEDDED = _stream_settings.get('CAPTURE_EMBEDDED', False) 10 | 11 | # The number of seconds in between checks for filter term changes and tweet inserts 12 | POLL_INTERVAL = _stream_settings.get('POLL_INTERVAL', 10) 13 | 14 | # The default keys to use for streaming 15 | DEFAULT_KEYS_NAME = _stream_settings.get('DEFAULT_KEYS_NAME', None) 16 | 17 | # Put the stream in a loop to prevent 
random termination 18 | PREVENT_EXIT = _stream_settings.get('PREVENT_EXIT', False) 19 | 20 | # Record stats like memory usage in the database 21 | MONITOR_PERFORMANCE = _stream_settings.get('MONITOR_PERFORMANCE', True) 22 | 23 | # The number of tweets to insert into the database at once 24 | INSERT_BATCH_SIZE = _stream_settings.get('INSERT_BATCH_SIZE', 1000) 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Michael Brooks 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | # Utility function to read the README file. 
def read(fname):
    """Return the contents of *fname*, resolved relative to this script.

    Used to feed the README into ``long_description``.  The file is read
    inside a ``with`` block so the handle is closed as soon as the text has
    been loaded instead of being left open until garbage collection.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path) as handle:
        return handle.read()
22 | {% bootstrap_messages %} 23 | 24 |

Twitter Stream

25 | 26 |
27 |

Most recent 20 minutes of tweets

28 |
29 | 30 |
31 | {% include 'twitter_stream/status_display.html' %} 32 |
33 | 34 | 40 |
41 | 42 | 43 | {% bootstrap_javascript %} 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /twitter_stream/templates/twitter_stream/status_display.html: -------------------------------------------------------------------------------- 1 | {% load humanize %} 2 | 3 |
4 | {% if status.running %} 5 | 6 | {% else %} 7 | 8 | {% endif %} 9 |
10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 |
Tweets Stored:~{{ status.tweet_count }}
Earliest:{{ status.earliest }}
Latest:{{ status.latest }}
Average Rate:{{ status.avg_rate|floatformat }} tweets / second
31 |

32 | 33 |

Current filter terms: 34 | {% for term in status.terms %} 35 | {{ term }} 36 | {% endfor %} 37 |

38 | 39 | {% if status.processes %} 40 |

Recent Twitter streaming processes:

41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | {% for stream in status.processes %} 56 | 60 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | {% if stream.error_count > 0 %} 72 | 73 | {% else %} 74 | 75 | {% endif %} 76 | 77 | {% endfor %} 78 | 79 |
StatusProcessAPI KeyStartedLast HeartbeatTweet Rate (t/s)MemoryErrors
61 | 62 | {{ stream.get_status_display }} 63 | 64 | {{ stream.hostname }}:{{ stream.process_id }}{{ stream.keys }}{{ stream.created_at|naturaltime }}{{ stream.last_heartbeat|naturaltime }}{{ stream.tweet_rate|floatformat }}{{ stream.memory_usage }}{{ stream.error_count }}{{ stream.error_count }}
80 | {% else %} 81 |

No recent Twitter streaming processes.

class PositiveBigIntegerField(models.BigIntegerField):
    """Big integer field that is stored as ``bigint UNSIGNED`` on MySQL.

    On any other backend the column type is whatever ``BigIntegerField``
    produces.
    """
    description = "Positive Big integer"

    def formfield(self, **kwargs):
        """Return the form field, defaulting to the unsigned 64-bit range.

        Keyword arguments supplied by the caller override the defaults.
        """
        defaults = {'min_value': 0,
                    # 2 * MAX_BIGINT + 1 == 2**64 - 1, the largest value an
                    # unsigned 64-bit column can hold.
                    'max_value': models.BigIntegerField.MAX_BIGINT * 2 + 1}
        defaults.update(kwargs)
        return super(PositiveBigIntegerField, self).formfield(**defaults)

    def db_type(self, connection):
        """Return ``bigint UNSIGNED`` on MySQL, else the parent's column type.

        MySQL is detected by looking for 'mysql' in the module name of the
        connection's class.
        """
        if 'mysql' in connection.__class__.__module__:
            return 'bigint UNSIGNED'
        return super(PositiveBigIntegerField, self).db_type(connection)


class PositiveBigAutoField(models.AutoField):
    """Auto field stored as ``bigint UNSIGNED AUTO_INCREMENT`` on MySQL."""
    description = "Unsigned Big Integer"
    empty_strings_allowed = False
    MAX_BIGINT = 9223372036854775807

    default_error_messages = {
        'invalid': "'%(value)s' value must be an integer.",
    }

    def db_type(self, connection):
        """Return the unsigned auto-increment type on MySQL, else the parent's."""
        if 'mysql' in connection.__class__.__module__:
            return 'bigint UNSIGNED AUTO_INCREMENT'

        return super(PositiveBigAutoField, self).db_type(connection)

    def get_prep_value(self, value):
        """Convert *value* to ``int`` for the database; ``None`` passes through."""
        if value is None:
            return None
        return int(value)

    def get_prep_lookup(self, lookup_type, value):
        """Prepare *value* for a lookup, rounding floats up for 'gte' and 'lt'."""
        if lookup_type in ('gte', 'lt') and isinstance(value, float):
            value = math.ceil(value)
        return super(PositiveBigAutoField, self).get_prep_lookup(lookup_type, value)

    def to_python(self, value):
        """Convert *value* to ``int``.

        ``None`` is returned unchanged.  Values that ``int()`` rejects raise
        ``ValidationError`` with the 'invalid' message.
        """
        if value is None:
            return value
        try:
            return int(value)
        except (TypeError, ValueError):
            raise exceptions.ValidationError(
                self.error_messages['invalid'],
                code='invalid',
                params={'value': value},
            )

    def formfield(self, **kwargs):
        """Return an integer form field limited to the unsigned 64-bit range.

        Keyword arguments supplied by the caller override the defaults.
        """
        defaults = {'min_value': 0,
                    # 2 * MAX_BIGINT + 1 == 2**64 - 1 (unsigned 64-bit maximum).
                    'max_value': PositiveBigAutoField.MAX_BIGINT * 2 + 1,
                    'form_class': forms.IntegerField}
        defaults.update(kwargs)
        return super(PositiveBigAutoField, self).formfield(**defaults)


class PositiveBigAutoForeignKey(models.ForeignKey):
    """A special foreign key field for positive big auto fields"""

    def db_type(self, connection):
        """Return the column type matching the field this key points to."""
        # The database column type of a ForeignKey is the column type
        # of the field to which it points. An exception is if the ForeignKey
        # points to an AutoField/PositiveIntegerField/PositiveSmallIntegerField,
        # in which case the column type is simply that of an IntegerField.
        # If the database needs similar types for key fields however, the only
        # thing we can do is making AutoField an IntegerField.
        rel_field = self.related_field
        if isinstance(rel_field, PositiveBigAutoField):
            # Use the plain unsigned type so the referencing column does not
            # pick up the target's AUTO_INCREMENT clause.
            return PositiveBigIntegerField().db_type(connection=connection)
        return rel_field.db_type(connection=connection)


try:
    # If we are using south, we need some rules to use these fields
    from south.modelsinspector import add_introspection_rules
    # Raw strings: "\." is a regex escape, not a Python string escape.
    add_introspection_rules([], [r"^twitter_stream\.fields\.PositiveBigAutoField"])
    add_introspection_rules([], [r"^twitter_stream\.fields\.PositiveBigIntegerField"])
    add_introspection_rules([], [r"^twitter_stream\.fields\.PositiveBigAutoForeignKey"])
except ImportError:
    # South is optional; without it there is nothing to register.
    pass
def _render_to_string_request(request, template, dictionary):
    """
    Wrapper around render_to_string that includes the request context
    This is necessary to get all of the TEMPLATE_CONTEXT_PROCESSORS
    activated in the template.
    """
    context = RequestContext(request, dictionary)
    return render_to_string(template, context_instance=context)


def stream_status():
    """
    Gather the data shown on the status page.

    Returns a dict with the keys 'running', 'terms', 'processes',
    'tweet_count', 'earliest', 'latest', 'avg_rate' and 'timeline'.
    'avg_rate' is None when there are no tweet timestamps or when the
    earliest and latest timestamps do not differ, since no rate can be
    computed over a zero-length span.
    """
    terms = FilterTerm.objects.filter(enabled=True)
    processes = StreamProcess.get_current_stream_processes()
    running = any(p.status == StreamProcess.STREAM_STATUS_RUNNING
                  for p in processes)

    Tweet = load_model("twitter_stream", "Tweet")
    tweet_count = Tweet.count_approx()
    earliest_time = Tweet.get_earliest_created_at()
    latest_time = Tweet.get_latest_created_at()

    avg_rate = None
    if earliest_time is not None and latest_time is not None:
        elapsed_seconds = (latest_time - earliest_time).total_seconds()
        # A single tweet (or identical timestamps) gives a zero span; dividing
        # by it would raise ZeroDivisionError and break the status page.
        if elapsed_seconds > 0:
            avg_rate = float(tweet_count) / elapsed_seconds

    # Get the tweets / minute over the past 20 minutes
    tweet_counts = []
    if latest_time is not None:
        latest_time_minute = latest_time.replace(second=0, microsecond=0)

        # SQL expression that truncates created_at to the minute.  Backends
        # other than MySQL and PostgreSQL fall back to the raw timestamp, so
        # rows there are grouped by exact created_at value instead.
        engine = settings.DATABASES['default']['ENGINE']
        if engine.endswith('mysql'):
            drop_seconds = "created_at - INTERVAL SECOND(created_at) SECOND"
        elif engine.endswith('postgresql_psycopg2'):
            drop_seconds = "date_trunc('minute', created_at)"
        else:
            drop_seconds = "created_at"

        tweet_counts = Tweet.objects.extra(select={
            'time': drop_seconds
        }) \
            .filter(created_at__gt=latest_time_minute - timedelta(minutes=20)) \
            .values('time') \
            .order_by('time') \
            .annotate(tweets=models.Count('id'))

        tweet_counts = list(tweet_counts)

        # Make the timestamps JSON-serializable.
        for row in tweet_counts:
            row['time'] = row['time'].isoformat()

    return {
        'running': running,
        'terms': [t.term for t in terms],
        'processes': processes,
        'tweet_count': tweet_count,
        'earliest': earliest_time,
        'latest': latest_time,
        'avg_rate': avg_rate,
        'timeline': tweet_counts
    }


class StatusView(generic.TemplateView):
    """Status page; passes the stream status to the template."""
    template_name = 'twitter_stream/status.html'

    def get_context_data(self, **kwargs):
        """Return a context holding only 'status'; the timeline is JSON-encoded."""
        status = stream_status()
        status['timeline'] = json.dumps(status['timeline'])
        return {
            'status': status
        }

status = staff_member_required(StatusView.as_view())

@staff_member_required
@json_view
def json_status(request, task=None):
    """
    Returns a JSON representation of the status, with
    HTML conveniently included.

    The result has two keys: 'display', the rendered status_display.html
    fragment, and 'timeline', the per-interval tweet counts.  The ``task``
    argument is accepted but not used.
    """

    status = stream_status()

    display = _render_to_string_request(request, 'twitter_stream/status_display.html', {
        'status': status
    })

    return {
        'display': display,
        'timeline': status['timeline']
    }
class Command(BaseCommand):
    """
    Streams tweets from an existing file. The file should
    be pretty-printed JSON dump from the streaming API.

    Example usage:
    python manage.py stream_from_file tweets.json
    python manage.py stream_from_file tweets.json --limit 100000
    python manage.py stream_from_file tweets.json --rate-limit 25 --poll-interval 25
    """

    option_list = BaseCommand.option_list + (
        make_option(
            '--poll-interval',
            action='store',
            dest='poll_interval',
            # None means "not given": handle() then falls back to
            # settings.POLL_INTERVAL.  A numeric default here would always
            # win over the configured value.
            default=None,
            type=int,
            help='Seconds between tweet inserts.'
        ),
        make_option(
            '--rate-limit',
            action='store',
            dest='rate_limit',
            default=None,
            type=float,
            help='Rate to read in tweets.'
        ),
        make_option(
            '--limit',
            action='store',
            dest='limit',
            default=None,
            type=int,
            help='Limit the number of tweets read.'
        )
    )
    args = ''
    help = "Fakes a streaming connection to twitter by reading from a file."

    def handle(self, tweets_file=None, *args, **options):
        """
        Stream tweets from ``tweets_file`` until the stream ends or fails.

        Creates a StreamProcess record, installs SIGINT/SIGTERM handlers and
        polls a FakeTwitterStream.  The ``finally`` clause always calls
        ``stop``, which raises SystemExit, so this method never returns
        normally.
        """

        # The suggested time between heartbeats
        poll_interval = options.get('poll_interval')
        if poll_interval is None:
            poll_interval = settings.POLL_INTERVAL
        poll_interval = float(poll_interval)

        # The 50 fallback only applies when the key is absent from options;
        # the command-line option itself defaults to None.
        rate_limit = options.get('rate_limit', 50)
        limit = options.get('limit', None)
        prevent_exit = options.get('prevent_exit', settings.PREVENT_EXIT)

        # First expire any old stream process records that have failed
        # to report in for a while
        timeout_seconds = 3 * poll_interval
        models.StreamProcess.expire_timed_out()

        stream_process = models.StreamProcess.create(
            timeout_seconds=timeout_seconds
        )

        listener = utils.QueueStreamListener()
        checker = utils.FakeTermChecker(queue_listener=listener,
                                        stream_process=stream_process)

        def stop(signum, frame):
            """
            Register stream's death and exit.
            """
            logger.debug("Stopping because of signal")

            if stream_process:
                stream_process.status = models.StreamProcess.STREAM_STATUS_STOPPED
                stream_process.heartbeat()

            # Let the tweet listener know it should be quitting asap
            listener.set_terminate()

            raise SystemExit()

        # Installs signal handlers for handling SIGINT and SIGTERM
        # gracefully.
        signal.signal(signal.SIGINT, stop)
        signal.signal(signal.SIGTERM, stop)

        logger.info("Streaming from %s", tweets_file)
        if rate_limit:
            logger.info("Rate limit: %f", rate_limit)

        try:
            stream = utils.FakeTwitterStream(tweets_file,
                                             listener=listener, term_checker=checker,
                                             limit=limit, rate_limit=rate_limit)

            if prevent_exit:
                # Keep restarting the stream until the checker reports that
                # it is no longer ok.
                while checker.ok():
                    try:
                        stream.start_polling(poll_interval)
                    except Exception as e:
                        checker.error(e)
                        time.sleep(1)  # to avoid craziness
                logger.error("Stopping because of excess errors")
            else:
                stream.start_polling(poll_interval)
                # A single run that returns is a normal finish, not an error.
                logger.info("Stopping because streaming finished")

            stream_process.status = models.StreamProcess.STREAM_STATUS_STOPPED
            stream_process.heartbeat()

        except Exception as e:
            logger.error(e, exc_info=True)

        finally:
            stop(None, None)
.append("g") 35 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 36 | 37 | var xAxisGroup = svg.append("g") 38 | .attr("class", "x axis") 39 | .attr("transform", "translate(0," + height + ")"); 40 | 41 | var yAxisGroup = svg.append("g") 42 | .attr("class", "y axis"); 43 | 44 | yAxisGroup.append("text") 45 | .attr("transform", "rotate(-90)") 46 | .attr("y", 6) 47 | .attr("dy", ".71em") 48 | .style("text-anchor", "end") 49 | .text("Tweets"); 50 | 51 | var barsGroup = svg.append("g") 52 | .attr('class', 'bars'); 53 | 54 | return function (data) { 55 | 56 | data.forEach(function (d) { 57 | d.time = new Date(d.time); 58 | }); 59 | 60 | var dateRange = d3.extent(data, function (d) { 61 | return d.time; 62 | }); 63 | 64 | x.domain(dateRange); 65 | 66 | y.domain([0, d3.max(data, function (d) { 67 | return d.tweets; 68 | })]); 69 | 70 | var leftMargin = 22; 71 | var minutesShown = (dateRange[1] - dateRange[0]) / 60000; 72 | var barWidth = Math.floor((width - leftMargin) / minutesShown); 73 | barWidth = Math.max(2, barWidth - (barWidth % 20)); 74 | 75 | x.range([barWidth / 2 + leftMargin, width]); 76 | 77 | xAxisGroup.call(xAxis); 78 | yAxisGroup.call(yAxis); 79 | 80 | var bind = barsGroup.selectAll("g") 81 | .data(data); 82 | 83 | var enter = bind.enter() 84 | .append('g'); 85 | enter.append('rect') 86 | enter.append('text') 87 | .attr("dy", ".75em"); 88 | 89 | bind.exit() 90 | .remove(); 91 | 92 | bind.attr('transform', function (d, i) { 93 | return "translate(" + (x(d.time) - barWidth / 2) + ",0)"; 94 | }) 95 | .classed('filling', function (d, i) { 96 | return i == data.length - 1 97 | }); 98 | 99 | bind.select('rect') 100 | .attr('width', barWidth - 1) 101 | .transition() 102 | .attr("y", function (d) { 103 | return y(d.tweets); 104 | }) 105 | .attr('height', function (d) { 106 | return height - y(d.tweets); 107 | }); 108 | 109 | bind.select('text') 110 | .attr("x", barWidth / 2) 111 | .text(function (d) { 112 | return d.tweets; 113 | }) 114 | 
// jQuery wrapper for the element that receives the server-rendered status
// HTML. Declared here so the assignment in the ready handler stays inside the
// enclosing function scope instead of creating an implicit global.
var status_display;

// Fetch fresh status from config.update_url, then swap in the rendered HTML
// and redraw the chart. The status label is hidden while the request is in
// flight and shown again only after a successful response.
function update() {
    toggle_status_label(false);
    $.get(config.update_url)
        .done(function (response) {
            status_display.html(response.display);
            update_chart(response.timeline);
            toggle_status_label(true);
        })
        .fail(function (jqXHR, textStatus) {
            console.log(jqXHR, textStatus);
        });
}

// Add (show=true) or remove (show=false) the 'in' class on .status-label.
function toggle_status_label(show) {
    var label = $('.status-label');
    if (!label.length) {
        // Nothing to toggle; indexing an empty selection would throw.
        return;
    }

    // NOTE(review): the original read `label[0].borderWidth`, which is not a
    // DOM element property and has no effect. It looks like an attempt to
    // force a reflow before the class change so a CSS transition restarts;
    // offsetWidth is the property that actually does that. Confirm intent.
    label[0].offsetWidth;

    label.toggleClass('in', !!show);
}

// On DOM ready: build the chart, draw the initial timeline, start the
// periodic refresh and reveal the status label.
$(document).ready(function () {
    status_display = $('#twitter-stream-display');

    var chart_element = $('#twitter-stream-chart');
    update_chart = chart(chart_element[0], {
        width: chart_element.width(),
        height: CHART_HEIGHT
    });
    update_chart(config.timeline_data);
    interval = setInterval(update, UPDATE_INTERVAL);
    toggle_status_label(true);
});
u'twitter_stream.apikey': { 24 | 'Meta': {'object_name': 'ApiKey'}, 25 | 'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 26 | 'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 27 | 'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 28 | 'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 29 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), 30 | 'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}), 31 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 32 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '250'}) 33 | }, 34 | u'twitter_stream.filterterm': { 35 | 'Meta': {'object_name': 'FilterTerm'}, 36 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), 37 | 'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), 38 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 39 | 'term': ('django.db.models.fields.CharField', [], {'max_length': '250'}) 40 | }, 41 | u'twitter_stream.streamprocess': { 42 | 'Meta': {'object_name': 'StreamProcess'}, 43 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), 44 | 'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}), 45 | 'expires_at': ('django.db.models.fields.DateTimeField', [], {}), 46 | 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 47 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 48 | 'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}), 49 | 'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}), 50 | 'memory_usage': 
('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '30', 'null': 'True', 'blank': 'True'}), 51 | 'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}), 52 | 'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}), 53 | 'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}), 54 | 'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'}) 55 | }, 56 | u'twitter_stream.tweet': { 57 | 'Meta': {'object_name': 'Tweet'}, 58 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}), 59 | 'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 60 | 'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}), 61 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 62 | 'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), 63 | 'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}), 64 | 'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), 65 | 'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), 66 | 'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 67 | 'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), 68 | 'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 69 | 'truncated': ('django.db.models.fields.BooleanField', [], {}), 70 | 'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}), 71 | 'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 
class Migration(SchemaMigration):
    """
    Schema migration 0006: change the column type of ``Tweet.id``.

    ``forwards`` alters ``twitter_stream_tweet.id`` to the project's
    ``PositiveBigAutoField``; ``backwards`` restores Django's ``AutoField``.
    """

    def forwards(self, orm):
        """Alter the primary key column to ``PositiveBigAutoField``."""

        # Changing field 'Tweet.id'
        db.alter_column(u'twitter_stream_tweet', 'id', self.gf('twitter_stream.fields.PositiveBigAutoField')(primary_key=True))

    def backwards(self, orm):
        """Alter the primary key column back to ``AutoField``."""

        # Changing field 'Tweet.id'
        db.alter_column(u'twitter_stream_tweet', u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True))

    # Model definitions recorded with this migration (state after it is
    # applied: Tweet.id is a PositiveBigAutoField).
    models = {
        u'twitter_stream.apikey': {
            'Meta': {'object_name': 'ApiKey'},
            'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'app_name': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.filterterm': {
            'Meta': {'object_name': 'FilterTerm'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.streamprocess': {
            'Meta': {'object_name': 'StreamProcess'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
            'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
            'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
            'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
            'memory_usage': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '30', 'null': 'True', 'blank': 'True'}),
            'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
            'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
        },
        u'twitter_stream.tweet': {
            'Meta': {'object_name': 'Tweet'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
            'id': ('twitter_stream.fields.PositiveBigAutoField', [], {'primary_key': 'True'}),
            'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
            'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'truncated': ('django.db.models.fields.BooleanField', [], {}),
            'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
            'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
            'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'user_verified': ('django.db.models.fields.BooleanField', [], {})
        }
    }

    # Apps whose models are fully described by the ``models`` dict above.
    complete_apps = ['twitter_stream']
class Migration(SchemaMigration):
    """
    Schema migration 0002: index ``Tweet.analyzed_by`` and ``Tweet.created_at``.

    ``forwards`` creates one index per column on ``twitter_stream_tweet``;
    ``backwards`` drops them in the reverse order.
    """

    def forwards(self, orm):
        """Create the two single-column indexes."""
        # Adding index on 'Tweet', fields ['analyzed_by']
        db.create_index(u'twitter_stream_tweet', ['analyzed_by'])

        # Adding index on 'Tweet', fields ['created_at']
        db.create_index(u'twitter_stream_tweet', ['created_at'])


    def backwards(self, orm):
        """Drop the two indexes created by ``forwards``."""
        # Removing index on 'Tweet', fields ['created_at']
        db.delete_index(u'twitter_stream_tweet', ['created_at'])

        # Removing index on 'Tweet', fields ['analyzed_by']
        db.delete_index(u'twitter_stream_tweet', ['analyzed_by'])


    # Model definitions recorded with this migration (state after it is
    # applied: both Tweet columns carry db_index).
    models = {
        u'twitter_stream.apikey': {
            'Meta': {'object_name': 'ApiKey'},
            'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.filterterm': {
            'Meta': {'object_name': 'FilterTerm'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.streamprocess': {
            'Meta': {'object_name': 'StreamProcess'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
            'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
            'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
            'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
            'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
            'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
        },
        u'twitter_stream.tweet': {
            'Meta': {'object_name': 'Tweet'},
            'analyzed_by': ('django.db.models.fields.SmallIntegerField', [], {'default': '0', 'db_index': 'True'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
            'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'truncated': ('django.db.models.fields.BooleanField', [], {}),
            'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
            'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
            'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'user_verified': ('django.db.models.fields.BooleanField', [], {})
        }
    }

    # Apps whose models are fully described by the ``models`` dict above.
    complete_apps = ['twitter_stream']
class Migration(SchemaMigration):
    """
    Schema migration 0003: add the ``StreamProcess.memory_usage`` column.

    ``forwards`` adds a nullable 30-character column to
    ``twitter_stream_streamprocess``; ``backwards`` drops it again.
    """

    def forwards(self, orm):
        """Add the nullable ``memory_usage`` CharField column."""
        # Adding field 'StreamProcess.memory_usage'
        db.add_column(u'twitter_stream_streamprocess', 'memory_usage',
                      self.gf('django.db.models.fields.CharField')(default=None, max_length=30, null=True, blank=True),
                      keep_default=False)


    def backwards(self, orm):
        """Drop the ``memory_usage`` column."""
        # Deleting field 'StreamProcess.memory_usage'
        db.delete_column(u'twitter_stream_streamprocess', 'memory_usage')


    # Model definitions recorded with this migration (state after it is
    # applied: StreamProcess has memory_usage).
    models = {
        u'twitter_stream.apikey': {
            'Meta': {'object_name': 'ApiKey'},
            'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.filterterm': {
            'Meta': {'object_name': 'FilterTerm'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.streamprocess': {
            'Meta': {'object_name': 'StreamProcess'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
            'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
            'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
            'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
            'memory_usage': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '30', 'null': 'True', 'blank': 'True'}),
            'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
            'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
        },
        u'twitter_stream.tweet': {
            'Meta': {'object_name': 'Tweet'},
            'analyzed_by': ('django.db.models.fields.SmallIntegerField', [], {'default': '0', 'db_index': 'True'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
            'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'truncated': ('django.db.models.fields.BooleanField', [], {}),
            'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
            'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
            'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'user_verified': ('django.db.models.fields.BooleanField', [], {})
        }
    }

    # Apps whose models are fully described by the ``models`` dict above.
    complete_apps = ['twitter_stream']
class Migration(SchemaMigration):
    """
    Schema migration 0005: rename ``ApiKey.name`` and add ``ApiKey.app_name``.

    ``forwards`` renames the ``name`` column of ``twitter_stream_apikey`` to
    ``user_name`` and adds a new ``app_name`` column; ``backwards`` renames
    the column back and drops ``app_name``.
    """

    def forwards(self, orm):
        """Rename ``name`` to ``user_name`` and add ``app_name``."""
        # Renaming column 'ApiKey.name' to 'ApiKey.user_name'
        db.rename_column(u'twitter_stream_apikey', 'name', 'user_name')

        # Adding field 'ApiKey.app_name'
        # NOTE(review): the column is added without null=True and with
        # default=None -- confirm this succeeds on tables that already
        # contain rows.
        db.add_column(u'twitter_stream_apikey', 'app_name',
                      self.gf('django.db.models.fields.CharField')(default=None, max_length=250),
                      keep_default=False)


    def backwards(self, orm):
        """Rename ``user_name`` back to ``name`` and drop ``app_name``."""

        # Renaming column 'ApiKey.user_name' back to 'ApiKey.name'
        db.rename_column(u'twitter_stream_apikey', 'user_name', 'name')

        # Deleting field 'ApiKey.app_name'
        db.delete_column(u'twitter_stream_apikey', 'app_name')


    # Model definitions recorded with this migration (state after it is
    # applied: ApiKey has app_name and user_name).
    models = {
        u'twitter_stream.apikey': {
            'Meta': {'object_name': 'ApiKey'},
            'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'app_name': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.filterterm': {
            'Meta': {'object_name': 'FilterTerm'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.streamprocess': {
            'Meta': {'object_name': 'StreamProcess'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
            'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
            'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
            'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
            'memory_usage': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '30', 'null': 'True', 'blank': 'True'}),
            'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
            'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
        },
        u'twitter_stream.tweet': {
            'Meta': {'object_name': 'Tweet'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
            'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'truncated': ('django.db.models.fields.BooleanField', [], {}),
            'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
            'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
            'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'user_verified': ('django.db.models.fields.BooleanField', [], {})
        }
    }

    # Apps whose models are fully described by the ``models`` dict above.
    complete_apps = ['twitter_stream']
class ObjDict(dict):
    """
    A dict whose items can also be read, written and deleted as attributes.

    ``obj.key`` is equivalent to ``obj['key']``. A missing key raises
    AttributeError on attribute access (not KeyError), so ``hasattr()`` and
    ``getattr(obj, name, default)`` behave as they do for ordinary objects.
    """

    def __getattr__(self, item):
        # Only called when normal attribute lookup fails, so dict methods
        # such as ``keys`` are never shadowed by items.
        try:
            return self[item]
        except KeyError:
            raise AttributeError(item)

    def __setattr__(self, key, value):
        self[key] = value

    def __delattr__(self, item):
        try:
            del self[item]
        except KeyError:
            raise AttributeError(item)


class FakeTermChecker(twitter_monitor.TermChecker):
    """
    Term checker for file-based streams.

    There are no tracking terms to poll for, so ``check()`` always reports a
    change. Each call is used to flush the listener's tweet queue and to
    record the stream process status.
    """

    def __init__(self, queue_listener, stream_process):
        """
        :param queue_listener: listener whose ``process_tweet_queue()`` is
            called on every check.
        :param stream_process: process record updated on every check
            (``tweet_rate``, ``error_count``, ``status``, ``heartbeat()``).
        """
        super(FakeTermChecker, self).__init__()

        # A queue for tweets that need to be written to the database
        self.listener = queue_listener
        self.error_count = 0
        self.process = stream_process

    def check(self):
        """We always return true!"""

        # Process the tweet queue -- this is more important
        # to do regularly than updating the tracking terms
        # Update the process status in the database
        self.process.tweet_rate = self.listener.process_tweet_queue()
        self.process.error_count = self.error_count
        self.process.status = models.StreamProcess.STREAM_STATUS_RUNNING
        self.process.heartbeat()

        return True

    def ok(self):
        """Return True while fewer than 5 errors have been recorded."""
        return self.error_count < 5

    def error(self, exc):
        """Log ``exc`` and count it towards the error limit."""
        logger.error(exc)
        self.error_count += 1
# Number of tweets read between progress log messages
TWEETS_BETWEEN_PROGRESS = 7000


class FakeTwitterStream(object):
    """
    A tweet processor with a similar interface to the
    DynamicTweetStream class. It launches the tweet file
    reading in a separate thread.

    Tweets are read from ``tweets_file`` (a path or an open, iterable file
    object) and handed to ``listener.on_status()`` one at a time.
    """

    def __init__(self, tweets_file, listener, term_checker,
                 limit=None, rate_limit=None, pretty=False):
        """
        :param tweets_file: path to the tweet file, or an object with a
            ``read`` attribute that yields lines when iterated.
        :param listener: receives each tweet through ``on_status(tweet)``.
        :param term_checker: polled by ``start_polling()``.
        :param limit: stop after this many tweets (None or 0: no limit).
        :param rate_limit: maximum tweets per second (None or 0: unlimited).
        :param pretty: True if the file holds pretty-printed, multi-line
            JSON objects instead of one JSON object per line.
        """

        self.tweets_file = tweets_file

        self.limit = limit
        self.rate_limit = rate_limit
        self.pretty = pretty

        self.listener = listener
        self.term_checker = term_checker

        self.tracking_terms = []
        self.polling = False
        self.stream = None
        self.last_created_at = 0

        self.polling_interrupt = threading.Event()

    def process(self, tweet, raw_tweet):
        """Record the tweet's ``created_at`` and pass it to the listener."""
        self.last_created_at = tweet['created_at']
        return self.listener.on_status(tweet)

    def next_tweet_pretty(self, infile):
        """
        Return the raw text of the next pretty-printed tweet, or None at EOF.

        A tweet starts on a line beginning with ``{`` and ends on the next
        line beginning with ``}`` (with or without a trailing comma, so the
        last object in a file is not lost). Lines outside a tweet are
        ignored; an unterminated tweet at end of input is dropped.
        """
        parts = []
        in_tweet = False

        for line in infile:
            if line.startswith('{'):
                # start of tweet (restart if the previous one never closed)
                in_tweet = True
                parts = [line]
            elif in_tweet and line.startswith('}'):
                # end of tweet: keep the brace, drop any trailing comma
                parts.append('}')
                return ''.join(parts)
            elif in_tweet:
                # some line in the middle
                parts.append(line)

        return None

    def next_tweet(self, infile):
        """Return the next line of ``infile``, or None at EOF."""
        return next(infile, None)

    def run(self):
        """
        Read the tweet file and feed every tweet to the listener.

        Stops at end of file, when ``limit`` tweets have been processed, or
        when the listener returns False. A file opened here from a path is
        always closed again; a file object passed in by the caller is left
        open.
        """

        logger.info("Parsing %s...", self.tweets_file)
        if self.limit:
            logger.info("up to %d tweets...", self.limit)

        owns_file = not hasattr(self.tweets_file, 'read')
        if owns_file:
            infile = open(self.tweets_file, "rt")
        else:
            infile = self.tweets_file

        try:
            tweet_count = self._read_tweets(infile)
        finally:
            if owns_file:
                infile.close()

        logger.info("Read in %d tweets (total)", tweet_count)
        if self.last_created_at:
            logger.info('Tweets stopped at %s', str(self.last_created_at))
        logger.info("Done reading file.")

    def _read_tweets(self, infile):
        """Process tweets from ``infile``; return how many were processed."""
        read_next = self.next_tweet_pretty if self.pretty else self.next_tweet

        tweet_count = 0
        last_report_count = 0

        time_of_last_tweet = time.time()
        time_between_tweets = 1.0 / self.rate_limit if self.rate_limit else 0.0

        while True:
            raw = read_next(infile)
            if raw is None:
                break

            raw = raw.strip()
            if not raw:
                continue

            tweet = json.loads(raw)

            # make sure it is a tweet
            if 'user' not in tweet:
                continue

            if self.rate_limit:
                # sleep only for what is left of the interval
                remaining = time_between_tweets - (time.time() - time_of_last_tweet)
                while remaining > 0:
                    time.sleep(remaining)
                    remaining = time_between_tweets - (time.time() - time_of_last_tweet)

            if self.process(tweet, raw) is False:
                logger.warning("Stopping file stream")
                break

            tweet_count += 1

            if self.rate_limit:
                time_of_last_tweet = time.time()

            if tweet_count - last_report_count > TWEETS_BETWEEN_PROGRESS:
                last_report_count = tweet_count

                logger.info("Read in %d tweets", tweet_count)
                if self.last_created_at:
                    logger.info('Inserted tweets up to %s', str(self.last_created_at))

            # ">=" so that exactly ``limit`` tweets are processed
            if self.limit and tweet_count >= self.limit:
                logger.info("Limit of %d reached.", self.limit)
                break

        return tweet_count

    def start_polling(self, interval):
        """
        Start polling for term updates and streaming.

        Loops while ``self.polling`` is true, running one update every
        ``interval`` seconds (never waiting less than 0.1s between loops).
        """

        self.polling = True

        # clear the stored list of terms - we aren't tracking any
        self.term_checker.reset()

        logger.info("Starting polling for changes to the track list")
        while self.polling:
            loop_start = time.time()

            self.update_stream()
            self.handle_exceptions()

            # wait for the interval (compensate for the time taken in the loop)
            elapsed = (time.time() - loop_start)
            self.polling_interrupt.wait(max(0.1, interval - elapsed))

        logger.warning("Term poll ceased!")

    def update_stream(self):
        """
        Restarts the stream with the current list of tracking terms.
        """

        # Check if the tracking list has changed
        if not self.term_checker.check():
            return

        # Start a new stream
        self.start_stream()

    def start_stream(self):
        """
        Starts a stream if not already started.
        """

        if not self.stream:
            self.stream = threading.Thread(target=self.run)
            self.stream.start()

    def handle_exceptions(self):
        """Re-raise an exception recorded on the listener, if there is one."""
        # check to see if an exception was raised in the streaming thread
        if self.listener.streaming_exception is not None:
            logger.warning("Streaming exception: %s", self.listener.streaming_exception)
            # propagate outward
            raise self.listener.streaming_exception
TweetQueue(queue.Queue): 20 | """ 21 | Simply extends the Queue class with get_all methods. 22 | """ 23 | 24 | def get_all(self, block=True, timeout=None): 25 | """Remove and return all the items from the queue. 26 | 27 | If optional args 'block' is true and 'timeout' is None (the default), 28 | block if necessary until an item is available. If 'timeout' is 29 | a non-negative number, it blocks at most 'timeout' seconds and raises 30 | the Empty exception if no item was available within that time. 31 | Otherwise ('block' is false), return an item if one is immediately 32 | available, else raise the Empty exception ('timeout' is ignored 33 | in that case). 34 | """ 35 | self.not_empty.acquire() 36 | try: 37 | if not block: 38 | if not self._qsize(): 39 | raise queue.Empty 40 | elif timeout is None: 41 | while not self._qsize(): 42 | self.not_empty.wait() 43 | elif timeout < 0: 44 | raise ValueError("'timeout' must be a non-negative number") 45 | else: 46 | endtime = time.time() + timeout 47 | while not self._qsize(): 48 | remaining = endtime - time.time() 49 | if remaining <= 0.0: 50 | raise queue.Empty 51 | self.not_empty.wait(remaining) 52 | items = self._get_all() 53 | self.not_full.notify() 54 | return items 55 | finally: 56 | self.not_empty.release() 57 | 58 | def get_all_nowait(self): 59 | """Remove and return all the items from the queue without blocking. 60 | 61 | Only get items if immediately available. Otherwise 62 | raise the Empty exception. 63 | """ 64 | return self.get_all(False) 65 | 66 | def _get_all(self): 67 | """ 68 | Get all the items from the queue. 69 | """ 70 | result = [] 71 | while len(self.queue): 72 | result.append(self.queue.popleft()) 73 | return result 74 | 75 | 76 | class FeelsTermChecker(twitter_monitor.TermChecker): 77 | """ 78 | Checks the database for filter terms. 
class QueueStreamListener(twitter_monitor.JsonStreamListener):
    """
    Saves tweets in a queue for later insertion into the database
    (or an output file) when process_tweet_queue() is called.

    Note that on_status() is operated by the streaming thread, while
    process_tweet_queue() may be called from a different thread.
    """

    def __init__(self, api=None, to_file=None):
        """
        Listens for tweets from Tweepy and saves them in the database
        when process_tweet_queue() is called (in a separate thread, probably).

        If to_file is given, tweets are written to the file instead.
        JSON formatted, one per line.
        """
        super(QueueStreamListener, self).__init__(api)

        self.terminate = False

        # A place to put the tweets
        self.queue = TweetQueue()

        # Time of the previous flush, for calculating tweets / sec
        self.time = time.time()

        # Place for saving tweets if not in the database.
        self.to_file = to_file
        self._output_file = None

    def on_status(self, status):
        """
        Queue a raw status for later processing.

        Returns False once set_terminate() has been called, which should
        take out the tweepy stream thread.
        """
        self.queue.put_nowait(status)
        return not self.terminate

    def _collect(self, Tweet, payload, tweets, label):
        """
        Convert one raw status and append the result to `tweets`.

        In file mode the payload is serialized to a JSON string; otherwise
        it is turned into a model instance with Tweet.create_from_json().
        A payload that fails to convert is logged and skipped so a single
        bad tweet never costs the whole batch.
        """
        if self.to_file:
            tweets.append(json.dumps(payload))
            return

        try:
            tweet = Tweet.create_from_json(payload)
        except Exception:
            # Look the id up defensively: the failure may be precisely that
            # the payload is malformed, and the handler must not raise.
            tweet_id = payload.get('id_str') if isinstance(payload, dict) else None
            logger.error("Failed to parse %s %s", label, tweet_id, exc_info=True)
            return

        if tweet is not None:
            tweets.append(tweet)

    def process_tweet_queue(self):
        """
        Inserts any queued tweets into the database (or appends them to the
        output file when to_file was given).

        It is ok for this to be called on a thread other than the streaming thread.

        Returns the number of tweets saved per second since the previous
        call, or 0 when nothing was queued.
        """

        # this is for calculating the tps rate
        now = time.time()
        diff = now - self.time
        self.time = now

        try:
            batch = self.queue.get_all_nowait()
        except queue.Empty:
            return 0

        if not batch:
            return 0

        Tweet = load_model("twitter_stream", "Tweet")

        tweets = []
        for status in batch:
            has_embedded = 'retweeted_status' in status

            if settings.CAPTURE_EMBEDDED and has_embedded:
                self._collect(Tweet, status['retweeted_status'], tweets, "retweeted")

            if self.to_file and has_embedded:
                # In file mode the embedded retweet is either already on its
                # own line or deliberately discarded; never nest it.
                del status['retweeted_status']

            self._collect(Tweet, status, tweets, "tweet")

        # Two calls can land within the clock's resolution; avoid dividing by zero.
        rate = len(tweets) / diff if diff > 0 else 0.0

        if tweets:
            if self.to_file:
                if not self._output_file or self._output_file.closed:
                    # Text mode: json.dumps() yields str, which cannot be
                    # written to a binary-mode file on Python 3.
                    self._output_file = open(self.to_file, 'a')
                self._output_file.write("\n".join(tweets) + "\n")
                self._output_file.flush()
                logger.info("Dumped %s tweets at %s tps to %s", len(tweets), rate, self.to_file)
            else:
                Tweet.objects.bulk_create(tweets, settings.INSERT_BATCH_SIZE)
                logger.info("Inserted %s tweets at %s tps", len(tweets), rate)
        else:
            logger.info("Saved 0 tweets")

        if settings.DEBUG:
            # Prevent apparent memory leaks
            # https://docs.djangoproject.com/en/dev/faq/models/#why-is-django-leaking-memory
            from django import db
            db.reset_queries()

        return rate

    def set_terminate(self):
        """Ask the streaming thread to stop at the next received status."""
        self.terminate = True
38 | Starts a process that streams data from Twitter. 39 | 40 | Example usage: 41 | python manage.py stream 42 | python manage.py stream --poll-interval 25 43 | python manage.py stream MyCredentialsName 44 | """ 45 | 46 | option_list = BaseCommand.option_list + ( 47 | make_option( 48 | '--poll-interval', 49 | action='store', 50 | dest='poll_interval', 51 | default=settings.POLL_INTERVAL, 52 | help='Seconds between term updates and tweet inserts.' 53 | ), 54 | make_option( 55 | '--prevent-exit', 56 | action='store_true', 57 | dest='prevent_exit', 58 | default=False, 59 | help='Put the stream in a loop to prevent random termination. Use this if you are not running inside a process management system like supervisord.' 60 | ), 61 | make_option( 62 | '--to-file', 63 | action='store', 64 | dest='to_file', 65 | default=None, 66 | help='Write tweets to the given JSON file instead of the database.' 67 | ), 68 | make_option( 69 | '--from-file', 70 | action='store', 71 | dest='from_file', 72 | default=None, 73 | help='Read tweets from a given file, one JSON tweet per line.' 74 | ), 75 | make_option( 76 | '--from-file-long', 77 | action='store', 78 | dest='from_file_long', 79 | default=None, 80 | help='Read tweets from a given file, where JSON tweets are pretty-printed.' 81 | ), 82 | make_option( 83 | '--rate-limit', 84 | action='store', 85 | dest='rate_limit', 86 | default=None, 87 | type=float, 88 | help='Rate to read in tweets, used ONLY if streaming from a file.' 89 | ), 90 | make_option( 91 | '--limit', 92 | action='store', 93 | dest='limit', 94 | default=None, 95 | type=int, 96 | help='Limit the number of tweets, used ONLY if streaming from a file.' 
def handle(self, keys_name=settings.DEFAULT_KEYS_NAME, *args, **options):
    """
    Run the streaming process until it is stopped.

    Tweets come either from Twitter (using the ApiKey named `keys_name`,
    or any available key when it is empty) or from a file given with
    --from-file / --from-file-long. They are saved to the database, or
    to the --to-file path when provided.

    Always ends by raising SystemExit through the local stop() handler.
    """

    # The suggested time between heartbeats
    poll_interval = float(options.get('poll_interval', settings.POLL_INTERVAL))
    prevent_exit = options.get('prevent_exit', settings.PREVENT_EXIT)
    to_file = options.get('to_file', None)
    from_file = options.get('from_file', None)
    from_file_long = options.get('from_file_long', None)
    rate_limit = options.get('rate_limit', 50)
    limit = options.get('limit', None)

    if from_file and from_file_long:
        logger.error("Cannot use both --from-file and --from-file-long")
        sys.exit(1)

    # Fold --from-file-long into from_file up front so that every later
    # "are we reading from a file?" check covers both options.
    read_pretty = bool(from_file_long)
    if read_pretty:
        from_file = from_file_long

    # First expire any old stream process records that have failed
    # to report in for a while
    timeout_seconds = 3 * poll_interval
    models.StreamProcess.expire_timed_out()

    # Create the stream process for tracking ourselves
    stream_process = models.StreamProcess.create(
        timeout_seconds=timeout_seconds
    )

    listener = utils.QueueStreamListener(to_file=to_file)

    if from_file:
        checker = utils.FakeTermChecker(queue_listener=listener,
                                        stream_process=stream_process)
    else:
        checker = utils.FeelsTermChecker(queue_listener=listener,
                                         stream_process=stream_process)

    def stop(signum, frame):
        """
        Register stream's death and exit.
        """

        if stream_process:
            stream_process.status = models.StreamProcess.STREAM_STATUS_STOPPED
            stream_process.heartbeat()

        # Let the tweet listener know it should be quitting asap
        listener.set_terminate()

        logger.error("Terminating")

        raise SystemExit()

    # Installs signal handlers for handling SIGINT and SIGTERM
    # gracefully.
    signal.signal(signal.SIGINT, stop)
    signal.signal(signal.SIGTERM, stop)

    keys = None
    if not from_file:
        # Only need keys if we are connecting to twitter
        while not keys:
            try:
                keys = models.ApiKey.get_keys(keys_name)
            except ObjectDoesNotExist:
                if keys_name:
                    logger.error("Keys for '%s' do not exist in the database. Waiting...", keys_name)
                else:
                    logger.warning("No keys in the database. Waiting...")

                time.sleep(5)
                stream_process.status = models.StreamProcess.STREAM_STATUS_WAITING
                stream_process.heartbeat()

    try:
        if keys:
            logger.info("Connecting to Twitter with keys for %s/%s", keys.user_name, keys.app_name)
            stream_process.keys = keys
            stream_process.save()

            # Only need auth if we have keys (i.e. connecting to twitter)
            auth = tweepy.OAuthHandler(keys.api_key, keys.api_secret)
            auth.set_access_token(keys.access_token, keys.access_token_secret)

            # Start and maintain the streaming connection...
            stream = twitter_monitor.DynamicTwitterStream(auth, listener, checker)

        elif from_file:

            if from_file == '-':
                from_file = sys.stdin
                logger.info("Reading tweets from stdin")
            elif read_pretty:
                logger.info("Reading tweets from JSON file %s (pretty-printed)", from_file)
            else:
                logger.info("Reading tweets from JSON file %s", from_file)

            stream = utils.FakeTwitterStream(from_file, pretty=read_pretty,
                                             listener=listener, term_checker=checker,
                                             limit=limit, rate_limit=rate_limit)
        else:
            raise Exception("No api keys and we're not streaming from a file.")

        if to_file:
            logger.info("Saving tweets to %s", to_file)

        if prevent_exit:
            # Keep restarting the poll loop until the checker has seen
            # too many errors.
            while checker.ok():
                try:
                    stream.start_polling(poll_interval)
                except Exception as e:
                    checker.error(e)
                    time.sleep(1)  # to avoid craziness
            logger.error("Stopping because of excess errors")
        else:
            stream.start_polling(poll_interval)
            logger.info("Polling finished")

        stream_process.status = models.StreamProcess.STREAM_STATUS_STOPPED
        stream_process.heartbeat()

    except Exception as e:
        logger.error(e, exc_info=True)

    finally:
        # Marks the process as stopped and raises SystemExit
        stop(None, None)
10 | 11 | This app uses the [tweepy](http://github.com/tweepy/tweepy) library 12 | for connecting to the Twitter API. 13 | 14 | 15 | Installation 16 | ------------ 17 | 18 | Install with pip: 19 | 20 | ```bash 21 | pip install -e git+https://github.com/michaelbrooks/django-twitter-stream.git#egg=django-twitter-stream 22 | ``` 23 | 24 | Add to `INSTALLED_APPS` in your Django settings file: 25 | 26 | ```python 27 | INSTALLED_APPS = ( 28 | # other apps 29 | "twitter_stream", 30 | ) 31 | ``` 32 | 33 | > If you are using MySQL, you need to make sure that your database 34 | uses the `utf8mb4` character set for storing tweets, since MySQL's `utf8` 35 | character set does not include support for 4-byte characters. 36 | Add the following to your database settings: 37 | 38 | ```python 39 | DATABASES = { 40 | 'default': { 41 | 'ENGINE': 'django.db.backends.mysql', 42 | # username, password, etc... 43 | 'OPTIONS': { 44 | 'charset': 'utf8mb4', 45 | }, 46 | } 47 | } 48 | ``` 49 | 50 | Run `python manage.py syncdb` to update your database. 51 | This project also supports migrations with [South](http://south.aeracode.org/). 52 | If you are using South in your project, you should run `python manage.py migrate`. 53 | 54 | You need to supply your Twitter API keys and set up some filter terms 55 | before you can stream tweets. Instructions for this follow. 56 | 57 | 58 | ### Provide Twitter API Keys 59 | 60 | Once you have added `twitter_stream` to your list of installed apps, 61 | the Django Admin page should include a section for the `ApiKey` model. 62 | You can use this to input your Twitter API keys. 63 | 64 | If you do not have Twitter API keys, you must sign in to the 65 | [Twitter Developers site](http://dev.twitter.com). Next, go to 66 | your [applications list](https://dev.twitter.com/apps). If you do 67 | not have an application already, create one.
68 | Once you have created an application, go to the "API Keys" area, 69 | scroll to the bottom, and click the button to generate access keys for your account. 70 | This can take a few minutes to complete. 71 | 72 | Once you have an application and access keys for your account, 73 | you can copy the necessary values into a new ApiKey entry. 74 | This includes the "API key" and "API secret", located at the 75 | top of your application keys page, and 76 | the "Access Token" and "Access Token Secret", located at 77 | the bottom of your application keys page. 78 | 79 | 80 | ### Customize the Filter Terms 81 | 82 | Currently, this package uses the `filter` endpoint of the 83 | Twitter Streaming API ([more info](https://dev.twitter.com/docs/streaming-apis/streams/public)). 84 | This endpoint accepts a set of tracking terms. Any tweets matching these terms 85 | will be delivered to you as they are created (approximately). 86 | The precise behavior of term filtering is described [here](https://dev.twitter.com/docs/streaming-apis/parameters#track). 87 | 88 | This package defines a FilterTerm model. You can add filter 89 | terms to this table through the Django Admin interface, 90 | or through code. When you change the terms in the database, 91 | the stream will briefly shut itself down and then restart 92 | with the new list. 93 | 94 | If there are no terms in your database, the connection to Twitter will be 95 | closed until some terms are available. Note that connecting to the unfiltered 96 | public stream is not yet supported. 97 | 98 | Due to Twitter's rate limit, the Streaming API appears to return 99 | all of the tweets matching your filter terms *up to* around 1% 100 | of the total volume on Twitter at the present moment. 101 | In my experience, you will get at most around 50 or 60 tweets per second. 
102 | 103 | 104 | Start the Streaming Process 105 | --------------------------- 106 | 107 | To start the streaming process, use the `stream` management command: 108 | 109 | ```bash 110 | $ python manage.py stream 111 | ``` 112 | 113 | This will connect to Twitter using API keys and tracking terms from your database. 114 | 115 | If you have stored multiple API keys in your database, you may select a particular 116 | set of API keys by name as an argument to this command: 117 | 118 | You may also choose the rate at which the database will be polled for changes 119 | to the filter terms. This is also the interval at which tweets will be batch-inserted 120 | into your database, so don't set it too long. The default is 10 seconds. 121 | 122 | ```bash 123 | $ python manage.py stream MyAPIKeys --poll-interval 30 124 | ``` 125 | 126 | > *Warning*: Twitter does not allow an account to open more than one streaming 127 | connection at a time. If you repeatedly try to open too many streaming connections, 128 | there may be repercussions. If you start receiving disconnect errors from Twitter, 129 | take a break for a few minutes before trying to reconnect. 130 | 131 | If you need to take your database offline for some reason or just want to stream 132 | tweets to a file instead, you can use the `--to-file` option: 133 | 134 | ```bash 135 | $ python manage.py stream --to-file some_file.json 136 | ``` 137 | 138 | This will append tweets, in JSON format, one-per-line, to "some_file.json". 139 | If you are capturing retweets, they will be separated out onto separate lines. 140 | If you are not, they will be removed from the JSON objects before being printed. 
141 | 142 | You may also configure the stream to read from a file (or stdin with '-'): 143 | 144 | ```bash 145 | $ python manage.py stream --from-file some_file.json 146 | $ python manage.py stream --from-file - 147 | ``` 148 | 149 | Settings 150 | -------- 151 | 152 | Settings for this app can be configured by adding the `TWITTER_STREAM_SETTINGS` to your 153 | Django settings file. Below are the default settings: 154 | 155 | ```python 156 | TWITTER_STREAM_SETTINGS = { 157 | 158 | # Set to True to save embedded retweeted_status tweets. Normally these are discarded. 159 | 'CAPTURE_EMBEDDED': False, 160 | 161 | # Change the default term track and tweet insert interval 162 | 'POLL_INTERVAL': 10, 163 | 164 | # The name of the default keys to use for streaming. If not set, we'll just grab one. 165 | 'DEFAULT_KEYS_NAME': None, 166 | 167 | # Put the stream in a loop so random termination will be prevented. 168 | 'PREVENT_EXIT': False, 169 | } 170 | ``` 171 | 172 | Status Page 173 | ----------- 174 | 175 | This app provides a status page that shows how the Twitter stream is doing. 176 | Just add something like this to your url conf: 177 | 178 | ```python 179 | url(r'^stream/', include('twitter_stream.urls', namespace="twitter_stream")), 180 | ``` 181 | 182 | For the twitter stream views to work, you'll need to add this to your `INSTALLED_APPS`: 183 | ```python 184 | INSTALLED_APPS = ( 185 | # other apps 186 | 'django.contrib.humanize', 187 | 'bootstrap3', 188 | 'jsonview', 189 | ) 190 | ``` 191 | 192 | Custom Tweet Classes 193 | -------------------- 194 | 195 | It is possible to swap the provided Tweet class for your own, so that you 196 | can add other fields or whatever. 197 | To do this, in the models.py file for your app (which we will call 'myapp' in this example), 198 | add a class that extends `AbstractTweet`: 199 | 200 | ```python 201 | from twitter_stream.models import AbstractTweet 202 | class MyTweet(AbstractTweet): 203 | """ add whatever here... 
""" 204 | ``` 205 | 206 | Then, add this to your settings file: 207 | ```python 208 | TWITTER_STREAM_TWEET_MODEL = 'myapp.MyTweet' 209 | ``` 210 | 211 | This is facilitated by the [django-swappable-models](https://github.com/wq/django-swappable-models) package. 212 | 213 | Anywhere you were previously hard-importing the Tweet model, 214 | you will need to replace it with something like this: 215 | 216 | ```python 217 | from swapper import load_model 218 | Tweet = load_model('twitter_stream', 'Tweet') 219 | ``` 220 | 221 | This will load either the original Tweet model or the swapped model 222 | as appropriate. You can also load your `MyTweet` model directly, of course. 223 | 224 | For creating foreign keys pointing to Tweet (or the swapped model) 225 | you can use `swapper.get_model_name('twitter_stream', 'Tweet')`. 226 | 227 | If you are using South migrations and need to migrate from the old Tweet model 228 | to your new model, [this tutorial](http://www.caktusgroup.com/blog/2013/08/07/migrating-custom-user-model-django/) 229 | explains the issues. The basic idea is to do it in these steps: 230 | 231 | 1. Create your new model and change your model loading throughout (i.e. use `load_model`), 232 | but don't set the `TWITTER_STREAM_TWEET_MODEL` to actually swap it out yet. 233 | 2. Create a normal schema migration on `myapp` to make the database table for 234 | your new model. Run the migration. 235 | 3. Write a data migration that copies data from the old `twitter_stream_tweets` table to your new table. 236 | Run the data migration. 237 | 4. Trick South into creating a migration for you that you can use to delete the old table with the `SOUTH_MIGRATION_MODULES` setting. 238 | This step may need adaptation to work with `django-twitter-stream` since it was designed for the migration-less 239 | `django.contrib.auth` app. 240 | 5. Finally, swap the models with the `TWITTER_STREAM_TWEET_MODEL` setting. 241 | 6. 
Generate new schema migrations for any apps with foreign keys that reference the Tweet model. 242 | 7. Move your stub migration that deletes the twitter_stream_tweets table into your app's migration queue. 243 | 8. Run all the remaining migrations. 244 | 245 | Streaming From a File 246 | --------------------- 247 | 248 | There is also a `stream_from_file` command provided which can parse 249 | a file containing already collected tweets. This can be handy for debugging. 250 | This feature is deprecated. The `stream` command now provides this functionality. 251 | 252 | 253 | Questions and Contributing 254 | -------------------------- 255 | 256 | Feel free to post questions and problems on the issue tracker. Pull requests welcome! 257 | -------------------------------------------------------------------------------- /twitter_stream/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from south.utils import datetime_utils as datetime 3 | from south.db import db 4 | from south.v2 import SchemaMigration 5 | from django.db import models 6 | 7 | 8 | class Migration(SchemaMigration): 9 | 10 | def forwards(self, orm): 11 | # Adding model 'ApiKey' 12 | db.create_table(u'twitter_stream_apikey', ( 13 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), 14 | ('created_at', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)), 15 | ('name', self.gf('django.db.models.fields.CharField')(max_length=250)), 16 | ('email', self.gf('django.db.models.fields.EmailField')(default=None, max_length=75, blank=True)), 17 | ('api_key', self.gf('django.db.models.fields.CharField')(max_length=250)), 18 | ('api_secret', self.gf('django.db.models.fields.CharField')(max_length=250)), 19 | ('access_token', self.gf('django.db.models.fields.CharField')(max_length=250)), 20 | ('access_token_secret', self.gf('django.db.models.fields.CharField')(max_length=250)), 21 | 
)) 22 | db.send_create_signal(u'twitter_stream', ['ApiKey']) 23 | 24 | # Adding model 'StreamProcess' 25 | db.create_table(u'twitter_stream_streamprocess', ( 26 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), 27 | ('created_at', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)), 28 | ('timeout_seconds', self.gf('django.db.models.fields.PositiveIntegerField')()), 29 | ('expires_at', self.gf('django.db.models.fields.DateTimeField')()), 30 | ('last_heartbeat', self.gf('django.db.models.fields.DateTimeField')()), 31 | ('keys', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['twitter_stream.ApiKey'], null=True)), 32 | ('hostname', self.gf('django.db.models.fields.CharField')(max_length=250)), 33 | ('process_id', self.gf('django.db.models.fields.PositiveIntegerField')()), 34 | ('status', self.gf('django.db.models.fields.CharField')(default='WAITING', max_length=10)), 35 | ('tweet_rate', self.gf('django.db.models.fields.FloatField')(default=0)), 36 | ('error_count', self.gf('django.db.models.fields.PositiveSmallIntegerField')(default=0)), 37 | )) 38 | db.send_create_signal(u'twitter_stream', ['StreamProcess']) 39 | 40 | # Adding model 'Tweet' 41 | db.create_table(u'twitter_stream_tweet', ( 42 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), 43 | ('tweet_id', self.gf('django.db.models.fields.BigIntegerField')()), 44 | ('text', self.gf('django.db.models.fields.CharField')(max_length=250)), 45 | ('truncated', self.gf('django.db.models.fields.BooleanField')()), 46 | ('lang', self.gf('django.db.models.fields.CharField')(default=None, max_length=9, null=True, blank=True)), 47 | ('user_id', self.gf('django.db.models.fields.BigIntegerField')()), 48 | ('user_screen_name', self.gf('django.db.models.fields.CharField')(max_length=50)), 49 | ('user_name', self.gf('django.db.models.fields.CharField')(max_length=150)), 50 | ('user_verified', 
self.gf('django.db.models.fields.BooleanField')()), 51 | ('created_at', self.gf('django.db.models.fields.DateTimeField')()), 52 | ('user_utc_offset', self.gf('django.db.models.fields.IntegerField')(default=None, null=True, blank=True)), 53 | ('user_time_zone', self.gf('django.db.models.fields.CharField')(default=None, max_length=150, null=True, blank=True)), 54 | ('filter_level', self.gf('django.db.models.fields.CharField')(default=None, max_length=6, null=True, blank=True)), 55 | ('latitude', self.gf('django.db.models.fields.FloatField')(default=None, null=True, blank=True)), 56 | ('longitude', self.gf('django.db.models.fields.FloatField')(default=None, null=True, blank=True)), 57 | ('user_geo_enabled', self.gf('django.db.models.fields.BooleanField')(default=False)), 58 | ('user_location', self.gf('django.db.models.fields.CharField')(default=None, max_length=150, null=True, blank=True)), 59 | ('favorite_count', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)), 60 | ('retweet_count', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)), 61 | ('user_followers_count', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)), 62 | ('user_friends_count', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)), 63 | ('in_reply_to_status_id', self.gf('django.db.models.fields.BigIntegerField')(default=None, null=True, blank=True)), 64 | ('retweeted_status_id', self.gf('django.db.models.fields.BigIntegerField')(default=None, null=True, blank=True)), 65 | ('analyzed_by', self.gf('django.db.models.fields.SmallIntegerField')(default=0)), 66 | )) 67 | db.send_create_signal(u'twitter_stream', ['Tweet']) 68 | 69 | # Adding model 'FilterTerm' 70 | db.create_table(u'twitter_stream_filterterm', ( 71 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), 72 | ('created_at', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)), 73 | 
('term', self.gf('django.db.models.fields.CharField')(max_length=250)), 74 | ('enabled', self.gf('django.db.models.fields.BooleanField')(default=True)), 75 | )) 76 | db.send_create_signal(u'twitter_stream', ['FilterTerm']) 77 | 78 | 79 | def backwards(self, orm): 80 | # Deleting model 'ApiKey' 81 | db.delete_table(u'twitter_stream_apikey') 82 | 83 | # Deleting model 'StreamProcess' 84 | db.delete_table(u'twitter_stream_streamprocess') 85 | 86 | # Deleting model 'Tweet' 87 | db.delete_table(u'twitter_stream_tweet') 88 | 89 | # Deleting model 'FilterTerm' 90 | db.delete_table(u'twitter_stream_filterterm') 91 | 92 | 93 | models = { 94 | u'twitter_stream.apikey': { 95 | 'Meta': {'object_name': 'ApiKey'}, 96 | 'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 97 | 'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 98 | 'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 99 | 'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 100 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), 101 | 'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}), 102 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 103 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '250'}) 104 | }, 105 | u'twitter_stream.filterterm': { 106 | 'Meta': {'object_name': 'FilterTerm'}, 107 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), 108 | 'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), 109 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 110 | 'term': ('django.db.models.fields.CharField', [], {'max_length': '250'}) 111 | }, 112 | u'twitter_stream.streamprocess': { 113 | 'Meta': {'object_name': 'StreamProcess'}, 114 | 
'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), 115 | 'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}), 116 | 'expires_at': ('django.db.models.fields.DateTimeField', [], {}), 117 | 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 118 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 119 | 'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}), 120 | 'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}), 121 | 'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}), 122 | 'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}), 123 | 'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}), 124 | 'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'}) 125 | }, 126 | u'twitter_stream.tweet': { 127 | 'Meta': {'object_name': 'Tweet'}, 128 | 'analyzed_by': ('django.db.models.fields.SmallIntegerField', [], {'default': '0'}), 129 | 'created_at': ('django.db.models.fields.DateTimeField', [], {}), 130 | 'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 131 | 'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}), 132 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 133 | 'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), 134 | 'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}), 135 | 'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), 136 | 'longitude': ('django.db.models.fields.FloatField', [], 
{'default': 'None', 'null': 'True', 'blank': 'True'}), 137 | 'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 138 | 'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), 139 | 'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}), 140 | 'truncated': ('django.db.models.fields.BooleanField', [], {}), 141 | 'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}), 142 | 'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 143 | 'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}), 144 | 'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), 145 | 'user_id': ('django.db.models.fields.BigIntegerField', [], {}), 146 | 'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}), 147 | 'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}), 148 | 'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}), 149 | 'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}), 150 | 'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), 151 | 'user_verified': ('django.db.models.fields.BooleanField', [], {}) 152 | } 153 | } 154 | 155 | complete_apps = ['twitter_stream'] -------------------------------------------------------------------------------- /twitter_stream/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models, connection 2 | from django.conf import settings as django_settings 3 | from datetime import datetime, timedelta 4 | from email.utils 
def parse_datetime(string):
    """
    Parse an RFC 2822-style date string into a datetime.

    The tests in this project feed it Twitter ``created_at`` values such as
    ``"Wed Jun 06 20:07:10 +0000 2012"``.

    Only the first six fields returned by ``email.utils.parsedate`` (year,
    month, day, hour, minute, second) are used, so any UTC offset present in
    the string is ignored and the wall-clock values are taken as-is.

    Returns a naive datetime when ``settings.USE_TZ`` is false; otherwise an
    aware datetime whose wall-clock fields are interpreted in the current
    Django timezone.

    Raises TypeError if the string cannot be parsed (``parsedate`` returns
    None in that case).
    """
    naive = datetime(*(parsedate(string)[:6]))

    if not settings.USE_TZ:
        return naive

    # pytz zones must be attached through localize(): handing one to the
    # datetime constructor (tzinfo=...) selects the zone's first historical
    # offset (usually LMT) and shifts non-UTC times by several minutes.
    if hasattr(current_timezone, 'localize'):
        return current_timezone.localize(naive)

    # Fixed-offset / non-pytz tzinfo objects can be attached directly.
    return naive.replace(tzinfo=current_timezone)
def get_memory_usage(self):
    """
    Report the peak resident set size of the current process.

    Returns a string formatted as "<n.n> MB", or "Unknown" on platforms
    where the ``resource`` module cannot be imported.
    """
    try:
        import resource
    except ImportError:
        return "Unknown"
    import sys

    max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    # ru_maxrss is reported in kilobytes on Linux but in bytes on macOS,
    # so the divisor has to depend on the platform.
    if sys.platform == 'darwin':
        megabytes = max_rss / (1024.0 * 1024.0)
    else:
        megabytes = max_rss / 1024.0

    return "%.1f MB" % megabytes
class AbstractTweet(models.Model):
    """
    Selected fields from a Twitter Status object.
    Incorporates several fields from the associated User object.

    For details see https://dev.twitter.com/docs/platform-objects/tweets

    Note that we are not using tweet_id as a primary key -- this application
    does not enforce integrity w/ regard to individual tweets.
    We just add them to the database as they come in, even if we've seen
    them before.
    """

    class Meta:
        abstract = True

    id = fields.PositiveBigAutoField(primary_key=True)

    # Basic tweet info
    tweet_id = models.BigIntegerField()
    text = models.CharField(max_length=250)
    truncated = models.BooleanField(default=False)
    lang = models.CharField(max_length=9, null=True, blank=True, default=None)

    # Basic user info
    user_id = models.BigIntegerField()
    user_screen_name = models.CharField(max_length=50)
    user_name = models.CharField(max_length=150)
    user_verified = models.BooleanField(default=False)

    # Timing parameters
    created_at = models.DateTimeField(db_index=True)  # should be UTC
    user_utc_offset = models.IntegerField(null=True, blank=True, default=None)
    user_time_zone = models.CharField(max_length=150, null=True, blank=True, default=None)

    # none, low, or medium
    filter_level = models.CharField(max_length=6, null=True, blank=True, default=None)

    # Geo parameters
    latitude = models.FloatField(null=True, blank=True, default=None)
    longitude = models.FloatField(null=True, blank=True, default=None)
    user_geo_enabled = models.BooleanField(default=False)
    user_location = models.CharField(max_length=150, null=True, blank=True, default=None)

    # Engagement - not likely to be very useful for streamed tweets but whatever
    favorite_count = models.PositiveIntegerField(null=True, blank=True)
    retweet_count = models.PositiveIntegerField(null=True, blank=True)
    user_followers_count = models.PositiveIntegerField(null=True, blank=True)
    user_friends_count = models.PositiveIntegerField(null=True, blank=True)

    # Relation to other tweets
    in_reply_to_status_id = models.BigIntegerField(null=True, blank=True, default=None)
    retweeted_status_id = models.BigIntegerField(null=True, blank=True, default=None)

    @property
    def is_retweet(self):
        """True when this tweet carries the id of a retweeted status."""
        return self.retweeted_status_id is not None

    @classmethod
    def create_from_json(cls, raw):
        """
        Given a *parsed* json status object, construct a new Tweet model.

        The instance is built but not saved.  Required keys ('id', 'text',
        'truncated', 'created_at', 'user' and the user's 'id',
        'screen_name', 'name', 'verified') raise KeyError when missing;
        every other key is optional and maps to None (or False for
        user_geo_enabled) when absent or null.
        """

        user = raw['user']

        # 'retweeted_status' may be missing or explicitly null.
        retweeted_status = raw.get('retweeted_status')
        retweeted_status_id = retweeted_status['id'] if retweeted_status else None

        # The "coordinates" entry looks like this (longitude first):
        #
        #     "coordinates":
        #     {
        #         "coordinates":
        #         [
        #             -75.14310264,
        #             40.05701649
        #         ],
        #         "type":"Point"
        #     }
        longitude = None
        latitude = None
        point = raw.get('coordinates')  # tolerate a missing key as well as null
        if point:
            longitude = point['coordinates'][0]
            latitude = point['coordinates'][1]

        # Replace negative counts with None to indicate missing data
        counts = {
            'favorite_count': raw.get('favorite_count'),
            'retweet_count': raw.get('retweet_count'),
            'user_followers_count': user.get('followers_count'),
            'user_friends_count': user.get('friends_count'),
        }
        for key in counts:
            if counts[key] is not None and counts[key] < 0:
                counts[key] = None

        return cls(
            # Basic tweet info
            tweet_id=raw['id'],
            text=raw['text'],
            truncated=raw['truncated'],
            lang=raw.get('lang'),

            # Basic user info
            user_id=user['id'],
            user_screen_name=user['screen_name'],
            user_name=user['name'],
            user_verified=user['verified'],

            # Timing parameters
            created_at=parse_datetime(raw['created_at']),
            user_utc_offset=user.get('utc_offset'),
            user_time_zone=user.get('time_zone'),

            # none, low, or medium
            filter_level=raw.get('filter_level'),

            # Geo parameters
            latitude=latitude,
            longitude=longitude,
            # The column is a non-null boolean, so a missing/null flag
            # must become False rather than None.
            user_geo_enabled=bool(user.get('geo_enabled')),
            user_location=user.get('location'),

            # Engagement - not likely to be very useful for streamed tweets but whatever
            favorite_count=counts['favorite_count'],
            retweet_count=counts['retweet_count'],
            user_followers_count=counts['user_followers_count'],
            user_friends_count=counts['user_friends_count'],

            # Relation to other tweets
            in_reply_to_status_id=raw.get('in_reply_to_status_id'),
            retweeted_status_id=retweeted_status_id
        )

    @classmethod
    def get_created_in_range(cls, start, end):
        """
        Returns all the tweets with start <= created_at < end.
        """
        return cls.objects.filter(created_at__gte=start, created_at__lt=end)

    @classmethod
    def get_earliest_created_at(cls):
        """
        Returns the earliest created_at time, or None
        """
        result = cls.objects.aggregate(earliest_created_at=models.Min('created_at'))
        return result['earliest_created_at']

    @classmethod
    def get_latest_created_at(cls):
        """
        Returns the latest created_at time, or None
        """
        result = cls.objects.aggregate(latest_created_at=models.Max('created_at'))
        return result['latest_created_at']

    @classmethod
    def count_approx(cls):
        """
        Get the approximate number of tweets.
        Executes quickly, even on large InnoDB tables.

        On a MySQL default database the row estimate from
        SHOW TABLE STATUS is returned; on any other engine, or when MySQL
        gives no usable estimate, an exact COUNT is performed instead.
        """
        engine = django_settings.DATABASES['default']['ENGINE']
        if engine.endswith('mysql'):
            cursor = connection.cursor()
            try:
                cursor.execute("SHOW TABLE STATUS WHERE Name = %s", [cls._meta.db_table])
                row = cursor.fetchone()
                if row is not None:
                    columns = [col[0].lower() for col in cursor.description]
                    estimate = dict(zip(columns, row)).get('rows')
                    # The estimate can be NULL; fall through to the exact
                    # count below in that case.
                    if estimate is not None:
                        return int(estimate)
            finally:
                cursor.close()

        return cls.objects.count()
class TweetCreateFromJsonTest(TestCase):
    """
    Checks Tweet.create_from_json() against hand-extracted expected values.

    Individual test methods are attached to this class after its
    definition through add_test().
    """

    # Every model field that is compared, in the order it is checked.
    COMPARED_FIELDS = (
        # Basic tweet info
        'tweet_id',
        'text',
        'truncated',
        'lang',

        # Basic user info
        'user_id',
        'user_screen_name',
        'user_name',
        'user_verified',

        # Timing parameters
        'created_at',
        'user_utc_offset',
        'user_time_zone',

        # none, low, or medium
        'filter_level',

        # Geo parameters
        'latitude',
        'longitude',
        'user_geo_enabled',
        'user_location',

        # Engagement - not likely to be very useful for streamed tweets but whatever
        'favorite_count',
        'retweet_count',
        'user_followers_count',
        'user_friends_count',

        # Relation to other tweets
        'in_reply_to_status_id',
        'retweeted_status_id',
    )

    def validate_json(self, tweet_json, correct_data):
        """
        create_from_json() should return a Tweet object with
        the fields set to their proper values.

        Checks that all the fields match up.
        The tweet_json is raw JSON text from the Twitter api and documentation,
        The correct_data is corresponding manually-extracted data.
        """

        raw_tweet = json.loads(tweet_json)
        tweet = Tweet.create_from_json(raw_tweet)
        self.assertIsInstance(tweet, Tweet)

        # check for model validity
        tweet.clean_fields()

        # Work on a copy: the caller's dict is shared with the generated
        # test, and making an already-aware datetime aware again would fail
        # if the same data were validated twice.
        expected = dict(correct_data)

        # May need to convert the date depending on timezone settings
        if settings.USE_TZ:
            expected['created_at'] = timezone.make_aware(expected['created_at'],
                                                         timezone.get_current_timezone())

        for field in self.COMPARED_FIELDS:
            self.assertEqual(getattr(tweet, field), expected[field], '%s matches' % field)

    @classmethod
    def add_test(cls, name, json, correct_data):
        """
        Attach a test method named test_<name> that validates the given
        raw JSON text against correct_data.

        Note: the `json` parameter is the raw JSON *text*; it shadows the
        json module only inside this method.
        """
        test_name = "test_%s" % name

        def generated_test(self):
            self.validate_json(json, correct_data)

        # Give the generated method a real name so test reports and
        # tracebacks identify it instead of showing an anonymous function.
        generated_test.__name__ = test_name
        setattr(cls, test_name, generated_test)
"in_reply_to_status_id_str": null, 112 | "id": 210462857140252672, 113 | "geo": null, 114 | "retweeted": true, 115 | "possibly_sensitive": false, 116 | "in_reply_to_user_id": null, 117 | "place": null, 118 | "user": { 119 | "profile_sidebar_fill_color": "DDEEF6", 120 | "profile_sidebar_border_color": "C0DEED", 121 | "profile_background_tile": false, 122 | "name": "Twitter API", 123 | "profile_image_url": "http://a0.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3_normal.png", 124 | "created_at": "Wed May 23 06:01:13 +0000 2007", 125 | "location": "San Francisco, CA", 126 | "follow_request_sent": false, 127 | "profile_link_color": "0084B4", 128 | "is_translator": false, 129 | "id_str": "6253282", 130 | "entities": { 131 | "url": { 132 | "urls": [ 133 | { 134 | "expanded_url": null, 135 | "url": "http://dev.twitter.com", 136 | "indices": [ 137 | 0, 138 | 22 139 | ] 140 | } 141 | ] 142 | }, 143 | "description": { 144 | "urls": [ 145 | 146 | ] 147 | } 148 | }, 149 | "default_profile": true, 150 | "contributors_enabled": true, 151 | "favourites_count": 24, 152 | "url": "http://dev.twitter.com", 153 | "profile_image_url_https": "https://si0.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3_normal.png", 154 | "utc_offset": -28800, 155 | "id": 6253282, 156 | "profile_use_background_image": true, 157 | "listed_count": 10774, 158 | "profile_text_color": "333333", 159 | "lang": "en", 160 | "followers_count": 1212963, 161 | "protected": false, 162 | "notifications": null, 163 | "profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme1/bg.png", 164 | "profile_background_color": "C0DEED", 165 | "verified": true, 166 | "geo_enabled": true, 167 | "time_zone": "Pacific Time (US & Canada)", 168 | "description": "The Real Twitter API. I tweet about API changes, service issues and happily answer questions about Twitter and our API. Don't get an answer? 
It's on my website.", 169 | "default_profile_image": false, 170 | "profile_background_image_url": "http://a0.twimg.com/images/themes/theme1/bg.png", 171 | "statuses_count": 3333, 172 | "friends_count": 31, 173 | "following": true, 174 | "show_all_inline_media": false, 175 | "screen_name": "twitterapi" 176 | }, 177 | "in_reply_to_screen_name": null, 178 | "source": "web", 179 | "in_reply_to_status_id": null 180 | }""", { 181 | # Basic tweet info 182 | 'tweet_id': 210462857140252672, 183 | 'text': "Along with our new #Twitterbird, we've also updated " 184 | "our Display Guidelines: https://t.co/Ed4omjYs ^JC", 185 | 'truncated': False, 186 | 'lang': None, 187 | 188 | # Basic user info 189 | 'user_id': 6253282, 190 | 'user_screen_name': 'twitterapi', 191 | 'user_name': 'Twitter API', 192 | 'user_verified': True, 193 | 194 | # Timing parameters 195 | 'created_at': datetime(2012, 6, 6, hour=20, minute=7, second=10, microsecond=0), 196 | 'user_utc_offset': -28800, 197 | 'user_time_zone': "Pacific Time (US & Canada)", 198 | 199 | # none, low, or medium 200 | 'filter_level': None, 201 | 202 | # Geo parameters 203 | 'latitude': None, 204 | 'longitude': None, 205 | 'user_geo_enabled': True, 206 | 'user_location': "San Francisco, CA", 207 | 208 | # Engagement - not likely to be very useful for streamed tweets but whatever 209 | 'favorite_count': None, 210 | 'retweet_count': 66, 211 | 'user_followers_count': 1212963, 212 | 'user_friends_count': 31, 213 | 214 | 'in_reply_to_status_id': None, 215 | 'retweeted_status_id': None 216 | }) 217 | 218 | # A captured tweet (anonymized) 219 | # This example has location data 220 | TweetCreateFromJsonTest.add_test('location_data', r"""{ 221 | "contributors": null, 222 | "coordinates": { 223 | "coordinates": [ 224 | -118.722583202, 225 | 34.983424651 226 | ], 227 | "type": "Point" 228 | }, 229 | "created_at": "Tue Feb 11 18:43:27 +0000 2014", 230 | "entities": { 231 | "hashtags": [], 232 | "symbols": [], 233 | "urls": [], 234 | 
"user_mentions": [] 235 | }, 236 | "favorite_count": 0, 237 | "favorited": false, 238 | "filter_level": "medium", 239 | "geo": { 240 | "coordinates": [ 241 | 34.983424651, 242 | -118.722583202 243 | ], 244 | "type": "Point" 245 | }, 246 | "id": 458121938375806432, 247 | "id_str": "458121938375806432", 248 | "in_reply_to_screen_name": null, 249 | "in_reply_to_status_id": null, 250 | "in_reply_to_status_id_str": null, 251 | "in_reply_to_user_id": null, 252 | "in_reply_to_user_id_str": null, 253 | "lang": "en", 254 | "place": { 255 | "attributes": {}, 256 | "bounding_box": { 257 | "coordinates": [ 258 | [ 259 | [ 260 | -118.0, 261 | 34.0 262 | ], 263 | [ 264 | -118.0, 265 | 34.0 266 | ], 267 | [ 268 | -118.0, 269 | 34.0 270 | ], 271 | [ 272 | -118.0, 273 | 34.0 274 | ] 275 | ] 276 | ], 277 | "type": "Polygon" 278 | }, 279 | "contained_within": [], 280 | "country": "United States", 281 | "country_code": "US", 282 | "full_name": "Place, CA", 283 | "id": "540563418", 284 | "name": "Place", 285 | "place_type": "city", 286 | "url": "https://api.twitter.com/1.1/geo/id/540563418.json" 287 | }, 288 | "retweet_count": 0, 289 | "retweeted": false, 290 | "source": "Twitter for iPhone", 291 | "text": "Blah blah blah blah blah blah blah blah!", 292 | "truncated": false, 293 | "user": { 294 | "contributors_enabled": false, 295 | "created_at": "Thu Jul 26 14:02:08 +0000 2012", 296 | "default_profile": true, 297 | "default_profile_image": false, 298 | "description": null, 299 | "favourites_count": 2, 300 | "follow_request_sent": null, 301 | "followers_count": 4, 302 | "following": null, 303 | "friends_count": 13, 304 | "geo_enabled": true, 305 | "id": 687069798, 306 | "id_str": "687069798", 307 | "is_translation_enabled": false, 308 | "is_translator": false, 309 | "lang": "en", 310 | "listed_count": 0, 311 | "location": "", 312 | "name": "some_user_name", 313 | "notifications": null, 314 | "profile_background_color": "C0DEED", 315 | "profile_background_image_url": 
"http://abs.twimg.com/images/themes/theme1/bg.png", 316 | "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", 317 | "profile_background_tile": false, 318 | "profile_image_url": "http://pbs.twimg.com/profile_images/fake_fake_fake.jpeg", 319 | "profile_image_url_https": "https://pbs.twimg.com/profile_images/fake_fake_fake.jpeg", 320 | "profile_link_color": "0084B4", 321 | "profile_sidebar_border_color": "C0DEED", 322 | "profile_sidebar_fill_color": "DDEEF6", 323 | "profile_text_color": "333333", 324 | "profile_use_background_image": true, 325 | "protected": false, 326 | "screen_name": "some_screen_name", 327 | "statuses_count": 7, 328 | "time_zone": "Pacific Time (US & Canada)", 329 | "url": null, 330 | "utc_offset": null, 331 | "verified": false 332 | } 333 | }""", { 334 | # Basic tweet info 335 | 'tweet_id': 458121938375806432, 336 | 'text': "Blah blah blah blah blah blah blah blah!", 337 | 'truncated': False, 338 | 'lang': "en", 339 | 340 | # Basic user info 341 | 'user_id': 687069798, 342 | 'user_screen_name': 'some_screen_name', 343 | 'user_name': 'some_user_name', 344 | 'user_verified': False, 345 | 346 | # Timing parameters 347 | 'created_at': datetime(2014, 2, 11, hour=18, minute=43, second=27, microsecond=0), 348 | 'user_utc_offset': None, 349 | 'user_time_zone': "Pacific Time (US & Canada)", 350 | 351 | # none, low, or medium 352 | 'filter_level': 'medium', 353 | 354 | # Geo parameters 355 | 'latitude': 34.983424651, 356 | 'longitude': -118.722583202, 357 | 'user_geo_enabled': True, 358 | 'user_location': "", 359 | 360 | # Engagement - not likely to be very useful for streamed tweets but whatever 361 | 'favorite_count': 0, 362 | 'retweet_count': 0, 363 | 'user_followers_count': 4, 364 | 'user_friends_count': 13, 365 | 366 | 'in_reply_to_status_id': None, 367 | 'retweeted_status_id': None 368 | }) 369 | 370 | # A captured tweet (anonymized) 371 | # This example is a retweet 372 | 
TweetCreateFromJsonTest.add_test('retweet', r"""{ 373 | "contributors": null, 374 | "coordinates": null, 375 | "created_at": "Tue Feb 11 18:43:27 +0000 2014", 376 | "entities": { 377 | "hashtags": [], 378 | "symbols": [], 379 | "urls": [], 380 | "user_mentions": [ 381 | { 382 | "id": 600695731, 383 | "id_str": "600695731", 384 | "indices": [ 385 | 3, 386 | 12 387 | ], 388 | "name": "somebody", 389 | "screen_name": "somebody124" 390 | } 391 | ] 392 | }, 393 | "favorite_count": 0, 394 | "favorited": false, 395 | "filter_level": "medium", 396 | "geo": null, 397 | "id": 664439253345490274, 398 | "id_str": "664439253345490274", 399 | "in_reply_to_screen_name": null, 400 | "in_reply_to_status_id": null, 401 | "in_reply_to_status_id_str": null, 402 | "in_reply_to_user_id": null, 403 | "in_reply_to_user_id_str": null, 404 | "lang": "en", 405 | "place": null, 406 | "retweet_count": 0, 407 | "retweeted": false, 408 | "retweeted_status": { 409 | "contributors": null, 410 | "coordinates": null, 411 | "created_at": "Tue Feb 11 18:28:05 +0000 2014", 412 | "entities": { 413 | "hashtags": [], 414 | "symbols": [], 415 | "urls": [], 416 | "user_mentions": [] 417 | }, 418 | "favorite_count": 12, 419 | "favorited": false, 420 | "geo": null, 421 | "id": 552293876248595761, 422 | "id_str": "552293876248595761", 423 | "in_reply_to_screen_name": null, 424 | "in_reply_to_status_id": null, 425 | "in_reply_to_status_id_str": null, 426 | "in_reply_to_user_id": null, 427 | "in_reply_to_user_id_str": null, 428 | "lang": "en", 429 | "place": null, 430 | "retweet_count": 10, 431 | "retweeted": false, 432 | "source": "Twitter for iPhone", 433 | "text": "I am an amazing tweet blah blah blah blah blah blah blah", 434 | "truncated": false, 435 | "user": { 436 | "contributors_enabled": false, 437 | "created_at": "Thu Jan 26 21:45:50 +0000 2012", 438 | "default_profile": false, 439 | "default_profile_image": false, 440 | "description": "my user description goes here", 441 | "favourites_count": 12772, 
442 | "follow_request_sent": null, 443 | "followers_count": 5201, 444 | "following": null, 445 | "friends_count": 836, 446 | "geo_enabled": false, 447 | "id": 557753453, 448 | "id_str": "557753453", 449 | "is_translation_enabled": false, 450 | "is_translator": false, 451 | "lang": "en", 452 | "listed_count": 10, 453 | "location": "some place", 454 | "name": "my name", 455 | "notifications": null, 456 | "profile_background_color": "090A0A", 457 | "profile_background_image_url": "http://pbs.twimg.com/profile_background_images/fake_fake_fake.jpeg", 458 | "profile_background_image_url_https": "https://pbs.twimg.com/profile_background_images/fake_fake_fake.jpeg", 459 | "profile_background_tile": true, 460 | "profile_banner_url": "https://pbs.twimg.com/profile_banners/fake_fake_fake", 461 | "profile_image_url": "http://pbs.twimg.com/profile_images/fake_fake_fake.jpeg", 462 | "profile_image_url_https": "https://pbs.twimg.com/profile_images/fake_fake_fake.jpeg", 463 | "profile_link_color": "2CC7C7", 464 | "profile_sidebar_border_color": "000000", 465 | "profile_sidebar_fill_color": "E6E4E4", 466 | "profile_text_color": "404040", 467 | "profile_use_background_image": false, 468 | "protected": false, 469 | "screen_name": "my_screen_name", 470 | "statuses_count": 15670, 471 | "time_zone": "Central Time (US & Canada)", 472 | "url": null, 473 | "utc_offset": -21600, 474 | "verified": false 475 | } 476 | }, 477 | "source": "Twitter for iPhone", 478 | "text": "RT @my_screen_name: I am an amazing tweet blah blah blah blah blah blah blah", 479 | "truncated": false, 480 | "user": { 481 | "contributors_enabled": false, 482 | "created_at": "Fri Nov 13 23:51:33 +0000 2009", 483 | "default_profile": false, 484 | "default_profile_image": false, 485 | "description": "An inspiring quote, #belieber", 486 | "favourites_count": 6009, 487 | "follow_request_sent": null, 488 | "followers_count": 442, 489 | "following": null, 490 | "friends_count": 380, 491 | "geo_enabled": true, 492 | "id": 
165087803, 493 | "id_str": "165087803", 494 | "is_translation_enabled": false, 495 | "is_translator": false, 496 | "lang": "en", 497 | "listed_count": 2, 498 | "location": "", 499 | "name": "My Real Name", 500 | "notifications": null, 501 | "profile_background_color": "642D8B", 502 | "profile_background_image_url": "http://abs.twimg.com/images/themes/theme10/bg.gif", 503 | "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme10/bg.gif", 504 | "profile_background_tile": true, 505 | "profile_banner_url": "https://pbs.twimg.com/profile_banners/fake_fake_fake", 506 | "profile_image_url": "http://pbs.twimg.com/profile_images/fake_fake_fake.jpeg", 507 | "profile_image_url_https": "https://pbs.twimg.com/profile_images/fake_fake_fake.jpeg", 508 | "profile_link_color": "FF0000", 509 | "profile_sidebar_border_color": "65B0DA", 510 | "profile_sidebar_fill_color": "7AC3EE", 511 | "profile_text_color": "3D1957", 512 | "profile_use_background_image": true, 513 | "protected": false, 514 | "screen_name": "screen_name", 515 | "statuses_count": 8006, 516 | "time_zone": "Central Time (US & Canada)", 517 | "url": null, 518 | "utc_offset": -21600, 519 | "verified": false 520 | } 521 | }""", { 522 | # Basic tweet info 523 | 'tweet_id': 664439253345490274, 524 | 'text': "RT @my_screen_name: I am an amazing tweet blah blah blah blah blah blah blah", 525 | 'truncated': False, 526 | 'lang': "en", 527 | 528 | # Basic user info 529 | 'user_id': 165087803, 530 | 'user_screen_name': "screen_name", 531 | 'user_name': 'My Real Name', 532 | 'user_verified': False, 533 | 534 | # Timing parameters 535 | 'created_at': datetime(2014, 2, 11, hour=18, minute=43, second=27, microsecond=0), 536 | 'user_utc_offset': -21600, 537 | 'user_time_zone': "Central Time (US & Canada)", 538 | 539 | # none, low, or medium 540 | 'filter_level': 'medium', 541 | 542 | # Geo parameters 543 | 'latitude': None, 544 | 'longitude': None, 545 | 'user_geo_enabled': True, 546 | 'user_location': "", 
547 | 548 | # Engagement - not likely to be very useful for streamed tweets but whatever 549 | 'favorite_count': 0, 550 | 'retweet_count': 0, 551 | 'user_followers_count': 442, 552 | 'user_friends_count': 380, 553 | 554 | 'in_reply_to_status_id': None, 555 | 'retweeted_status_id': 552293876248595761 556 | }) 557 | 558 | 559 | # A captured tweet (anonymized) 560 | # This example has negative counts 561 | # a la https://dev.twitter.com/docs/streaming-apis/processing#Missing_counts 562 | TweetCreateFromJsonTest.add_test('negative_counts', r"""{ 563 | "contributors": null, 564 | "coordinates": null, 565 | "created_at": "Tue Feb 11 18:43:27 +0000 2014", 566 | "entities": { 567 | "hashtags": [], 568 | "symbols": [], 569 | "urls": [], 570 | "user_mentions": [ 571 | { 572 | "id": 600695731, 573 | "id_str": "600695731", 574 | "indices": [ 575 | 3, 576 | 12 577 | ], 578 | "name": "somebody", 579 | "screen_name": "somebody124" 580 | } 581 | ] 582 | }, 583 | "favorite_count": -1, 584 | "favorited": false, 585 | "filter_level": "medium", 586 | "geo": null, 587 | "id": 664439253345490274, 588 | "id_str": "664439253345490274", 589 | "in_reply_to_screen_name": null, 590 | "in_reply_to_status_id": null, 591 | "in_reply_to_status_id_str": null, 592 | "in_reply_to_user_id": null, 593 | "in_reply_to_user_id_str": null, 594 | "lang": "en", 595 | "place": null, 596 | "retweet_count": -1, 597 | "retweeted": false, 598 | "retweeted_status": null, 599 | "source": "Twitter for iPhone", 600 | "text": "RT @my_screen_name: I am an amazing tweet blah blah blah blah blah blah blah", 601 | "truncated": false, 602 | "user": { 603 | "contributors_enabled": false, 604 | "created_at": "Fri Nov 13 23:51:33 +0000 2009", 605 | "default_profile": false, 606 | "default_profile_image": false, 607 | "description": "An inspiring quote, #belieber", 608 | "favourites_count": -1, 609 | "follow_request_sent": null, 610 | "followers_count": -1, 611 | "following": null, 612 | "friends_count": -1, 613 | 
"geo_enabled": true, 614 | "id": 165087803, 615 | "id_str": "165087803", 616 | "is_translation_enabled": false, 617 | "is_translator": false, 618 | "lang": "en", 619 | "listed_count": -1, 620 | "location": "", 621 | "name": "My Real Name", 622 | "notifications": null, 623 | "profile_background_color": "642D8B", 624 | "profile_background_image_url": "http://abs.twimg.com/images/themes/theme10/bg.gif", 625 | "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme10/bg.gif", 626 | "profile_background_tile": true, 627 | "profile_banner_url": "https://pbs.twimg.com/profile_banners/fake_fake_fake", 628 | "profile_image_url": "http://pbs.twimg.com/profile_images/fake_fake_fake.jpeg", 629 | "profile_image_url_https": "https://pbs.twimg.com/profile_images/fake_fake_fake.jpeg", 630 | "profile_link_color": "FF0000", 631 | "profile_sidebar_border_color": "65B0DA", 632 | "profile_sidebar_fill_color": "7AC3EE", 633 | "profile_text_color": "3D1957", 634 | "profile_use_background_image": true, 635 | "protected": false, 636 | "screen_name": "screen_name", 637 | "statuses_count": -1, 638 | "time_zone": "Central Time (US & Canada)", 639 | "url": null, 640 | "utc_offset": -21600, 641 | "verified": false 642 | } 643 | }""", { 644 | # Basic tweet info 645 | 'tweet_id': 664439253345490274, 646 | 'text': "RT @my_screen_name: I am an amazing tweet blah blah blah blah blah blah blah", 647 | 'truncated': False, 648 | 'lang': "en", 649 | 650 | # Basic user info 651 | 'user_id': 165087803, 652 | 'user_screen_name': "screen_name", 653 | 'user_name': 'My Real Name', 654 | 'user_verified': False, 655 | 656 | # Timing parameters 657 | 'created_at': datetime(2014, 2, 11, hour=18, minute=43, second=27, microsecond=0), 658 | 'user_utc_offset': -21600, 659 | 'user_time_zone': "Central Time (US & Canada)", 660 | 661 | # none, low, or medium 662 | 'filter_level': 'medium', 663 | 664 | # Geo parameters 665 | 'latitude': None, 666 | 'longitude': None, 667 | 'user_geo_enabled': 
True, 668 | 'user_location': "", 669 | 670 | # Engagement - not likely to be very useful for streamed tweets but whatever 671 | 'favorite_count': None, 672 | 'retweet_count': None, 673 | 'user_followers_count': None, 674 | 'user_friends_count': None, 675 | 676 | 'in_reply_to_status_id': None, 677 | 'retweeted_status_id': None 678 | }) 679 | --------------------------------------------------------------------------------