├── twitter_stream
├── __init__.py
├── management
│ ├── __init__.py
│ └── commands
│ │ ├── __init__.py
│ │ ├── stream_from_file.py
│ │ └── stream.py
├── migrations
│ ├── __init__.py
│ ├── 0004_auto__del_field_tweet_analyzed_by.py
│ ├── 0006_auto__chg_field_tweet_id.py
│ ├── 0002_auto__add_index_tweet_analyzed_by__add_index_tweet_created_at.py
│ ├── 0003_auto__add_field_streamprocess_memory_usage.py
│ ├── 0005_auto__del_field_apikey_name__add_field_apikey_user_name__add_field_api.py
│ └── 0001_initial.py
├── tests
│ ├── __init__.py
│ ├── test_stream_process.py
│ └── test_tweet.py
├── utils
│ ├── __init__.py
│ ├── file_stream.py
│ └── streaming.py
├── admin.py
├── urls.py
├── static
│ └── twitter_stream
│ │ ├── status.css
│ │ └── status.js
├── settings.py
├── templates
│ └── twitter_stream
│ │ ├── status.html
│ │ └── status_display.html
├── fields.py
├── views.py
└── models.py
├── manage.py
├── .gitignore
├── test_settings.py
├── LICENSE
├── setup.py
└── README.md
/twitter_stream/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/twitter_stream/management/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/twitter_stream/migrations/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/twitter_stream/management/commands/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/twitter_stream/tests/__init__.py:
--------------------------------------------------------------------------------
1 | from .test_tweet import *
2 | from .test_stream_process import *
3 |
--------------------------------------------------------------------------------
/twitter_stream/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .file_stream import FakeTwitterStream, FakeTermChecker
2 | from .streaming import FeelsTermChecker, QueueStreamListener
3 |
--------------------------------------------------------------------------------
/twitter_stream/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 |
3 | from . import models
4 |
5 | admin.site.register(models.FilterTerm)
6 | admin.site.register(models.ApiKey)
7 |
--------------------------------------------------------------------------------
/twitter_stream/urls.py:
--------------------------------------------------------------------------------
1 | from django.conf.urls import patterns, url
2 |
# Routes for the status page. The view names given as strings are resolved
# against the 'twitter_stream.views' prefix passed to patterns().
urlpatterns = patterns('twitter_stream.views',
    url(r'^$', 'status', name='status'),
    url(r'^update/', 'json_status', name='update'),
)
7 |
--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
if __name__ == "__main__":
    # Use the bundled test settings unless the environment already names
    # a settings module (setdefault never overrides an existing value).
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "test_settings")

    # Imported after the settings module has been chosen.
    from django.core.management import execute_from_command_line

    execute_from_command_line(sys.argv)
11 |
--------------------------------------------------------------------------------
/twitter_stream/tests/test_stream_process.py:
--------------------------------------------------------------------------------
1 | from django.test import TestCase
2 | from twitter_stream import settings
3 | from twitter_stream.models import StreamProcess
4 |
class StreamProcessTest(TestCase):
    """Checks on the values reported by StreamProcess helpers."""

    def test_get_memory_usage(self):
        """Memory usage is "Unknown" on Windows, otherwise a "<number> MB" string."""
        import os

        reported = StreamProcess().get_memory_usage()

        if os.name != 'nt':
            # NOTE(review): the "." in this pattern is unescaped, so it
            # matches any character, not only a decimal point -- confirm.
            self.assertRegexpMatches(reported, r"\d+.\d+ MB")
        else:
            self.assertEqual(reported, "Unknown")
16 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.log
2 | *.pot
3 | *.pyc
4 |
5 | *.py[cod]
6 |
7 | # C extensions
8 | *.so
9 |
10 | # Packages
11 | *.egg
12 | *.egg-info
13 | dist
14 | build
15 | eggs
16 | parts
17 | bin
18 | var
19 | sdist
20 | develop-eggs
21 | .installed.cfg
22 | lib
23 | lib64
24 | __pycache__
25 |
26 | # Installer logs
27 | pip-log.txt
28 |
29 | # Unit test / coverage reports
30 | .coverage
31 | .tox
32 | nosetests.xml
33 | coverage.xml
34 | pep8.txt
35 |
36 | # Translations
37 | *.mo
38 |
39 | # Mr Developer
40 | .mr.developer.cfg
41 | .project
42 | .pydevproject
43 |
44 | /venv
45 | /.idea
46 | /*.db
--------------------------------------------------------------------------------
/test_settings.py:
--------------------------------------------------------------------------------
1 | from os.path import abspath, dirname, join, normpath
2 |
# Absolute filesystem path to the Django project directory:
# NOTE(review): dirname() is applied twice, so this resolves to the parent
# of the directory containing this file rather than that directory itself.
# Confirm this is the intended root (it is only used for TEMPLATE_DIRS below).
DJANGO_ROOT = dirname(dirname(abspath(__file__)))


# Placeholder key; these settings are only meant for running the test suite.
SECRET_KEY = 'secret'

# A single SQLite database stored in a file named test_database.db.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': 'test_database.db',
    }
}

STATICFILES_FINDERS = (
    'django.contrib.staticfiles.finders.FileSystemFinder',
    'django.contrib.staticfiles.finders.AppDirectoriesFinder',
)

TEMPLATE_LOADERS = (
    'django.template.loaders.filesystem.Loader',
    'django.template.loaders.app_directories.Loader',
)

# Extra template directory, located relative to DJANGO_ROOT.
TEMPLATE_DIRS = (
    normpath(join(DJANGO_ROOT, 'templates')),
)

# The app under test plus the Django/third-party apps listed alongside it.
INSTALLED_APPS = (
    'django.contrib.humanize',
    'django.contrib.staticfiles',
    'bootstrap3',
    'twitter_stream',
    'south',
)
--------------------------------------------------------------------------------
/twitter_stream/static/twitter_stream/status.css:
--------------------------------------------------------------------------------
1 | svg {
2 | font-size: 12px;
3 | }
4 | .axis path,
5 | .axis line {
6 | fill: none;
7 | stroke: #888;
8 | shape-rendering: crispEdges;
9 | }
10 |
11 | .axis text {
12 | fill: #888;
13 | }
14 |
15 | .x.axis path {
16 | display: none;
17 | }
18 |
19 | .bars rect {
20 | fill: steelblue;
21 | stroke:none;
22 | shape-rendering: crispEdges;
23 | }
24 |
25 | .bars text {
26 | text-anchor: middle;
27 | fill: #fff;
28 | }
29 |
30 | .bars .filling rect {
31 | fill: #bcd5e8;
32 | }
33 | .bars .filling text {
34 | fill: #333;
35 | }
36 |
37 | .content {
38 | position: relative;
39 | }
40 |
41 | h4 {
42 | margin-bottom: 0;
43 | margin-top: 15px;
44 | }
45 |
46 | .status-label {
47 | position: absolute;
48 | top: 0;
49 | left: 260px;
50 | margin: 20px 0 10px 0;
51 | color: #dd0000;
52 | }
53 |
54 | .status-label.running {
55 | color: #009800;
56 | }
57 |
58 | .status-label span {
59 | font-size: 36px;
60 | }
--------------------------------------------------------------------------------
/twitter_stream/settings.py:
--------------------------------------------------------------------------------
1 | from django.conf import settings
2 |
# App-level settings. DEBUG and USE_TZ mirror the project's Django settings;
# everything else is read from the optional TWITTER_STREAM_SETTINGS dict in
# the project's settings, falling back to the default shown on each line.
DEBUG = getattr(settings, 'DEBUG', False)
USE_TZ = getattr(settings, 'USE_TZ', True)

# An empty dict is used when the project defines no TWITTER_STREAM_SETTINGS.
_stream_settings = getattr(settings, 'TWITTER_STREAM_SETTINGS', {})

# If true, the embedded retweeted_status tweets will be captured
CAPTURE_EMBEDDED = _stream_settings.get('CAPTURE_EMBEDDED', False)

# The number of seconds in between checks for filter term changes and tweet inserts
POLL_INTERVAL = _stream_settings.get('POLL_INTERVAL', 10)

# The default keys to use for streaming
DEFAULT_KEYS_NAME = _stream_settings.get('DEFAULT_KEYS_NAME', None)

# Put the stream in a loop to prevent random termination
PREVENT_EXIT = _stream_settings.get('PREVENT_EXIT', False)

# Record stats like memory usage in the database
MONITOR_PERFORMANCE = _stream_settings.get('MONITOR_PERFORMANCE', True)

# The number of tweets to insert into the database at once
INSERT_BATCH_SIZE = _stream_settings.get('INSERT_BATCH_SIZE', 1000)
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2014 Michael Brooks
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | this software and associated documentation files (the "Software"), to deal in
7 | the Software without restriction, including without limitation the rights to
8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup
3 |
# Utility function to read the README file.
# Used for the long_description. It's nice, because now 1) we have a top level
# README file and 2) it's easier to type in the README file than to put a raw
# string in below ...
def read(fname):
    """Return the text of *fname*, resolved relative to this file's directory."""
    path = os.path.join(os.path.dirname(__file__), fname)
    # A context manager closes the handle deterministically instead of
    # leaving it open until the garbage collector gets to it.
    with open(path) as source:
        return source.read()
10 |
11 |
# Package metadata and dependency pins for django-twitter-stream.
setup(
    name='django-twitter-stream',
    version='0.1.3',
    # NOTE(review): only the top-level package is listed; subpackages of
    # twitter_stream are not named here -- confirm they get packaged.
    packages=['twitter_stream'],
    url='http://github.com/michaelbrooks/django-twitter-stream',
    license='MIT',
    author='Michael Brooks',
    author_email='mjbrooks@uw.edu',
    description='A Django app for streaming tweets from the Twitter API into a database.',
    # The long description is taken verbatim from the README next to this file.
    long_description=read('README.md'),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Topic :: Utilities",
        "License :: OSI Approved :: MIT License",
    ],
    install_requires=[
        "django >= 1.6",
        "twitter-monitor >= 0.3.0, < 0.4",
        "swapper >= 0.1.1, < 0.2",
        "django-jsonview >= 0.2, < 0.5",
        "django-bootstrap3 >= 4.3.0"
    ],
    # Tests are run through django-setuptest's suite.
    test_suite="setuptest.setuptest.SetupTestSuite",
    tests_require=[
        'django-setuptest',
    ]
)
39 |
--------------------------------------------------------------------------------
/twitter_stream/templates/twitter_stream/status.html:
--------------------------------------------------------------------------------
1 | {% load staticfiles bootstrap3 %}
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Twitter Stream Status
10 |
11 | {% bootstrap_css %}
12 |
13 |
14 |
15 |
19 |
20 |
21 |
22 | {% bootstrap_messages %}
23 |
24 |
Twitter Stream
25 |
26 |
29 |
30 |
33 |
34 |
40 |
41 |
42 |
43 | {% bootstrap_javascript %}
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/twitter_stream/templates/twitter_stream/status_display.html:
--------------------------------------------------------------------------------
1 | {% load humanize %}
2 |
3 |
4 | {% if status.running %}
5 |
6 | {% else %}
7 |
8 | {% endif %}
9 |
10 |
11 |
12 |
13 |
14 | | Tweets Stored: |
15 | ~{{ status.tweet_count }} |
16 |
17 |
18 | | Earliest: |
19 | {{ status.earliest }} |
20 |
21 |
22 | | Latest: |
23 | {{ status.latest }} |
24 |
25 |
26 | | Average Rate: |
27 | {{ status.avg_rate|floatformat }} tweets / second |
28 |
29 |
30 |
31 |
32 |
33 | Current filter terms:
34 | {% for term in status.terms %}
35 | {{ term }}
36 | {% endfor %}
37 |
38 |
39 | {% if status.processes %}
40 | Recent Twitter streaming processes:
41 |
42 |
43 |
44 | | Status |
45 | Process |
46 | API Key |
47 | Started |
48 | Last Heartbeat |
49 | Tweet Rate (t/s) |
50 | Memory |
51 | Errors |
52 |
53 |
54 |
55 | {% for stream in status.processes %}
56 |
60 | |
61 |
62 | {{ stream.get_status_display }}
63 |
64 | |
65 | {{ stream.hostname }}:{{ stream.process_id }} |
66 | {{ stream.keys }} |
67 | {{ stream.created_at|naturaltime }} |
68 | {{ stream.last_heartbeat|naturaltime }} |
69 | {{ stream.tweet_rate|floatformat }} |
70 | {{ stream.memory_usage }} |
71 | {% if stream.error_count > 0 %}
72 | {{ stream.error_count }} |
73 | {% else %}
74 | {{ stream.error_count }} |
75 | {% endif %}
76 |
77 | {% endfor %}
78 |
79 |
80 | {% else %}
81 | No recent Twitter streaming processes.
82 | {% endif %}
83 |
84 |
--------------------------------------------------------------------------------
/twitter_stream/fields.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 | from django import forms
3 | from django.core import exceptions
4 | import math
5 |
class PositiveBigIntegerField(models.BigIntegerField):
    """Big integer field whose form field only accepts non-negative values.

    When the connection class lives in a module whose name contains
    'mysql', the column is declared ``bigint UNSIGNED``; otherwise the
    stock ``BigIntegerField`` column type is used.
    """
    description = "Positive Big integer"

    def formfield(self, **kwargs):
        """Return the form field; caller kwargs override the range defaults."""
        options = dict(
            min_value=0,
            max_value=models.BigIntegerField.MAX_BIGINT * 2 - 1,
        )
        options.update(kwargs)
        return super(PositiveBigIntegerField, self).formfield(**options)

    def db_type(self, connection):
        """Return the database column type to use on *connection*."""
        backend_module = connection.__class__.__module__
        if 'mysql' not in backend_module:
            return super(PositiveBigIntegerField, self).db_type(connection)
        return 'bigint UNSIGNED'
19 |
20 |
class PositiveBigAutoField(models.AutoField):
    """Auto field stored as an unsigned, auto-incrementing bigint on MySQL.

    Connections whose class module name does not contain 'mysql' fall
    back to the column type of the stock ``AutoField``.
    """
    description = "Unsigned Big Integer"
    empty_strings_allowed = False
    MAX_BIGINT = 9223372036854775807

    default_error_messages = {
        'invalid': "'%(value)s' value must be an integer.",
    }

    def db_type(self, connection):
        """Return the database column type to use on *connection*."""
        backend_module = connection.__class__.__module__
        if 'mysql' not in backend_module:
            return super(PositiveBigAutoField, self).db_type(connection)
        return 'bigint UNSIGNED AUTO_INCREMENT'

    def get_prep_value(self, value):
        """Coerce *value* to ``int`` for the database, passing ``None`` through."""
        return None if value is None else int(value)

    def get_prep_lookup(self, lookup_type, value):
        """Round float operands of 'gte' and 'lt' lookups up to a whole number."""
        if lookup_type in ('gte', 'lt') and isinstance(value, float):
            value = math.ceil(value)
        return super(PositiveBigAutoField, self).get_prep_lookup(lookup_type, value)

    def to_python(self, value):
        """Convert *value* to ``int``.

        ``None`` is returned unchanged. Raises ``ValidationError`` with the
        'invalid' message when the value cannot be converted.
        """
        if value is None:
            return value
        try:
            converted = int(value)
        except (TypeError, ValueError):
            raise exceptions.ValidationError(
                self.error_messages['invalid'],
                code='invalid',
                params={'value': value},
            )
        return converted

    def formfield(self, **kwargs):
        """Return an integer form field; caller kwargs override the defaults."""
        options = dict(
            min_value=0,
            max_value=PositiveBigAutoField.MAX_BIGINT * 2 - 1,
            form_class=forms.IntegerField,
        )
        options.update(kwargs)
        return super(PositiveBigAutoField, self).formfield(**options)
66 |
67 |
class PositiveBigAutoForeignKey(models.ForeignKey):
    """A special foreign key field for positive big auto fields"""

    def db_type(self, connection):
        """Return the column type matching the field this key points to.

        A target that is a ``PositiveBigAutoField`` is stored using the
        column type of ``PositiveBigIntegerField`` (the same integer type
        without the auto-increment part); any other target simply
        supplies its own column type.
        """
        target = self.related_field
        if not isinstance(target, PositiveBigAutoField):
            return target.db_type(connection=connection)
        return PositiveBigIntegerField().db_type(connection=connection)
try:
    # If we are using south, we need some rules to use these fields
    from south.modelsinspector import add_introspection_rules
    # Raw strings keep the regex escape "\." intact without relying on
    # Python passing an unrecognised string escape through unchanged.
    add_introspection_rules([], [r"^twitter_stream\.fields\.PositiveBigAutoField"])
    add_introspection_rules([], [r"^twitter_stream\.fields\.PositiveBigIntegerField"])
    add_introspection_rules([], [r"^twitter_stream\.fields\.PositiveBigAutoForeignKey"])
except ImportError:
    # South is optional; without it there is nothing to register.
    pass
90 |
--------------------------------------------------------------------------------
/twitter_stream/views.py:
--------------------------------------------------------------------------------
1 | from datetime import timedelta
2 | import json
3 | from django.conf import settings
4 | from django.utils import timezone
5 | from django.template import RequestContext
6 | from django.template.loader import render_to_string
7 | from django.views import generic
8 | from django.contrib.admin.views.decorators import staff_member_required
9 | from jsonview.decorators import json_view
10 | from twitter_stream.models import FilterTerm, StreamProcess
11 | from swapper import load_model
12 | from django.db import models
13 |
14 |
def _render_to_string_request(request, template, dictionary):
    """
    Render ``template`` to a string with a ``RequestContext``.

    Building the context from the request is what gets all of the
    TEMPLATE_CONTEXT_PROCESSORS activated while the template renders.
    """
    return render_to_string(
        template,
        context_instance=RequestContext(request, dictionary),
    )
23 |
24 |
def stream_status():
    """
    Collect a snapshot of the streaming state for the status views.

    Returns a dictionary with these keys:
      running     -- True when any current stream process has the running status
      terms       -- the ``term`` of every enabled FilterTerm
      processes   -- result of StreamProcess.get_current_stream_processes()
      tweet_count -- result of Tweet.count_approx()
      earliest    -- result of Tweet.get_earliest_created_at()
      latest      -- result of Tweet.get_latest_created_at()
      avg_rate    -- tweet_count divided by the seconds between earliest and
                     latest; None when either timestamp is missing or the
                     span is not positive
      timeline    -- list of {'time': ISO formatted string, 'tweets': count}
                     rows for tweets created in the 20 minutes before the
                     minute of the latest tweet
    """
    terms = FilterTerm.objects.filter(enabled=True)
    processes = StreamProcess.get_current_stream_processes()
    running = any(p.status == StreamProcess.STREAM_STATUS_RUNNING
                  for p in processes)

    Tweet = load_model("twitter_stream", "Tweet")
    tweet_count = Tweet.count_approx()
    earliest_time = Tweet.get_earliest_created_at()
    latest_time = Tweet.get_latest_created_at()

    avg_rate = None
    if earliest_time is not None and latest_time is not None:
        elapsed_seconds = (latest_time - earliest_time).total_seconds()
        # Identical earliest/latest timestamps (e.g. a single stored tweet)
        # give a zero-length span; leave the rate undefined instead of
        # dividing by zero.
        if elapsed_seconds > 0:
            avg_rate = float(tweet_count) / elapsed_seconds

    # Get the tweets / minute over the past 20 minutes
    tweet_counts = []
    if latest_time is not None:
        latest_time_minute = latest_time.replace(second=0, microsecond=0)

        # SQL expression used as the grouping key. MySQL and PostgreSQL get
        # an expression that drops the seconds; any other engine groups on
        # the raw created_at value.
        engine = settings.DATABASES['default']['ENGINE']
        if engine.endswith('mysql'):
            drop_seconds = "created_at - INTERVAL SECOND(created_at) SECOND"
        elif engine.endswith('postgresql_psycopg2'):
            drop_seconds = "date_trunc('minute', created_at)"
        else:
            drop_seconds = "created_at"

        tweet_counts = Tweet.objects.extra(select={
            'time': drop_seconds
        }) \
            .filter(created_at__gt=latest_time_minute - timedelta(minutes=20)) \
            .values('time') \
            .order_by('time') \
            .annotate(tweets=models.Count('id'))

        tweet_counts = list(tweet_counts)

        # Convert the time values to ISO strings so the rows can be
        # serialized as JSON by the views.
        for row in tweet_counts:
            row['time'] = row['time'].isoformat()

    return {
        'running': running,
        'terms': [t.term for t in terms],
        'processes': processes,
        'tweet_count': tweet_count,
        'earliest': earliest_time,
        'latest': latest_time,
        'avg_rate': avg_rate,
        'timeline': tweet_counts
    }
78 |
79 |
80 |
class StatusView(generic.TemplateView):
    """Status page view; the timeline is handed to the template as a JSON string."""
    template_name = 'twitter_stream/status.html'

    def get_context_data(self, **kwargs):
        """Return a context holding the current status under the 'status' key."""
        snapshot = stream_status()
        snapshot['timeline'] = json.dumps(snapshot['timeline'])
        return dict(status=snapshot)


# The URL-facing view, wrapped with staff_member_required.
status = staff_member_required(StatusView.as_view())
92 |
@staff_member_required
@json_view
def json_status(request, task=None):
    """
    Return the current status as a dictionary for the JSON response.

    'display' holds the rendered status_display.html fragment and
    'timeline' holds the raw timeline rows from stream_status().
    """
    snapshot = stream_status()
    fragment = _render_to_string_request(
        request,
        'twitter_stream/status_display.html',
        {'status': snapshot},
    )
    return dict(display=fragment, timeline=snapshot['timeline'])
111 |
112 |
--------------------------------------------------------------------------------
/twitter_stream/management/commands/stream_from_file.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from optparse import make_option
3 | from logging.config import dictConfig
4 |
5 | import time
6 |
7 | from django.core.management.base import BaseCommand
8 | import signal
9 |
10 | from twitter_stream import models
11 | from twitter_stream import utils
12 | from twitter_stream import settings
13 |
# Setup logging if not already configured
logger = logging.getLogger(__name__)

if not logger.handlers:
    dictConfig({
        "version": 1,
        "disable_existing_loggers": False,
        "handlers": {
            "twitter_stream": {
                "level": "DEBUG",
                "class": "logging.StreamHandler",
            },
        },
        # Logger definitions have to sit under the "loggers" key. A logger
        # entry placed at the top level of this dict is not part of the
        # dictConfig schema, so the handler would never be attached.
        "loggers": {
            "twitter_stream": {
                "handlers": ["twitter_stream"],
                "level": "DEBUG"
            }
        }
    })
32 |
33 |
class Command(BaseCommand):
    """
    Streams tweets from an existing file. The file should
    be pretty-printed JSON dump from the streaming API.

    Example usage:
    python manage.py stream_from_file tweets.json
    python manage.py stream_from_file tweets.json --limit 100000
    python manage.py stream_from_file tweets.json --rate-limit 25 --poll-interval 25
    """

    option_list = BaseCommand.option_list + (
        make_option(
            '--poll-interval',
            action='store',
            dest='poll_interval',
            default=10,
            type=int,
            help='Seconds between tweet inserts.'
        ),
        make_option(
            '--rate-limit',
            action='store',
            dest='rate_limit',
            default=None,
            type=float,
            help='Rate to read in tweets.'
        ),
        make_option(
            '--limit',
            action='store',
            dest='limit',
            default=None,
            type=int,
            help='Limit the number of tweets read.'
        )
    )
    args = ''
    help = "Fakes a streaming connection to twitter by reading from a file."

    def handle(self, tweets_file=None, *args, **options):
        """
        Read tweets from ``tweets_file`` through a FakeTwitterStream.

        A StreamProcess record is created for the run and marked as stopped
        when the run ends. The command always finishes by calling the local
        ``stop`` handler, which raises SystemExit.
        """

        # The suggested time between heartbeats
        poll_interval = float(options.get('poll_interval', settings.POLL_INTERVAL))
        rate_limit = options.get('rate_limit', 50)
        limit = options.get('limit', None)
        prevent_exit = options.get('prevent_exit', settings.PREVENT_EXIT)

        # First expire any old stream process records that have failed
        # to report in for a while
        timeout_seconds = 3 * poll_interval
        models.StreamProcess.expire_timed_out()

        stream_process = models.StreamProcess.create(
            timeout_seconds=timeout_seconds
        )

        listener = utils.QueueStreamListener()
        checker = utils.FakeTermChecker(queue_listener=listener,
                                        stream_process=stream_process)

        def stop(signum, frame):
            """
            Register stream's death and exit.
            """
            logger.debug("Stopping because of signal")

            if stream_process:
                stream_process.status = models.StreamProcess.STREAM_STATUS_STOPPED
                stream_process.heartbeat()

            # Let the tweet listener know it should be quitting asap
            listener.set_terminate()

            raise SystemExit()

        # Installs signal handlers for handling SIGINT and SIGTERM
        # gracefully.
        signal.signal(signal.SIGINT, stop)
        signal.signal(signal.SIGTERM, stop)

        logger.info("Streaming from %s", tweets_file)
        if rate_limit:
            logger.info("Rate limit: %f", rate_limit)

        try:
            stream = utils.FakeTwitterStream(tweets_file,
                                             listener=listener, term_checker=checker,
                                             limit=limit, rate_limit=rate_limit)

            if prevent_exit:
                while checker.ok():
                    try:
                        stream.start_polling(poll_interval)
                    except Exception as e:
                        checker.error(e)
                        time.sleep(1)  # to avoid craziness

                # This loop only ends once the checker stops reporting ok,
                # so the error message belongs to this branch alone; a plain
                # run that simply finishes must not log it.
                logger.error("Stopping because of excess errors")
            else:
                stream.start_polling(poll_interval)

            stream_process.status = models.StreamProcess.STREAM_STATUS_STOPPED
            stream_process.heartbeat()

        except Exception as e:
            logger.error(e, exc_info=True)

        finally:
            # Reuse the signal handler for the shutdown bookkeeping; it
            # raises SystemExit, so the command always ends here.
            stop(None, None)
144 |
--------------------------------------------------------------------------------
/twitter_stream/static/twitter_stream/status.js:
--------------------------------------------------------------------------------
1 | (function () {
2 | var interval,
3 | update_chart;
4 | var config = window.twitter_stream_status_data;
5 |
6 | var UPDATE_INTERVAL = 15000;
7 | var CHART_HEIGHT = 250;
8 |
9 | function chart(target_element, config) {
10 |
11 |
12 | var margin = {top: 5, right: 20, bottom: 30, left: 50},
13 | width = config.width - margin.left - margin.right,
14 | height = config.height - margin.top - margin.bottom;
15 |
16 | var x = d3.time.scale()
17 | .range([0, width]);
18 |
19 | var y = d3.scale.linear()
20 | .range([height, 0])
21 | .domain([0, 1]);
22 |
23 | var xAxis = d3.svg.axis()
24 | .scale(x)
25 | .orient("bottom");
26 |
27 | var yAxis = d3.svg.axis()
28 | .scale(y)
29 | .orient("left");
30 |
31 | var svg = d3.select(target_element).append("svg")
32 | .attr("width", width + margin.left + margin.right)
33 | .attr("height", height + margin.top + margin.bottom)
34 | .append("g")
35 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
36 |
37 | var xAxisGroup = svg.append("g")
38 | .attr("class", "x axis")
39 | .attr("transform", "translate(0," + height + ")");
40 |
41 | var yAxisGroup = svg.append("g")
42 | .attr("class", "y axis");
43 |
44 | yAxisGroup.append("text")
45 | .attr("transform", "rotate(-90)")
46 | .attr("y", 6)
47 | .attr("dy", ".71em")
48 | .style("text-anchor", "end")
49 | .text("Tweets");
50 |
51 | var barsGroup = svg.append("g")
52 | .attr('class', 'bars');
53 |
54 | return function (data) {
55 |
56 | data.forEach(function (d) {
57 | d.time = new Date(d.time);
58 | });
59 |
60 | var dateRange = d3.extent(data, function (d) {
61 | return d.time;
62 | });
63 |
64 | x.domain(dateRange);
65 |
66 | y.domain([0, d3.max(data, function (d) {
67 | return d.tweets;
68 | })]);
69 |
70 | var leftMargin = 22;
71 | var minutesShown = (dateRange[1] - dateRange[0]) / 60000;
72 | var barWidth = Math.floor((width - leftMargin) / minutesShown);
73 | barWidth = Math.max(2, barWidth - (barWidth % 20));
74 |
75 | x.range([barWidth / 2 + leftMargin, width]);
76 |
77 | xAxisGroup.call(xAxis);
78 | yAxisGroup.call(yAxis);
79 |
80 | var bind = barsGroup.selectAll("g")
81 | .data(data);
82 |
83 | var enter = bind.enter()
84 | .append('g');
85 | enter.append('rect')
86 | enter.append('text')
87 | .attr("dy", ".75em");
88 |
89 | bind.exit()
90 | .remove();
91 |
92 | bind.attr('transform', function (d, i) {
93 | return "translate(" + (x(d.time) - barWidth / 2) + ",0)";
94 | })
95 | .classed('filling', function (d, i) {
96 | return i == data.length - 1
97 | });
98 |
99 | bind.select('rect')
100 | .attr('width', barWidth - 1)
101 | .transition()
102 | .attr("y", function (d) {
103 | return y(d.tweets);
104 | })
105 | .attr('height', function (d) {
106 | return height - y(d.tweets);
107 | });
108 |
109 | bind.select('text')
110 | .attr("x", barWidth / 2)
111 | .text(function (d) {
112 | return d.tweets;
113 | })
114 | .transition()
115 | .attr("y", function (d) {
116 | return y(d.tweets) + 3;
117 | });
118 | };
119 | }
120 |
121 |
122 | function update() {
123 | toggle_status_label(false);
124 | $.get(config.update_url)
125 | .done(function (response) {
126 | status_display.html(response.display);
127 | update_chart(response.timeline);
128 | toggle_status_label(true);
129 | })
130 | .fail(function (err, xhr) {
131 | console.log(err, xhr);
132 | });
133 | }
134 |
135 | function toggle_status_label(show) {
136 | var label = $('.status-label');
137 | label[0].borderWidth;
138 |
139 | if (show) {
140 | label.addClass('in');
141 | } else {
142 | label.removeClass('in');
143 | }
144 | }
145 |
146 | $(document).ready(function () {
147 | status_display = $('#twitter-stream-display');
148 |
149 | var chart_element = $('#twitter-stream-chart');
150 | update_chart = chart(chart_element[0], {
151 | width: chart_element.width(),
152 | height: CHART_HEIGHT
153 | });
154 | update_chart(config.timeline_data);
155 | interval = setInterval(update, UPDATE_INTERVAL);
156 | toggle_status_label(true);
157 | });
158 | })();
--------------------------------------------------------------------------------
/twitter_stream/migrations/0004_auto__del_field_tweet_analyzed_by.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from south.utils import datetime_utils as datetime
3 | from south.db import db
4 | from south.v2 import SchemaMigration
5 | from django.db import models
6 |
7 |
8 | class Migration(SchemaMigration):
9 |
10 | def forwards(self, orm):
11 | # Deleting field 'Tweet.analyzed_by'
12 | db.delete_column(u'twitter_stream_tweet', 'analyzed_by')
13 |
14 |
15 | def backwards(self, orm):
16 | # Adding field 'Tweet.analyzed_by'
17 | db.add_column(u'twitter_stream_tweet', 'analyzed_by',
18 | self.gf('django.db.models.fields.SmallIntegerField')(default=0, db_index=True),
19 | keep_default=False)
20 |
21 |
22 | models = {
23 | u'twitter_stream.apikey': {
24 | 'Meta': {'object_name': 'ApiKey'},
25 | 'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
26 | 'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
27 | 'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
28 | 'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
29 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
30 | 'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
31 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
32 | 'name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
33 | },
34 | u'twitter_stream.filterterm': {
35 | 'Meta': {'object_name': 'FilterTerm'},
36 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
37 | 'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
38 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
39 | 'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
40 | },
41 | u'twitter_stream.streamprocess': {
42 | 'Meta': {'object_name': 'StreamProcess'},
43 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
44 | 'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
45 | 'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
46 | 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
47 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
48 | 'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
49 | 'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
50 | 'memory_usage': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '30', 'null': 'True', 'blank': 'True'}),
51 | 'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
52 | 'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
53 | 'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
54 | 'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
55 | },
56 | u'twitter_stream.tweet': {
57 | 'Meta': {'object_name': 'Tweet'},
58 | 'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
59 | 'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
60 | 'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
61 | u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
62 | 'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
63 | 'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
64 | 'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
65 | 'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
66 | 'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
67 | 'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
68 | 'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
69 | 'truncated': ('django.db.models.fields.BooleanField', [], {}),
70 | 'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
71 | 'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
72 | 'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
73 | 'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
74 | 'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
75 | 'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
76 | 'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
77 | 'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
78 | 'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
79 | 'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
80 | 'user_verified': ('django.db.models.fields.BooleanField', [], {})
81 | }
82 | }
83 |
84 | complete_apps = ['twitter_stream']
--------------------------------------------------------------------------------
/twitter_stream/migrations/0006_auto__chg_field_tweet_id.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from south.utils import datetime_utils as datetime
3 | from south.db import db
4 | from south.v2 import SchemaMigration
5 | from django.db import models
6 |
7 |
class Migration(SchemaMigration):
    """South schema migration that changes the column type of ``Tweet.id``."""

    def forwards(self, orm):
        """Alter ``twitter_stream_tweet.id`` to a ``PositiveBigAutoField`` primary key."""

        # Changing field 'Tweet.id'
        db.alter_column(u'twitter_stream_tweet', 'id', self.gf('twitter_stream.fields.PositiveBigAutoField')(primary_key=True))

    def backwards(self, orm):
        """Alter ``twitter_stream_tweet.id`` back to a plain ``AutoField`` primary key."""

        # Changing field 'Tweet.id'
        db.alter_column(u'twitter_stream_tweet', u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True))

    # Model definitions recorded with this migration (the state of the
    # app's models after `forwards` has run); note Tweet.id is listed
    # as a PositiveBigAutoField here.
    models = {
        u'twitter_stream.apikey': {
            'Meta': {'object_name': 'ApiKey'},
            'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'app_name': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.filterterm': {
            'Meta': {'object_name': 'FilterTerm'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.streamprocess': {
            'Meta': {'object_name': 'StreamProcess'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
            'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
            'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
            'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
            'memory_usage': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '30', 'null': 'True', 'blank': 'True'}),
            'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
            'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
        },
        u'twitter_stream.tweet': {
            'Meta': {'object_name': 'Tweet'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
            'id': ('twitter_stream.fields.PositiveBigAutoField', [], {'primary_key': 'True'}),
            'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
            'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'truncated': ('django.db.models.fields.BooleanField', [], {}),
            'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
            'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
            'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'user_verified': ('django.db.models.fields.BooleanField', [], {})
        }
    }

    # Apps whose models are fully described by the `models` dict above.
    complete_apps = ['twitter_stream']
--------------------------------------------------------------------------------
/twitter_stream/migrations/0002_auto__add_index_tweet_analyzed_by__add_index_tweet_created_at.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from south.utils import datetime_utils as datetime
3 | from south.db import db
4 | from south.v2 import SchemaMigration
5 | from django.db import models
6 |
7 |
class Migration(SchemaMigration):
    """South schema migration that indexes ``Tweet.analyzed_by`` and ``Tweet.created_at``."""

    def forwards(self, orm):
        """Create single-column indexes on ``analyzed_by`` and ``created_at``."""
        # Adding index on 'Tweet', fields ['analyzed_by']
        db.create_index(u'twitter_stream_tweet', ['analyzed_by'])

        # Adding index on 'Tweet', fields ['created_at']
        db.create_index(u'twitter_stream_tweet', ['created_at'])


    def backwards(self, orm):
        """Drop the two indexes, in the reverse order of their creation."""
        # Removing index on 'Tweet', fields ['created_at']
        db.delete_index(u'twitter_stream_tweet', ['created_at'])

        # Removing index on 'Tweet', fields ['analyzed_by']
        db.delete_index(u'twitter_stream_tweet', ['analyzed_by'])


    # Model definitions recorded with this migration (the state of the
    # app's models after `forwards` has run); both indexed Tweet fields
    # are listed with db_index set.
    models = {
        u'twitter_stream.apikey': {
            'Meta': {'object_name': 'ApiKey'},
            'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.filterterm': {
            'Meta': {'object_name': 'FilterTerm'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.streamprocess': {
            'Meta': {'object_name': 'StreamProcess'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
            'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
            'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
            'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
            'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
            'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
        },
        u'twitter_stream.tweet': {
            'Meta': {'object_name': 'Tweet'},
            'analyzed_by': ('django.db.models.fields.SmallIntegerField', [], {'default': '0', 'db_index': 'True'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
            'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'truncated': ('django.db.models.fields.BooleanField', [], {}),
            'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
            'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
            'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'user_verified': ('django.db.models.fields.BooleanField', [], {})
        }
    }

    # Apps whose models are fully described by the `models` dict above.
    complete_apps = ['twitter_stream']
--------------------------------------------------------------------------------
/twitter_stream/migrations/0003_auto__add_field_streamprocess_memory_usage.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from south.utils import datetime_utils as datetime
3 | from south.db import db
4 | from south.v2 import SchemaMigration
5 | from django.db import models
6 |
7 |
class Migration(SchemaMigration):
    """South schema migration that adds ``StreamProcess.memory_usage``."""

    def forwards(self, orm):
        """Add the nullable ``memory_usage`` CharField (max length 30) column."""
        # Adding field 'StreamProcess.memory_usage'
        db.add_column(u'twitter_stream_streamprocess', 'memory_usage',
                      self.gf('django.db.models.fields.CharField')(default=None, max_length=30, null=True, blank=True),
                      keep_default=False)


    def backwards(self, orm):
        """Drop the ``memory_usage`` column again."""
        # Deleting field 'StreamProcess.memory_usage'
        db.delete_column(u'twitter_stream_streamprocess', 'memory_usage')


    # Model definitions recorded with this migration (the state of the
    # app's models after `forwards` has run); StreamProcess now lists
    # memory_usage.
    models = {
        u'twitter_stream.apikey': {
            'Meta': {'object_name': 'ApiKey'},
            'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.filterterm': {
            'Meta': {'object_name': 'FilterTerm'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.streamprocess': {
            'Meta': {'object_name': 'StreamProcess'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
            'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
            'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
            'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
            'memory_usage': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '30', 'null': 'True', 'blank': 'True'}),
            'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
            'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
        },
        u'twitter_stream.tweet': {
            'Meta': {'object_name': 'Tweet'},
            'analyzed_by': ('django.db.models.fields.SmallIntegerField', [], {'default': '0', 'db_index': 'True'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
            'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'truncated': ('django.db.models.fields.BooleanField', [], {}),
            'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
            'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
            'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'user_verified': ('django.db.models.fields.BooleanField', [], {})
        }
    }

    # Apps whose models are fully described by the `models` dict above.
    complete_apps = ['twitter_stream']
--------------------------------------------------------------------------------
/twitter_stream/migrations/0005_auto__del_field_apikey_name__add_field_apikey_user_name__add_field_api.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from south.utils import datetime_utils as datetime
3 | from south.db import db
4 | from south.v2 import SchemaMigration
5 | from django.db import models
6 |
7 |
class Migration(SchemaMigration):
    """South schema migration that renames ``ApiKey.name`` and adds ``ApiKey.app_name``."""

    def forwards(self, orm):
        """Rename column ``name`` to ``user_name`` and add the ``app_name`` column."""
        # Renaming field 'ApiKey.name' to 'ApiKey.user_name' (the column
        # is renamed in place rather than dropped and re-created)
        db.rename_column(u'twitter_stream_apikey', 'name', 'user_name')

        # Adding field 'ApiKey.app_name'
        # NOTE(review): the column is not declared null=True yet is added
        # with default=None; this may fail on a table that already holds
        # ApiKey rows -- confirm against the target database backend.
        db.add_column(u'twitter_stream_apikey', 'app_name',
                      self.gf('django.db.models.fields.CharField')(default=None, max_length=250),
                      keep_default=False)


    def backwards(self, orm):
        """Rename ``user_name`` back to ``name`` and drop the ``app_name`` column."""

        # Renaming field 'ApiKey.user_name' back to 'ApiKey.name'
        db.rename_column(u'twitter_stream_apikey', 'user_name', 'name')

        # Deleting field 'ApiKey.app_name'
        db.delete_column(u'twitter_stream_apikey', 'app_name')


    # Model definitions recorded with this migration (the state of the
    # app's models after `forwards` has run); ApiKey lists app_name and
    # user_name instead of name.
    models = {
        u'twitter_stream.apikey': {
            'Meta': {'object_name': 'ApiKey'},
            'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'app_name': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.filterterm': {
            'Meta': {'object_name': 'FilterTerm'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
        },
        u'twitter_stream.streamprocess': {
            'Meta': {'object_name': 'StreamProcess'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
            'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
            'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
            'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
            'memory_usage': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '30', 'null': 'True', 'blank': 'True'}),
            'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
            'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
        },
        u'twitter_stream.tweet': {
            'Meta': {'object_name': 'Tweet'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
            'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
            'truncated': ('django.db.models.fields.BooleanField', [], {}),
            'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
            'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
            'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
            'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
            'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
            'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
            'user_verified': ('django.db.models.fields.BooleanField', [], {})
        }
    }

    # Apps whose models are fully described by the `models` dict above.
    complete_apps = ['twitter_stream']
--------------------------------------------------------------------------------
/twitter_stream/utils/file_stream.py:
--------------------------------------------------------------------------------
1 | """
2 | Parses a file containing pretty-printed json objects as produced by Twitter.
3 |
4 | For example:
5 | {
6 | ... some tweet json
7 | },
8 | {
9 | ... some other tweet json
10 | },
11 | """
12 |
13 | import time
14 | import os
15 | import json
16 | import logging
17 | import threading
18 |
19 | import twitter_monitor
20 | from twitter_stream import models
21 |
22 | logger = logging.getLogger(__name__)
23 |
class ObjDict(dict):
    """
    A dict whose items can also be read, assigned and deleted
    using attribute syntax (``d.key`` is ``d['key']``).

    Attribute access for a missing key raises ``AttributeError``
    (not ``KeyError``), so ``hasattr()`` and ``getattr()`` with a
    default behave as they do for ordinary objects.
    """

    def __getattr__(self, item):
        # Only invoked when regular attribute lookup has failed, so
        # dict methods such as ``keys`` are never shadowed by items.
        try:
            return self[item]
        except KeyError:
            raise AttributeError(item)

    def __setattr__(self, key, value):
        # Every attribute assignment is stored as a dict item.
        self[key] = value

    def __delattr__(self, item):
        try:
            del self[item]
        except KeyError:
            raise AttributeError(item)
34 |
class FakeTermChecker(twitter_monitor.TermChecker):
    """
    Term checker used with the file-based stream.

    Instead of comparing tracking terms, each check drains the
    listener's tweet queue and updates the stream process record.
    """

    def __init__(self, queue_listener, stream_process):
        super(FakeTermChecker, self).__init__()

        # The listener that holds the queue of tweets waiting to be
        # written to the database
        self.listener = queue_listener
        # The process record updated on every check
        self.process = stream_process
        # Number of errors reported through error()
        self.error_count = 0

    def check(self):
        """Process queued tweets, update the process record, return True."""

        # Handling the tweet queue regularly matters more than
        # refreshing tracking terms, so it is done on every check.
        rate = self.listener.process_tweet_queue()

        # Record the current state on the process and send a heartbeat
        record = self.process
        record.tweet_rate = rate
        record.error_count = self.error_count
        record.status = models.StreamProcess.STREAM_STATUS_RUNNING
        record.heartbeat()

        return True

    def ok(self):
        """Return True while fewer than 5 errors have been recorded."""
        too_many_errors = self.error_count >= 5
        return not too_many_errors

    def error(self, exc):
        """Log the given exception and count it."""
        logger.error(exc)
        self.error_count = self.error_count + 1
64 |
# Number of tweets to process between progress log messages
# (compared against the tweet count while reading the file)
TWEETS_BETWEEN_PROGRESS = 7000
67 |
68 | class FakeTwitterStream(object):
69 | """
70 | A tweet processor with a similar interface to the
71 | DynamicTweetStream class. It launches the tweet file
72 | reading in a separate thread.
73 | """
74 | def __init__(self, tweets_file, listener, term_checker,
75 | limit=None, rate_limit=None, pretty=False):
76 |
77 | self.tweets_file = tweets_file
78 |
79 | self.limit = limit
80 | self.rate_limit = rate_limit
81 | self.pretty = pretty
82 |
83 | self.listener = listener
84 | self.term_checker = term_checker
85 |
86 | self.tracking_terms = []
87 | self.polling = False
88 | self.stream = None
89 | self.last_created_at = 0
90 |
91 | self.polling_interrupt = threading.Event()
92 |
93 | def process(self, tweet, raw_tweet):
94 | self.last_created_at = tweet['created_at']
95 | return self.listener.on_status(tweet)
96 |
97 | def next_tweet_pretty(self, infile):
98 | # start our read loop with valid data
99 |
100 | raw = ''
101 | tweet_start_found = False
102 |
103 | while True:
104 | try:
105 | line = next(infile)
106 | except StopIteration:
107 | return None
108 |
109 | if line[0] == '{':
110 | # start of tweet
111 | tweet_start_found = True
112 | raw = ''
113 | raw += line
114 | elif line[0:2] == '},' and tweet_start_found == True:
115 | # end of tweet
116 | raw += line[0]
117 | tweet_start_found = False
118 |
119 | return raw
120 |
121 | elif tweet_start_found == True:
122 | # some line in the middle
123 | raw += line
124 |
125 | def next_tweet(self, infile):
126 | return next(infile, None)
127 |
def run(self):
    """
    Read tweets from ``self.tweets_file`` and pass each one to ``self.process``.

    ``self.tweets_file`` may be a path or an object with a ``read``
    attribute. A file opened here from a path is closed before returning;
    a file-like object supplied by the caller is left open.

    Reading stops when the input is exhausted, when ``self.process``
    returns False, or once ``self.limit`` tweets have been processed.
    When ``self.rate_limit`` is set, tweets are processed at no more than
    that many per second.
    """

    logger.info("Parsing %s...", self.tweets_file)
    if self.limit:
        logger.info("up to %d tweets...", self.limit)

    # Only close the file afterwards if we were the ones who opened it
    owns_file = not hasattr(self.tweets_file, 'read')
    if owns_file:
        infile = open(self.tweets_file, "rt")
    else:
        infile = self.tweets_file

    tweet_count = 0
    last_report_count = 0

    if self.rate_limit:
        time_of_last_tweet = time.time()
        time_between_tweets = 1.0 / self.rate_limit

    try:
        while True:
            if self.pretty:
                raw = self.next_tweet_pretty(infile)
            else:
                raw = self.next_tweet(infile)

            if raw is None:
                break

            raw = raw.strip()
            if not raw:
                continue

            tweet = json.loads(raw)

            # make sure it is a tweet
            if 'user' in tweet:

                if self.rate_limit:
                    # Sleep only for what is left of the interval, so the
                    # effective rate matches the requested one
                    remaining = time_between_tweets - (time.time() - time_of_last_tweet)
                    while remaining > 0:
                        time.sleep(remaining)
                        remaining = time_between_tweets - (time.time() - time_of_last_tweet)

                if self.process(tweet, raw) is False:
                    logger.warning("Stopping file stream")
                    break

                tweet_count += 1

                if self.rate_limit:
                    time_of_last_tweet = time.time()

                if tweet_count - last_report_count > TWEETS_BETWEEN_PROGRESS:
                    last_report_count = tweet_count

                    logger.info("Read in %d tweets", tweet_count)
                    if self.last_created_at:
                        logger.info('Inserted tweets up to %s', str(self.last_created_at))

                # Stop as soon as exactly `limit` tweets have been processed
                if self.limit and tweet_count >= self.limit:
                    logger.info("Limit of %d reached.", self.limit)
                    break
    finally:
        if owns_file:
            infile.close()

    logger.info("Read in %d tweets (total)", tweet_count)
    if self.last_created_at:
        logger.info('Tweets stopped at %s', str(self.last_created_at))
    logger.info("Done reading file.")
192 |
def start_polling(self, interval):
    """
    Start polling for term updates and streaming.

    Loops while ``self.polling`` is true. Each pass calls
    ``update_stream()`` and ``handle_exceptions()``, then waits on
    ``self.polling_interrupt`` for the rest of ``interval`` seconds
    (at least 0.1 seconds).
    """

    self.polling = True

    # clear the stored list of terms - we aren't tracking any
    self.term_checker.reset()

    logger.info("Starting polling for changes to the track list")
    while self.polling:
        loop_start = time.time()

        self.update_stream()
        self.handle_exceptions()

        # wait for the interval (compensate for the time taken in the loop)
        elapsed = time.time() - loop_start
        self.polling_interrupt.wait(max(0.1, interval - elapsed))

    # Logger.warn is a deprecated alias; Logger.warning is the supported name
    logger.warning("Term poll ceased!")
215 |
def update_stream(self):
    """
    Start the stream when the term checker reports a change.

    Does nothing when ``self.term_checker.check()`` returns a falsy value.
    """

    tracking_changed = self.term_checker.check()
    if tracking_changed:
        # The tracking list is different now, so (re)start streaming
        self.start_stream()
227 |
def start_stream(self):
    """
    Run ``self.run`` on a new thread unless ``self.stream`` is already set.
    """

    if self.stream:
        # A stream thread already exists; nothing to do
        return

    worker = threading.Thread(target=self.run)
    self.stream = worker
    worker.start()
236 |
def handle_exceptions(self):
    """
    Re-raise an exception recorded by the listener, if there is one.

    Reads ``self.listener.streaming_exception``; when it is not None the
    exception is logged as a warning and raised in the calling thread.
    """
    # Read the attribute once so the value that is logged is the one raised
    streaming_exception = self.listener.streaming_exception
    if streaming_exception is not None:
        # Logger.warn is a deprecated alias; Logger.warning is the supported name
        logger.warning("Streaming exception: %s", streaming_exception)
        # propagate outward
        raise streaming_exception
243 |
--------------------------------------------------------------------------------
/twitter_stream/utils/streaming.py:
--------------------------------------------------------------------------------
1 | try:
2 | import queue
3 | except ImportError:
4 | import Queue as queue
5 | import logging
6 | import time
7 | import json
8 | import sys
9 |
10 | import twitter_monitor
11 | from twitter_stream import settings, models
12 | from swapper import load_model
13 |
14 | __all__ = ['FeelsTermChecker', 'QueueStreamListener']
15 |
16 | logger = logging.getLogger(__name__)
17 |
18 |
class TweetQueue(queue.Queue):
    """
    Extends the Queue class with get_all methods that drain the queue
    in a single call.
    """

    def get_all(self, block=True, timeout=None):
        """Remove and return all the items from the queue, as a list.

        If optional args 'block' is true and 'timeout' is None (the default),
        block if necessary until at least one item is available. If 'timeout'
        is a non-negative number, it blocks at most 'timeout' seconds and
        raises the Empty exception if no item was available within that time.
        Otherwise ('block' is false), return the items if any are immediately
        available, else raise the Empty exception ('timeout' is ignored
        in that case).

        Raises ValueError if 'timeout' is negative.
        """
        with self.not_empty:
            if not block:
                if not self._qsize():
                    raise queue.Empty
            elif timeout is None:
                while not self._qsize():
                    self.not_empty.wait()
            elif timeout < 0:
                raise ValueError("'timeout' must be a non-negative number")
            else:
                endtime = time.time() + timeout
                while not self._qsize():
                    remaining = endtime - time.time()
                    if remaining <= 0.0:
                        raise queue.Empty
                    self.not_empty.wait(remaining)
            items = self._get_all()
            # Every slot has just been freed, so wake all blocked producers
            # rather than a single one.
            self.not_full.notify_all()
            return items

    def get_all_nowait(self):
        """Remove and return all the items from the queue without blocking.

        Only get items if immediately available. Otherwise
        raise the Empty exception.
        """
        return self.get_all(False)

    def _get_all(self):
        """
        Remove every item from the underlying deque and return them as a
        list, oldest first. The caller must hold the queue's lock.
        """
        result = list(self.queue)
        self.queue.clear()
        return result
74 |
75 |
class FeelsTermChecker(twitter_monitor.TermChecker):
    """
    Term checker that reads the enabled filter terms from the database.

    Each check also flushes the listener's tweet queue and records the
    resulting rate, the error count and the status on the stream process,
    so that work happens here rather than on the streaming thread.
    """

    def __init__(self, queue_listener, stream_process):
        super(FeelsTermChecker, self).__init__()

        # Record describing this streaming process
        self.process = stream_process
        # Listener holding the tweets that still need to be written out
        self.listener = queue_listener
        self.error_count = 0

    def update_tracking_terms(self):
        """
        Flush queued tweets, update the process record, and return the
        set of enabled filter terms.
        """
        process = self.process

        # Flushing the tweet queue comes first: it matters more to do
        # that regularly than to refresh the tracking terms.
        process.tweet_rate = self.listener.process_tweet_queue()
        process.error_count = self.error_count

        # Look up the terms that are currently switched on
        enabled_terms = models.FilterTerm.objects.filter(enabled=True)

        has_terms = len(enabled_terms) > 0
        process.status = (
            models.StreamProcess.STREAM_STATUS_RUNNING
            if has_terms
            else models.StreamProcess.STREAM_STATUS_WAITING
        )

        process.heartbeat()

        return {entry.term for entry in enabled_terms}

    def ok(self):
        """Return True while fewer than five errors have been recorded."""
        return self.error_count < 5

    def error(self, exc):
        """Log ``exc`` and add one to the error count."""
        logger.error(exc)
        self.error_count += 1
119 |
120 |
class QueueStreamListener(twitter_monitor.JsonStreamListener):
    """
    Saves tweets in a queue for later insertion into database
    when process_tweet_queue() is called.

    Note that this is operated by the streaming thread.
    """

    def __init__(self, api=None, to_file=None):
        """
        Listens for tweets from Tweepy and saves them in the database
        when process_tweet_queue() is called (in a separate thread, probably).

        If to_file is given, tweets are written to the file instead.
        JSON formatted, one per line.
        """
        super(QueueStreamListener, self).__init__(api)

        self.terminate = False

        # A place to put the tweets
        self.queue = TweetQueue()

        # For calculating tweets / sec
        self.time = time.time()

        # Place for saving tweets if not in the database.
        self.to_file = to_file
        self._output_file = None

    @staticmethod
    def _id_for_log(payload):
        """Return ``payload['id_str']`` for log messages, or a placeholder."""
        try:
            return payload['id_str']
        except Exception:
            # Logging a failure must never raise a second error
            return '<unknown id>'

    def on_status(self, status):
        """
        Queue ``status`` for later processing.

        Returns False once set_terminate() has been called, True otherwise.
        """
        self.queue.put_nowait(status)

        # If terminate gets set, this should take out the tweepy stream thread
        return not self.terminate

    def process_tweet_queue(self):
        """
        Inserts any queued tweets into the database, or appends them to
        ``to_file`` when one was given.

        It is ok for this to be called on a thread other than the streaming thread.

        Returns the number of tweets handled per second since the previous
        call, or 0 when nothing was queued.
        """

        # this is for calculating the tps rate
        now = time.time()
        diff = now - self.time
        self.time = now

        try:
            batch = self.queue.get_all_nowait()
        except queue.Empty:
            return 0

        if not batch:
            return 0

        Tweet = load_model("twitter_stream", "Tweet")

        tweets = []
        for status in batch:
            if settings.CAPTURE_EMBEDDED and 'retweeted_status' in status:
                if self.to_file:
                    tweets.append(json.dumps(status['retweeted_status']))
                else:
                    try:
                        retweeted = Tweet.create_from_json(status['retweeted_status'])
                        if retweeted is not None:
                            tweets.append(retweeted)
                    except Exception:
                        # One bad tweet must not abort the rest of the batch
                        logger.error("Failed to parse retweeted %s",
                                     self._id_for_log(status['retweeted_status']),
                                     exc_info=True)

            if self.to_file:
                if 'retweeted_status' in status:
                    del status['retweeted_status']

                tweets.append(json.dumps(status))
            else:
                try:
                    tweet = Tweet.create_from_json(status)
                    if tweet is not None:
                        tweets.append(tweet)
                except Exception:
                    logger.error("Failed to parse tweet %s",
                                 self._id_for_log(status), exc_info=True)

        # Two calls can land on the same clock tick, so guard the division
        rate = len(tweets) / diff if diff > 0 else 0.0

        if tweets:
            if self.to_file:
                if not self._output_file or self._output_file.closed:
                    self._output_file = open(self.to_file, 'ab')
                # The file is opened in binary mode, so write bytes
                payload = "\n".join(tweets) + "\n"
                self._output_file.write(payload.encode('utf-8'))
                self._output_file.flush()
                logger.info("Dumped %s tweets at %s tps to %s", len(tweets), rate, self.to_file)
            else:
                Tweet.objects.bulk_create(tweets, settings.INSERT_BATCH_SIZE)
                logger.info("Inserted %s tweets at %s tps", len(tweets), rate)
        else:
            logger.info("Saved 0 tweets")

        if settings.DEBUG:
            # Prevent apparent memory leaks
            # https://docs.djangoproject.com/en/dev/faq/models/#why-is-django-leaking-memory
            from django import db
            db.reset_queries()

        return rate

    def set_terminate(self):
        """Ask the streaming thread to stop at the next received status."""
        self.terminate = True
229 |
--------------------------------------------------------------------------------
/twitter_stream/management/commands/stream.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from optparse import make_option
3 | from logging.config import dictConfig
4 | import time
5 | import signal
6 | from django.core.exceptions import ObjectDoesNotExist
7 |
8 | from django.core.management.base import BaseCommand
9 | import sys
10 | import tweepy
11 | import twitter_monitor
12 | from twitter_stream import models
13 | from twitter_stream import utils
14 | from twitter_stream import settings
15 |
16 |
# Setup logging if not already configured
logger = logging.getLogger(__name__)
if not logger.handlers:
    dictConfig({
        "version": 1,
        "disable_existing_loggers": False,
        "handlers": {
            "twitter_stream": {
                "level": "DEBUG",
                "class": "logging.StreamHandler",
            },
        },
        # Logger definitions must sit under the "loggers" key; dictConfig
        # silently ignores unknown top-level keys, which would leave the
        # handler above unattached.
        "loggers": {
            "twitter_stream": {
                "handlers": ["twitter_stream"],
                "level": "DEBUG"
            },
        },
    })
34 |
35 |
class Command(BaseCommand):
    """
    Starts a process that streams data from Twitter.

    Example usage:
    python manage.py stream
    python manage.py stream --poll-interval 25
    python manage.py stream MyCredentialsName
    """

    option_list = BaseCommand.option_list + (
        make_option(
            '--poll-interval',
            action='store',
            dest='poll_interval',
            default=settings.POLL_INTERVAL,
            help='Seconds between term updates and tweet inserts.'
        ),
        make_option(
            '--prevent-exit',
            action='store_true',
            dest='prevent_exit',
            default=False,
            help='Put the stream in a loop to prevent random termination. Use this if you are not running inside a process management system like supervisord.'
        ),
        make_option(
            '--to-file',
            action='store',
            dest='to_file',
            default=None,
            help='Write tweets to the given JSON file instead of the database.'
        ),
        make_option(
            '--from-file',
            action='store',
            dest='from_file',
            default=None,
            help='Read tweets from a given file, one JSON tweet per line.'
        ),
        make_option(
            '--from-file-long',
            action='store',
            dest='from_file_long',
            default=None,
            help='Read tweets from a given file, where JSON tweets are pretty-printed.'
        ),
        make_option(
            '--rate-limit',
            action='store',
            dest='rate_limit',
            default=None,
            type=float,
            help='Rate to read in tweets, used ONLY if streaming from a file.'
        ),
        make_option(
            '--limit',
            action='store',
            dest='limit',
            default=None,
            type=int,
            help='Limit the number of tweets, used ONLY if streaming from a file.'
        )
    )
    args = ''
    help = "Starts a streaming connection to Twitter"

    def handle(self, keys_name=settings.DEFAULT_KEYS_NAME, *args, **options):
        """
        Run the stream until it stops or the process is signalled.

        ``keys_name`` selects the API keys to use when connecting to
        Twitter. With ``--from-file`` or ``--from-file-long`` tweets are
        read from that file (or stdin for '-') instead and no keys are
        needed. Always finishes by calling the local ``stop`` handler,
        which raises SystemExit.
        """

        # The suggested time between heartbeats
        poll_interval = float(options.get('poll_interval', settings.POLL_INTERVAL))
        prevent_exit = options.get('prevent_exit', settings.PREVENT_EXIT)
        to_file = options.get('to_file', None)
        from_file = options.get('from_file', None)
        from_file_long = options.get('from_file_long', None)
        rate_limit = options.get('rate_limit', 50)
        limit = options.get('limit', None)

        if from_file and from_file_long:
            logger.error("Cannot use both --from-file and --from-file-long")
            sys.exit(1)

        # Treat both file options as one input source from here on, so the
        # pretty-printed variant takes exactly the same path as --from-file.
        read_pretty = bool(from_file_long)
        if read_pretty:
            from_file = from_file_long

        # First expire any old stream process records that have failed
        # to report in for a while
        timeout_seconds = 3 * poll_interval
        models.StreamProcess.expire_timed_out()

        # Create the stream process for tracking ourselves
        stream_process = models.StreamProcess.create(
            timeout_seconds=timeout_seconds
        )

        listener = utils.QueueStreamListener(to_file=to_file)

        if from_file:
            checker = utils.FakeTermChecker(queue_listener=listener,
                                            stream_process=stream_process)
        else:
            checker = utils.FeelsTermChecker(queue_listener=listener,
                                             stream_process=stream_process)

        def stop(signum, frame):
            """
            Register stream's death and exit.
            """

            if stream_process:
                stream_process.status = models.StreamProcess.STREAM_STATUS_STOPPED
                stream_process.heartbeat()

            # Let the tweet listener know it should be quitting asap
            listener.set_terminate()

            logger.error("Terminating")

            raise SystemExit()

        # Installs signal handlers for handling SIGINT and SIGTERM
        # gracefully.
        signal.signal(signal.SIGINT, stop)
        signal.signal(signal.SIGTERM, stop)

        keys = None
        if not from_file:
            # Only need keys if we are connecting to twitter
            while not keys:
                try:
                    keys = models.ApiKey.get_keys(keys_name)
                except ObjectDoesNotExist:
                    if keys_name:
                        logger.error("Keys for '%s' do not exist in the database. Waiting...", keys_name)
                    else:
                        logger.warning("No keys in the database. Waiting...")

                    time.sleep(5)
                    stream_process.status = models.StreamProcess.STREAM_STATUS_WAITING
                    stream_process.heartbeat()

        try:
            if keys:
                logger.info("Connecting to Twitter with keys for %s/%s", keys.user_name, keys.app_name)
                stream_process.keys = keys
                stream_process.save()

                # Only need auth if we have keys (i.e. connecting to twitter)
                auth = tweepy.OAuthHandler(keys.api_key, keys.api_secret)
                auth.set_access_token(keys.access_token, keys.access_token_secret)

                # Start and maintain the streaming connection...
                stream = twitter_monitor.DynamicTwitterStream(auth, listener, checker)

            elif from_file:

                if from_file == '-':
                    from_file = sys.stdin
                    logger.info("Reading tweets from stdin")
                else:
                    if read_pretty:
                        logger.info("Reading tweets from JSON file %s (pretty-printed)", from_file)
                    else:
                        logger.info("Reading tweets from JSON file %s", from_file)

                stream = utils.FakeTwitterStream(from_file, pretty=read_pretty,
                                                 listener=listener, term_checker=checker,
                                                 limit=limit, rate_limit=rate_limit)
            else:
                raise Exception("No api keys and we're not streaming from a file.")

            if to_file:
                logger.info("Saving tweets to %s", to_file)

            if prevent_exit:
                # Keep restarting the poll loop until the checker has seen
                # too many errors
                while checker.ok():
                    try:
                        stream.start_polling(poll_interval)
                    except Exception as e:
                        checker.error(e)
                        time.sleep(1)  # to avoid craziness
            else:
                stream.start_polling(poll_interval)

            logger.error("Stopping because of excess errors")
            stream_process.status = models.StreamProcess.STREAM_STATUS_STOPPED
            stream_process.heartbeat()

        except Exception as e:
            logger.error(e, exc_info=True)

        finally:
            stop(None, None)
230 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Django Twitter Stream
2 | =====================
3 |
4 | A Django app for streaming tweets from the Twitter API into a database.
5 |
6 | You can start a streaming process which will insert
7 | Tweets into the database as they are delivered
8 | by Twitter. The process monitors a table of "filter terms" which
9 | you can update over time if you want.
10 |
11 | This app uses the [tweepy](http://github.com/tweepy/tweepy) library
12 | for connecting to the Twitter API.
13 |
14 |
15 | Installation
16 | ------------
17 |
18 | Install with pip:
19 |
20 | ```bash
21 | pip install -e git+https://github.com/michaelbrooks/django-twitter-stream.git#egg=django-twitter-stream
22 | ```
23 |
24 | Add to `INSTALLED_APPS` in your Django settings file:
25 |
26 | ```python
27 | INSTALLED_APPS = (
28 | # other apps
29 | "twitter_stream",
30 | )
31 | ```
32 |
33 | > If you are using MySQL, you need to make sure that your database
34 | uses the `utf8mb4` character set for storing tweets, since MySQL's `utf8`
35 | character set does not include support for 4-byte characters.
36 | Add the following to your database settings:
37 |
38 | ```python
39 | DATABASES = {
40 | 'default': {
41 | 'ENGINE': 'django.db.backends.mysql',
42 | # username, password, etc...
43 | 'OPTIONS': {
44 | 'charset': 'utf8mb4',
45 | },
46 | }
47 | }
48 | ```
49 |
50 | Run `python manage.py syncdb` to update your database.
51 | This project also supports migrations with [South](http://south.aeracode.org/).
52 | If you are using South in your project, you should run `python manage.py migrate`.
53 |
54 | You need to supply your Twitter API keys and set up some filter terms
55 | before you can stream tweets. Instructions for this follow.
56 |
57 |
58 | ### Provide Twitter API Keys
59 |
60 | Once you have added `twitter_stream` to your list of installed apps,
61 | the Django Admin page should include a section for the `ApiKey` model.
62 | You can use this to input your Twitter API keys.
63 |
64 | If you do not have Twitter API keys, you must sign in to the
65 | [Twitter Developers site](http://dev.twitter.com). Next, go to
66 | your [applications list](https://dev.twitter.com/apps). If you do
67 | not have an application already, create one.
68 | Once you have created an application, go to the "API Keys" area,
69 | scroll to the bottom, and click the button to generate access keys for your account.
70 | This can take a few minutes to complete.
71 |
72 | Once you have an application and access keys for your account,
73 | you can copy the necessary values into a new ApiKey entry.
74 | This includes the "API key" and "API secret", located at the
75 | top of your application keys page, and
76 | the "Access Token" and "Access Token Secret", located at
77 | the bottom of your application keys page.
78 |
79 |
80 | ### Customize the Filter Terms
81 |
82 | Currently, this package uses the `filter` endpoint of the
83 | Twitter Streaming API ([more info](https://dev.twitter.com/docs/streaming-apis/streams/public)).
84 | This endpoint accepts a set of tracking terms. Any tweets matching these terms
85 | will be delivered to you as they are created (approximately).
86 | The precise behavior of term filtering is described [here](https://dev.twitter.com/docs/streaming-apis/parameters#track).
87 |
88 | This package defines a FilterTerm model. You can add filter
89 | terms to this table through the Django Admin interface,
90 | or through code. When you change the terms in the database,
91 | the stream will briefly shut itself down and then restart
92 | with the new list.
93 |
94 | If there are no terms in your database, the connection to Twitter will be
95 | closed until some terms are available. Note that connecting to the unfiltered
96 | public stream is not yet supported.
97 |
98 | Due to Twitter's rate limit, the Streaming API appears to return
99 | all of the tweets matching your filter terms *up to* around 1%
100 | of the total volume on Twitter at the present moment.
101 | In my experience, you will get at most around 50 or 60 tweets per second.
102 |
103 |
104 | Start the Streaming Process
105 | ---------------------------
106 |
107 | To start the streaming process, use the `stream` management command:
108 |
109 | ```bash
110 | $ python manage.py stream
111 | ```
112 |
113 | This will connect to Twitter using API keys and tracking terms from your database.
114 |
115 | If you have stored multiple API keys in your database, you may select a particular
116 | set of API keys by passing its name as an argument to this command (see the example below).
117 |
118 | You may also choose the rate at which the database will be polled for changes
119 | to the filter terms. This is also the interval at which tweets will be batch-inserted
120 | into your database, so don't set it too long. The default is 10 seconds.
121 |
122 | ```bash
123 | $ python manage.py stream MyAPIKeys --poll-interval 30
124 | ```
125 |
126 | > *Warning*: Twitter does not allow an account to open more than one streaming
127 | connection at a time. If you repeatedly try to open too many streaming connections,
128 | there may be repercussions. If you start receiving disconnect errors from Twitter,
129 | take a break for a few minutes before trying to reconnect.
130 |
131 | If you need to take your database offline for some reason or just want to stream
132 | tweets to a file instead, you can use the `--to-file` option:
133 |
134 | ```bash
135 | $ python manage.py stream --to-file some_file.json
136 | ```
137 |
138 | This will append tweets, in JSON format, one-per-line, to "some_file.json".
139 | If you are capturing retweets, they will be separated out onto separate lines.
140 | If you are not, they will be removed from the JSON objects before being printed.
141 |
142 | You may also configure the stream to read from a file (or stdin with '-'):
143 |
144 | ```bash
145 | $ python manage.py stream --from-file some_file.json
146 | $ python manage.py stream --from-file -
147 | ```
148 |
149 | Settings
150 | --------
151 |
152 | Settings for this app can be configured by adding `TWITTER_STREAM_SETTINGS` to your
153 | Django settings file. Below are the default settings:
154 |
155 | ```python
156 | TWITTER_STREAM_SETTINGS = {
157 |
158 | # Set to True to save embedded retweeted_status tweets. Normally these are discarded.
159 | 'CAPTURE_EMBEDDED': False,
160 |
161 | # Change the default term track and tweet insert interval
162 | 'POLL_INTERVAL': 10,
163 |
164 | # The name of the default keys to use for streaming. If not set, we'll just grab one.
165 | 'DEFAULT_KEYS_NAME': None,
166 |
167 | # Put the stream in a loop so random termination will be prevented.
168 | 'PREVENT_EXIT': False,
169 | }
170 | ```
171 |
172 | Status Page
173 | -----------
174 |
175 | This app provides a status page that shows how the Twitter stream is doing.
176 | Just add something like this to your url conf:
177 |
178 | ```python
179 | url(r'^stream/', include('twitter_stream.urls', namespace="twitter_stream")),
180 | ```
181 |
182 | For the twitter stream views to work, you'll need to add this to your `INSTALLED_APPS`:
183 | ```python
184 | INSTALLED_APPS = (
185 | # other apps
186 | 'django.contrib.humanize',
187 | 'bootstrap3',
188 | 'jsonview',
189 | )
190 | ```
191 |
192 | Custom Tweet Classes
193 | --------------------
194 |
195 | It is possible to swap the provided Tweet class for your own, so that you
196 | can add other fields or whatever.
197 | To do this, in the models.py file for your app (which we will call 'myapp' in this example),
198 | add a class that extends `AbstractTweet`:
199 |
200 | ```python
201 | from twitter_stream.models import AbstractTweet
202 | class MyTweet(AbstractTweet):
203 | """ add whatever here... """
204 | ```
205 |
206 | Then, add this to your settings file:
207 | ```python
208 | TWITTER_STREAM_TWEET_MODEL = 'myapp.MyTweet'
209 | ```
210 |
211 | This is facilitated by the [django-swappable-models](https://github.com/wq/django-swappable-models) package.
212 |
213 | Anywhere you were previously hard-importing the Tweet model,
214 | you will need to replace it with something like this:
215 |
216 | ```python
217 | from swapper import load_model
218 | Tweet = load_model('twitter_stream', 'Tweet')
219 | ```
220 |
221 | This will load either the original Tweet model or the swapped model
222 | as appropriate. You can also load your `MyTweet` model directly, of course.
223 |
224 | For creating foreign keys pointing to Tweet (or the swapped model)
225 | you can use `swapper.get_model_name('twitter_stream', 'Tweet')`.
226 |
227 | If you are using South migrations and need to migrate from the old Tweet model
228 | to your new model, [this tutorial](http://www.caktusgroup.com/blog/2013/08/07/migrating-custom-user-model-django/)
229 | explains the issues. The basic idea is to do it in these steps:
230 |
231 | 1. Create your new model and change your model loading throughout (i.e. use `load_model`),
232 | but don't set the `TWITTER_STREAM_TWEET_MODEL` to actually swap it out yet.
233 | 2. Create a normal schema migration on `myapp` to make the database table for
234 | your new model. Run the migration.
235 | 3. Write a data migration that copies data from the old `twitter_stream_tweet` table to your new table.
236 | Run the data migration.
237 | 4. Trick South into creating a migration for you that you can use to delete the old table with the `SOUTH_MIGRATION_MODULES` setting.
238 | This step may need adaptation to work with `django-twitter-stream` since it was designed for the migration-less
239 | `django.contrib.auth` app.
240 | 5. Finally, swap the models with the `TWITTER_STREAM_TWEET_MODEL` setting.
241 | 6. Generate new schema migrations for any apps with foreign keys that reference the Tweet model.
242 | 7. Move your stub migration that deletes the `twitter_stream_tweet` table into your app's migration queue.
243 | 8. Run all the remaining migrations.
244 |
245 | Streaming From a File
246 | ---------------------
247 |
248 | There is also a `stream_from_file` command provided which can parse
249 | a file containing already collected tweets. This can be handy for debugging.
250 | This feature is deprecated. The `stream` command now provides this functionality.
251 |
252 |
253 | Questions and Contributing
254 | --------------------------
255 |
256 | Feel free to post questions and problems on the issue tracker. Pull requests welcome!
257 |
--------------------------------------------------------------------------------
/twitter_stream/migrations/0001_initial.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from south.utils import datetime_utils as datetime
3 | from south.db import db
4 | from south.v2 import SchemaMigration
5 | from django.db import models
6 |
7 |
8 | class Migration(SchemaMigration):
9 |
10 | def forwards(self, orm):
11 | # Adding model 'ApiKey'
12 | db.create_table(u'twitter_stream_apikey', (
13 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
14 | ('created_at', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)),
15 | ('name', self.gf('django.db.models.fields.CharField')(max_length=250)),
16 | ('email', self.gf('django.db.models.fields.EmailField')(default=None, max_length=75, blank=True)),
17 | ('api_key', self.gf('django.db.models.fields.CharField')(max_length=250)),
18 | ('api_secret', self.gf('django.db.models.fields.CharField')(max_length=250)),
19 | ('access_token', self.gf('django.db.models.fields.CharField')(max_length=250)),
20 | ('access_token_secret', self.gf('django.db.models.fields.CharField')(max_length=250)),
21 | ))
22 | db.send_create_signal(u'twitter_stream', ['ApiKey'])
23 |
24 | # Adding model 'StreamProcess'
25 | db.create_table(u'twitter_stream_streamprocess', (
26 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
27 | ('created_at', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)),
28 | ('timeout_seconds', self.gf('django.db.models.fields.PositiveIntegerField')()),
29 | ('expires_at', self.gf('django.db.models.fields.DateTimeField')()),
30 | ('last_heartbeat', self.gf('django.db.models.fields.DateTimeField')()),
31 | ('keys', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['twitter_stream.ApiKey'], null=True)),
32 | ('hostname', self.gf('django.db.models.fields.CharField')(max_length=250)),
33 | ('process_id', self.gf('django.db.models.fields.PositiveIntegerField')()),
34 | ('status', self.gf('django.db.models.fields.CharField')(default='WAITING', max_length=10)),
35 | ('tweet_rate', self.gf('django.db.models.fields.FloatField')(default=0)),
36 | ('error_count', self.gf('django.db.models.fields.PositiveSmallIntegerField')(default=0)),
37 | ))
38 | db.send_create_signal(u'twitter_stream', ['StreamProcess'])
39 |
40 | # Adding model 'Tweet'
41 | db.create_table(u'twitter_stream_tweet', (
42 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
43 | ('tweet_id', self.gf('django.db.models.fields.BigIntegerField')()),
44 | ('text', self.gf('django.db.models.fields.CharField')(max_length=250)),
45 | ('truncated', self.gf('django.db.models.fields.BooleanField')()),
46 | ('lang', self.gf('django.db.models.fields.CharField')(default=None, max_length=9, null=True, blank=True)),
47 | ('user_id', self.gf('django.db.models.fields.BigIntegerField')()),
48 | ('user_screen_name', self.gf('django.db.models.fields.CharField')(max_length=50)),
49 | ('user_name', self.gf('django.db.models.fields.CharField')(max_length=150)),
50 | ('user_verified', self.gf('django.db.models.fields.BooleanField')()),
51 | ('created_at', self.gf('django.db.models.fields.DateTimeField')()),
52 | ('user_utc_offset', self.gf('django.db.models.fields.IntegerField')(default=None, null=True, blank=True)),
53 | ('user_time_zone', self.gf('django.db.models.fields.CharField')(default=None, max_length=150, null=True, blank=True)),
54 | ('filter_level', self.gf('django.db.models.fields.CharField')(default=None, max_length=6, null=True, blank=True)),
55 | ('latitude', self.gf('django.db.models.fields.FloatField')(default=None, null=True, blank=True)),
56 | ('longitude', self.gf('django.db.models.fields.FloatField')(default=None, null=True, blank=True)),
57 | ('user_geo_enabled', self.gf('django.db.models.fields.BooleanField')(default=False)),
58 | ('user_location', self.gf('django.db.models.fields.CharField')(default=None, max_length=150, null=True, blank=True)),
59 | ('favorite_count', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)),
60 | ('retweet_count', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)),
61 | ('user_followers_count', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)),
62 | ('user_friends_count', self.gf('django.db.models.fields.PositiveIntegerField')(null=True, blank=True)),
63 | ('in_reply_to_status_id', self.gf('django.db.models.fields.BigIntegerField')(default=None, null=True, blank=True)),
64 | ('retweeted_status_id', self.gf('django.db.models.fields.BigIntegerField')(default=None, null=True, blank=True)),
65 | ('analyzed_by', self.gf('django.db.models.fields.SmallIntegerField')(default=0)),
66 | ))
67 | db.send_create_signal(u'twitter_stream', ['Tweet'])
68 |
69 | # Adding model 'FilterTerm'
70 | db.create_table(u'twitter_stream_filterterm', (
71 | (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
72 | ('created_at', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)),
73 | ('term', self.gf('django.db.models.fields.CharField')(max_length=250)),
74 | ('enabled', self.gf('django.db.models.fields.BooleanField')(default=True)),
75 | ))
76 | db.send_create_signal(u'twitter_stream', ['FilterTerm'])
77 |
78 |
def backwards(self, orm):
    """Reverse this migration by dropping the four twitter_stream tables."""
    # Dropped in the same order as before:
    # ApiKey, StreamProcess, Tweet, FilterTerm.
    tables_to_drop = (
        u'twitter_stream_apikey',
        u'twitter_stream_streamprocess',
        u'twitter_stream_tweet',
        u'twitter_stream_filterterm',
    )
    for table_name in tables_to_drop:
        db.delete_table(table_name)
91 |
92 |
# Snapshot of the twitter_stream model definitions as of this migration.
# Each entry maps a field name to a (field class path, positional args,
# keyword args) triple, with all keyword values stored as strings.
#
# NOTE: this reflects the schema at this point in history, not the current
# models.py -- e.g. ApiKey still has a single 'name' column, Tweet uses a
# plain AutoField id and still carries 'analyzed_by', and StreamProcess has
# no 'memory_usage' column yet.
models = {
    u'twitter_stream.apikey': {
        'Meta': {'object_name': 'ApiKey'},
        'access_token': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
        'access_token_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
        'api_key': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
        'api_secret': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
        'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
        'email': ('django.db.models.fields.EmailField', [], {'default': 'None', 'max_length': '75', 'blank': 'True'}),
        u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
        'name': ('django.db.models.fields.CharField', [], {'max_length': '250'})
    },
    u'twitter_stream.filterterm': {
        'Meta': {'object_name': 'FilterTerm'},
        'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
        'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
        u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
        'term': ('django.db.models.fields.CharField', [], {'max_length': '250'})
    },
    u'twitter_stream.streamprocess': {
        'Meta': {'object_name': 'StreamProcess'},
        'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
        'error_count': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '0'}),
        'expires_at': ('django.db.models.fields.DateTimeField', [], {}),
        'hostname': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
        u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
        # Foreign key to the ApiKey entry above, referenced through the orm.
        'keys': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['twitter_stream.ApiKey']", 'null': 'True'}),
        'last_heartbeat': ('django.db.models.fields.DateTimeField', [], {}),
        'process_id': ('django.db.models.fields.PositiveIntegerField', [], {}),
        'status': ('django.db.models.fields.CharField', [], {'default': "'WAITING'", 'max_length': '10'}),
        'timeout_seconds': ('django.db.models.fields.PositiveIntegerField', [], {}),
        'tweet_rate': ('django.db.models.fields.FloatField', [], {'default': '0'})
    },
    u'twitter_stream.tweet': {
        'Meta': {'object_name': 'Tweet'},
        'analyzed_by': ('django.db.models.fields.SmallIntegerField', [], {'default': '0'}),
        'created_at': ('django.db.models.fields.DateTimeField', [], {}),
        'favorite_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
        'filter_level': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '6', 'null': 'True', 'blank': 'True'}),
        u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
        'in_reply_to_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
        'lang': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '9', 'null': 'True', 'blank': 'True'}),
        'latitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
        'longitude': ('django.db.models.fields.FloatField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
        'retweet_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
        'retweeted_status_id': ('django.db.models.fields.BigIntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
        'text': ('django.db.models.fields.CharField', [], {'max_length': '250'}),
        'truncated': ('django.db.models.fields.BooleanField', [], {}),
        'tweet_id': ('django.db.models.fields.BigIntegerField', [], {}),
        'user_followers_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
        'user_friends_count': ('django.db.models.fields.PositiveIntegerField', [], {'null': 'True', 'blank': 'True'}),
        'user_geo_enabled': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
        'user_id': ('django.db.models.fields.BigIntegerField', [], {}),
        'user_location': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
        'user_name': ('django.db.models.fields.CharField', [], {'max_length': '150'}),
        'user_screen_name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
        'user_time_zone': ('django.db.models.fields.CharField', [], {'default': 'None', 'max_length': '150', 'null': 'True', 'blank': 'True'}),
        'user_utc_offset': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
        'user_verified': ('django.db.models.fields.BooleanField', [], {})
    }
}

# Apps whose full model set is captured in the snapshot above.
complete_apps = ['twitter_stream']
--------------------------------------------------------------------------------
/twitter_stream/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models, connection
2 | from django.conf import settings as django_settings
3 | from datetime import datetime, timedelta
4 | from email.utils import parsedate
5 | from django.utils import timezone
6 | import os
7 | import socket
8 | from . import settings
9 | from django.core.exceptions import ObjectDoesNotExist
10 | from swapper import swappable_setting
11 | from . import fields
12 |
13 | current_timezone = timezone.get_current_timezone()
14 |
def parse_datetime(string):
    """
    Convert an RFC 2822-style date string (as found in a tweet's
    "created_at") into a datetime.

    Only the first six fields returned by email.utils.parsedate (year
    through second) are used; parsedate does not report the UTC offset, so
    any offset in the string is ignored.  When settings.USE_TZ is on, the
    result is tagged with the timezone captured at import time in
    current_timezone; otherwise a naive datetime is returned.

    Raises TypeError if the string cannot be parsed (parsedate returns None).
    """
    tz_kwargs = {'tzinfo': current_timezone} if settings.USE_TZ else {}
    date_fields = parsedate(string)[:6]
    return datetime(*date_fields, **tz_kwargs)
20 |
class ApiKey(models.Model):
    """
    Keys for accessing the Twitter Streaming API.
    """

    created_at = models.DateTimeField(auto_now_add=True)

    # Who/what the credentials belong to; get_keys() looks keys up by user_name.
    user_name = models.CharField(max_length=250)
    app_name = models.CharField(max_length=250)
    email = models.EmailField(default=None, blank=True)

    # Application (consumer) credentials
    api_key = models.CharField(max_length=250)
    api_secret = models.CharField(max_length=250)

    # Per-user access credentials
    access_token = models.CharField(max_length=250)
    access_token_secret = models.CharField(max_length=250)

    def __unicode__(self):
        return "%s/%s" % (self.user_name, self.app_name)

    @classmethod
    def get_keys(cls, keys_name):
        """
        Return the keys to use for streaming.

        If keys_name is truthy, return the entry whose user_name equals it.
        Otherwise return the first entry in the table.

        Raises ObjectDoesNotExist when no suitable entry exists (the lookup
        by name raises the model's own DoesNotExist, which is a subclass).
        The lookup by name can also raise MultipleObjectsReturned if several
        entries share the same user_name.
        """
        if keys_name:
            # .get() already raises DoesNotExist on a miss, so there is
            # nothing further to check on this path.
            return cls.objects.get(user_name=keys_name)

        keys = cls.objects.first()
        if keys is None:
            # Previously this reported "Unknown keys None", which hid the
            # real problem: the table is empty.
            raise ObjectDoesNotExist("No API keys have been configured")

        return keys
52 |
class StreamProcess(models.Model):
    """
    Tracks information about the stream process in the database.
    """

    created_at = models.DateTimeField(auto_now_add=True)
    timeout_seconds = models.PositiveIntegerField()
    # Refreshed by heartbeat(): last_heartbeat + timeout_seconds
    expires_at = models.DateTimeField()
    last_heartbeat = models.DateTimeField()

    keys = models.ForeignKey(ApiKey, null=True)
    hostname = models.CharField(max_length=250)
    process_id = models.PositiveIntegerField()
    # Human-readable string such as "12.3 MB" (see get_memory_usage)
    memory_usage = models.CharField(max_length=30, default=None, null=True, blank=True)

    STREAM_STATUS_RUNNING = "RUNNING"
    STREAM_STATUS_WAITING = "WAITING"  # No terms currently being tracked
    STREAM_STATUS_STOPPED = "STOPPED"
    status = models.CharField(max_length=10,
                              choices=(
                                  (STREAM_STATUS_RUNNING, "Running"),
                                  (STREAM_STATUS_WAITING, "Waiting"),
                                  (STREAM_STATUS_STOPPED, "Stopped")
                              ),
                              default=STREAM_STATUS_WAITING)

    tweet_rate = models.FloatField(default=0)
    error_count = models.PositiveSmallIntegerField(default=0)

    @property
    def lifetime(self):
        """
        Get the age of the streaming process (last_heartbeat - created_at).

        Returns None while either timestamp is unset.  created_at is only
        filled in when the row is first saved, so an instance fresh from
        create() has no lifetime yet.
        """
        if self.created_at is None or self.last_heartbeat is None:
            return None
        return self.last_heartbeat - self.created_at

    def get_memory_usage(self):
        """
        Return this process's peak resident set size as a string like
        "12.3 MB", or "Unknown" where the resource module is unavailable.
        """
        try:
            import resource
        except ImportError:
            return "Unknown"
        import sys

        max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        # ru_maxrss is reported in bytes on macOS but in kilobytes on Linux,
        # so the divisor needed to reach megabytes differs by platform.
        if sys.platform == 'darwin':
            megabytes = max_rss / (1024.0 * 1024.0)
        else:
            megabytes = max_rss / 1024.0

        return "%.1f MB" % megabytes

    def heartbeat(self, save=True):
        """
        Record that the process is still alive.

        Sets last_heartbeat to now and pushes expires_at timeout_seconds
        into the future.  Also refreshes memory_usage when
        settings.MONITOR_PERFORMANCE is on.  Saves the row unless save is
        False.
        """
        self.last_heartbeat = timezone.now()
        self.expires_at = self.last_heartbeat + timedelta(seconds=self.timeout_seconds)

        if settings.MONITOR_PERFORMANCE:
            self.memory_usage = self.get_memory_usage()

        if save:
            self.save()

    def __unicode__(self):
        return "%s:%d %s (%s)" % (self.hostname, self.process_id, self.status, self.lifetime)

    @classmethod
    def create(cls, timeout_seconds):
        """
        Build (but do not save) a record describing the current OS process,
        with its heartbeat set to now and expiry timeout_seconds from now.
        """
        now = timezone.now()
        expires_at = now + timedelta(seconds=timeout_seconds)
        return cls(
            process_id=os.getpid(),
            hostname=socket.gethostname(),
            last_heartbeat=now,
            expires_at=expires_at,
            timeout_seconds=timeout_seconds
        )

    @classmethod
    def get_current_stream_processes(cls, minutes_ago=10):
        """
        Return the processes whose last heartbeat is newer than minutes_ago
        minutes, most recent first.  Expired processes are marked as
        stopped first.
        """
        # some maintenance
        cls.expire_timed_out()

        minutes_ago_dt = timezone.now() - timedelta(minutes=minutes_ago)
        return cls.objects \
            .filter(last_heartbeat__gt=minutes_ago_dt) \
            .order_by('-last_heartbeat')

    @classmethod
    def expire_timed_out(cls):
        """Mark every process whose expires_at has passed as STOPPED."""
        cls.objects \
            .filter(expires_at__lt=timezone.now()) \
            .update(status=cls.STREAM_STATUS_STOPPED)
138 |
139 |
class AbstractTweet(models.Model):
    """
    Selected fields from a Twitter Status object.
    Incorporates several fields from the associated User object.

    For details see https://dev.twitter.com/docs/platform-objects/tweets

    Note that we are not using tweet_id as a primary key -- this application
    does not enforce integrity w/ regard to individual tweets.
    We just add them to the database as they come in, even if we've seen
    them before.
    """

    class Meta:
        abstract = True

    id = fields.PositiveBigAutoField(primary_key=True)

    # Basic tweet info
    tweet_id = models.BigIntegerField()
    text = models.CharField(max_length=250)
    truncated = models.BooleanField(default=False)
    lang = models.CharField(max_length=9, null=True, blank=True, default=None)

    # Basic user info
    user_id = models.BigIntegerField()
    user_screen_name = models.CharField(max_length=50)
    user_name = models.CharField(max_length=150)
    user_verified = models.BooleanField(default=False)

    # Timing parameters
    created_at = models.DateTimeField(db_index=True)  # should be UTC
    user_utc_offset = models.IntegerField(null=True, blank=True, default=None)
    user_time_zone = models.CharField(max_length=150, null=True, blank=True, default=None)

    # none, low, or medium
    filter_level = models.CharField(max_length=6, null=True, blank=True, default=None)

    # Geo parameters
    latitude = models.FloatField(null=True, blank=True, default=None)
    longitude = models.FloatField(null=True, blank=True, default=None)
    user_geo_enabled = models.BooleanField(default=False)
    user_location = models.CharField(max_length=150, null=True, blank=True, default=None)

    # Engagement - not likely to be very useful for streamed tweets but whatever
    favorite_count = models.PositiveIntegerField(null=True, blank=True)
    retweet_count = models.PositiveIntegerField(null=True, blank=True)
    user_followers_count = models.PositiveIntegerField(null=True, blank=True)
    user_friends_count = models.PositiveIntegerField(null=True, blank=True)

    # Relation to other tweets
    in_reply_to_status_id = models.BigIntegerField(null=True, blank=True, default=None)
    retweeted_status_id = models.BigIntegerField(null=True, blank=True, default=None)

    @property
    def is_retweet(self):
        """True when this tweet references a retweeted status."""
        return self.retweeted_status_id is not None

    @classmethod
    def create_from_json(cls, raw):
        """
        Given a *parsed* json status object, construct a new Tweet model.

        The instance is returned unsaved.  The keys 'id', 'text',
        'truncated', 'created_at' and 'user' (with 'id', 'screen_name',
        'name', 'verified') are required and raise KeyError when missing;
        every other key is optional and maps to None (or False for
        user_geo_enabled) when absent or null.
        """

        user = raw['user']

        # Absent or null "retweeted_status" means this is not a retweet.
        retweeted_status = raw.get('retweeted_status') or {}

        # The "coordinates" entry looks like this:
        #
        # "coordinates":
        # {
        #     "coordinates":
        #     [
        #         -75.14310264,
        #         40.05701649
        #     ],
        #     "type":"Point"
        # }
        #
        # i.e. the pair is ordered (longitude, latitude).  The entry is
        # read with .get() so a status that omits the key altogether is
        # treated the same as one where it is null.
        coordinates = (None, None)
        point = raw.get('coordinates')
        if point:
            coordinates = point['coordinates']

        # Replace negative counts with None to indicate missing data
        counts = {
            'favorite_count': raw.get('favorite_count'),
            'retweet_count': raw.get('retweet_count'),
            'user_followers_count': user.get('followers_count'),
            'user_friends_count': user.get('friends_count'),
        }
        for key in counts:
            if counts[key] is not None and counts[key] < 0:
                counts[key] = None

        return cls(
            # Basic tweet info
            tweet_id=raw['id'],
            text=raw['text'],
            truncated=raw['truncated'],
            lang=raw.get('lang'),

            # Basic user info
            user_id=user['id'],
            user_screen_name=user['screen_name'],
            user_name=user['name'],
            user_verified=user['verified'],

            # Timing parameters
            created_at=parse_datetime(raw['created_at']),
            user_utc_offset=user.get('utc_offset'),
            user_time_zone=user.get('time_zone'),

            # none, low, or medium
            filter_level=raw.get('filter_level'),

            # Geo parameters
            latitude=coordinates[1],
            longitude=coordinates[0],
            # The column is a non-null boolean, so a missing or null flag
            # must become False rather than None.
            user_geo_enabled=bool(user.get('geo_enabled')),
            user_location=user.get('location'),

            # Engagement - not likely to be very useful for streamed tweets but whatever
            favorite_count=counts.get('favorite_count'),
            retweet_count=counts.get('retweet_count'),
            user_followers_count=counts.get('user_followers_count'),
            user_friends_count=counts.get('user_friends_count'),

            # Relation to other tweets
            in_reply_to_status_id=raw.get('in_reply_to_status_id'),
            retweeted_status_id=retweeted_status.get('id')
        )

    @classmethod
    def get_created_in_range(cls, start, end):
        """
        Returns all the tweets between start (inclusive) and end (exclusive).
        """
        return cls.objects.filter(created_at__gte=start, created_at__lt=end)

    @classmethod
    def get_earliest_created_at(cls):
        """
        Returns the earliest created_at time, or None
        """
        result = cls.objects.aggregate(earliest_created_at=models.Min('created_at'))
        return result['earliest_created_at']

    @classmethod
    def get_latest_created_at(cls):
        """
        Returns the latest created_at time, or None
        """
        result = cls.objects.aggregate(latest_created_at=models.Max('created_at'))
        return result['latest_created_at']

    @classmethod
    def count_approx(cls):
        """
        Get the approximate number of tweets.
        Executes quickly, even on large InnoDB tables.

        On a MySQL default database this reads the row estimate from
        SHOW TABLE STATUS; on any other engine, or when MySQL reports no
        estimate for the table, it falls back to an exact COUNT.
        """
        if django_settings.DATABASES['default']['ENGINE'].endswith('mysql'):
            query = "SHOW TABLE STATUS WHERE Name = %s"
            cursor = connection.cursor()
            try:
                cursor.execute(query, [cls._meta.db_table])
                row = cursor.fetchone()
                columns = [col[0].lower() for col in (cursor.description or ())]
            finally:
                # Always release the cursor, even if the query fails.
                cursor.close()

            if row is not None:
                approx_rows = dict(zip(columns, row)).get('rows')
                if approx_rows is not None:
                    return int(approx_rows)

        return cls.objects.count()
315 |
class Tweet(AbstractTweet):
    """
    The default concrete tweet model, built entirely from AbstractTweet.

    Load this class with swapper.load_model("twitter_stream", "Tweet")
    rather than importing it directly, in case it has been swapped out.

    To swap it out for your own class (extending AbstractTweet),
    just add this to your settings:
        TWITTER_STREAM_TWEET_MODEL = "myapp.MyTweetModel"
    """

    class Meta(AbstractTweet.Meta):
        # Marks this model as replaceable via the swapper setting above.
        swappable = swappable_setting('twitter_stream', 'Tweet')
328 |
329 |
class FilterTerm(models.Model):
    """
    A single term together with a flag saying whether it is enabled.

    NOTE(review): presumably these are the keywords the stream process
    tracks -- confirm against the streaming command.
    """

    created_at = models.DateTimeField(auto_now_add=True)
    term = models.CharField(max_length=250)
    enabled = models.BooleanField(default=True)

    def __unicode__(self):
        # Display the term text itself.
        return self.term
337 |
--------------------------------------------------------------------------------
/twitter_stream/tests/test_tweet.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datetime import datetime
3 |
4 | from django.test import TestCase
5 | from django.utils import timezone
6 | from twitter_stream import settings
7 | from twitter_stream.models import Tweet
8 |
9 |
class TweetCreateFromJsonTest(TestCase):
    """
    Checks Tweet.create_from_json() against hand-extracted expected values.

    Individual test methods are attached with add_test() below the class.
    """

    # Every model field that is compared, in the order it is checked.
    COMPARED_FIELDS = (
        # Basic tweet info
        'tweet_id',
        'text',
        'truncated',
        'lang',

        # Basic user info
        'user_id',
        'user_screen_name',
        'user_name',
        'user_verified',

        # Timing parameters
        'created_at',
        'user_utc_offset',
        'user_time_zone',

        # none, low, or medium
        'filter_level',

        # Geo parameters
        'latitude',
        'longitude',
        'user_geo_enabled',
        'user_location',

        # Engagement - not likely to be very useful for streamed tweets but whatever
        'favorite_count',
        'retweet_count',
        'user_followers_count',
        'user_friends_count',

        # Relation to other tweets
        'in_reply_to_status_id',
        'retweeted_status_id',
    )

    def validate_json(self, tweet_json, correct_data):
        """
        create_from_json() should return a Tweet object with
        the fields set to their proper values.

        Checks that all the fields match up.
        The tweet_json is raw JSON text from the Twitter api and documentation,
        The correct_data is corresponding manually-extracted data.
        """

        raw_tweet = json.loads(tweet_json)
        tweet = Tweet.create_from_json(raw_tweet)
        self.assertIsInstance(tweet, Tweet)

        # check for model validity
        tweet.clean_fields()

        # Work on a copy: the caller's dict is shared test data and must
        # not be modified (make_aware would fail on a second pass over an
        # already-aware datetime).
        expected = dict(correct_data)

        # May need to convert the date depending on timezone settings
        if settings.USE_TZ:
            expected['created_at'] = timezone.make_aware(
                expected['created_at'], timezone.get_current_timezone())

        for field_name in self.COMPARED_FIELDS:
            self.assertEqual(getattr(tweet, field_name), expected[field_name],
                             '%s mismatch' % field_name)

    @classmethod
    def add_test(cls, name, json, correct_data):
        """
        Attach a test method named test_<name> that validates the given raw
        JSON text against correct_data.

        Note: the json parameter is the raw JSON *text*; it shadows the json
        module only inside this method.
        """
        setattr(cls, "test_%s" % name, lambda self: self.validate_json(json, correct_data))
71 |
72 | # This example has lots of stuff that is null
73 | # Example tweet from https://dev.twitter.com/docs/api/1.1/get/statuses/show/%3Aid
74 | TweetCreateFromJsonTest.add_test('null_fields', r"""{
75 | "coordinates": null,
76 | "favorited": false,
77 | "truncated": false,
78 | "created_at": "Wed Jun 06 20:07:10 +0000 2012",
79 | "id_str": "210462857140252672",
80 | "entities": {
81 | "urls": [
82 | {
83 | "expanded_url": "https://dev.twitter.com/terms/display-guidelines",
84 | "url": "https://t.co/Ed4omjYs",
85 | "indices": [
86 | 76,
87 | 97
88 | ],
89 | "display_url": "dev.twitter.com/terms/display-\u2026"
90 | }
91 | ],
92 | "hashtags": [
93 | {
94 | "text": "Twitterbird",
95 | "indices": [
96 | 19,
97 | 31
98 | ]
99 | }
100 | ],
101 | "user_mentions": [
102 |
103 | ]
104 | },
105 | "in_reply_to_user_id_str": null,
106 | "contributors": [
107 | 14927800
108 | ],
109 | "text": "Along with our new #Twitterbird, we've also updated our Display Guidelines: https://t.co/Ed4omjYs ^JC",
110 | "retweet_count": 66,
111 | "in_reply_to_status_id_str": null,
112 | "id": 210462857140252672,
113 | "geo": null,
114 | "retweeted": true,
115 | "possibly_sensitive": false,
116 | "in_reply_to_user_id": null,
117 | "place": null,
118 | "user": {
119 | "profile_sidebar_fill_color": "DDEEF6",
120 | "profile_sidebar_border_color": "C0DEED",
121 | "profile_background_tile": false,
122 | "name": "Twitter API",
123 | "profile_image_url": "http://a0.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3_normal.png",
124 | "created_at": "Wed May 23 06:01:13 +0000 2007",
125 | "location": "San Francisco, CA",
126 | "follow_request_sent": false,
127 | "profile_link_color": "0084B4",
128 | "is_translator": false,
129 | "id_str": "6253282",
130 | "entities": {
131 | "url": {
132 | "urls": [
133 | {
134 | "expanded_url": null,
135 | "url": "http://dev.twitter.com",
136 | "indices": [
137 | 0,
138 | 22
139 | ]
140 | }
141 | ]
142 | },
143 | "description": {
144 | "urls": [
145 |
146 | ]
147 | }
148 | },
149 | "default_profile": true,
150 | "contributors_enabled": true,
151 | "favourites_count": 24,
152 | "url": "http://dev.twitter.com",
153 | "profile_image_url_https": "https://si0.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3_normal.png",
154 | "utc_offset": -28800,
155 | "id": 6253282,
156 | "profile_use_background_image": true,
157 | "listed_count": 10774,
158 | "profile_text_color": "333333",
159 | "lang": "en",
160 | "followers_count": 1212963,
161 | "protected": false,
162 | "notifications": null,
163 | "profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme1/bg.png",
164 | "profile_background_color": "C0DEED",
165 | "verified": true,
166 | "geo_enabled": true,
167 | "time_zone": "Pacific Time (US & Canada)",
168 | "description": "The Real Twitter API. I tweet about API changes, service issues and happily answer questions about Twitter and our API. Don't get an answer? It's on my website.",
169 | "default_profile_image": false,
170 | "profile_background_image_url": "http://a0.twimg.com/images/themes/theme1/bg.png",
171 | "statuses_count": 3333,
172 | "friends_count": 31,
173 | "following": true,
174 | "show_all_inline_media": false,
175 | "screen_name": "twitterapi"
176 | },
177 | "in_reply_to_screen_name": null,
178 | "source": "web",
179 | "in_reply_to_status_id": null
180 | }""", {
181 | # Basic tweet info
182 | 'tweet_id': 210462857140252672,
183 | 'text': "Along with our new #Twitterbird, we've also updated "
184 | "our Display Guidelines: https://t.co/Ed4omjYs ^JC",
185 | 'truncated': False,
186 | 'lang': None,
187 |
188 | # Basic user info
189 | 'user_id': 6253282,
190 | 'user_screen_name': 'twitterapi',
191 | 'user_name': 'Twitter API',
192 | 'user_verified': True,
193 |
194 | # Timing parameters
195 | 'created_at': datetime(2012, 6, 6, hour=20, minute=7, second=10, microsecond=0),
196 | 'user_utc_offset': -28800,
197 | 'user_time_zone': "Pacific Time (US & Canada)",
198 |
199 | # none, low, or medium
200 | 'filter_level': None,
201 |
202 | # Geo parameters
203 | 'latitude': None,
204 | 'longitude': None,
205 | 'user_geo_enabled': True,
206 | 'user_location': "San Francisco, CA",
207 |
208 | # Engagement - not likely to be very useful for streamed tweets but whatever
209 | 'favorite_count': None,
210 | 'retweet_count': 66,
211 | 'user_followers_count': 1212963,
212 | 'user_friends_count': 31,
213 |
214 | 'in_reply_to_status_id': None,
215 | 'retweeted_status_id': None
216 | })
217 |
218 | # A captured tweet (anonymized)
219 | # This example has location data
220 | TweetCreateFromJsonTest.add_test('location_data', r"""{
221 | "contributors": null,
222 | "coordinates": {
223 | "coordinates": [
224 | -118.722583202,
225 | 34.983424651
226 | ],
227 | "type": "Point"
228 | },
229 | "created_at": "Tue Feb 11 18:43:27 +0000 2014",
230 | "entities": {
231 | "hashtags": [],
232 | "symbols": [],
233 | "urls": [],
234 | "user_mentions": []
235 | },
236 | "favorite_count": 0,
237 | "favorited": false,
238 | "filter_level": "medium",
239 | "geo": {
240 | "coordinates": [
241 | 34.983424651,
242 | -118.722583202
243 | ],
244 | "type": "Point"
245 | },
246 | "id": 458121938375806432,
247 | "id_str": "458121938375806432",
248 | "in_reply_to_screen_name": null,
249 | "in_reply_to_status_id": null,
250 | "in_reply_to_status_id_str": null,
251 | "in_reply_to_user_id": null,
252 | "in_reply_to_user_id_str": null,
253 | "lang": "en",
254 | "place": {
255 | "attributes": {},
256 | "bounding_box": {
257 | "coordinates": [
258 | [
259 | [
260 | -118.0,
261 | 34.0
262 | ],
263 | [
264 | -118.0,
265 | 34.0
266 | ],
267 | [
268 | -118.0,
269 | 34.0
270 | ],
271 | [
272 | -118.0,
273 | 34.0
274 | ]
275 | ]
276 | ],
277 | "type": "Polygon"
278 | },
279 | "contained_within": [],
280 | "country": "United States",
281 | "country_code": "US",
282 | "full_name": "Place, CA",
283 | "id": "540563418",
284 | "name": "Place",
285 | "place_type": "city",
286 | "url": "https://api.twitter.com/1.1/geo/id/540563418.json"
287 | },
288 | "retweet_count": 0,
289 | "retweeted": false,
290 | "source": "Twitter for iPhone",
291 | "text": "Blah blah blah blah blah blah blah blah!",
292 | "truncated": false,
293 | "user": {
294 | "contributors_enabled": false,
295 | "created_at": "Thu Jul 26 14:02:08 +0000 2012",
296 | "default_profile": true,
297 | "default_profile_image": false,
298 | "description": null,
299 | "favourites_count": 2,
300 | "follow_request_sent": null,
301 | "followers_count": 4,
302 | "following": null,
303 | "friends_count": 13,
304 | "geo_enabled": true,
305 | "id": 687069798,
306 | "id_str": "687069798",
307 | "is_translation_enabled": false,
308 | "is_translator": false,
309 | "lang": "en",
310 | "listed_count": 0,
311 | "location": "",
312 | "name": "some_user_name",
313 | "notifications": null,
314 | "profile_background_color": "C0DEED",
315 | "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
316 | "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
317 | "profile_background_tile": false,
318 | "profile_image_url": "http://pbs.twimg.com/profile_images/fake_fake_fake.jpeg",
319 | "profile_image_url_https": "https://pbs.twimg.com/profile_images/fake_fake_fake.jpeg",
320 | "profile_link_color": "0084B4",
321 | "profile_sidebar_border_color": "C0DEED",
322 | "profile_sidebar_fill_color": "DDEEF6",
323 | "profile_text_color": "333333",
324 | "profile_use_background_image": true,
325 | "protected": false,
326 | "screen_name": "some_screen_name",
327 | "statuses_count": 7,
328 | "time_zone": "Pacific Time (US & Canada)",
329 | "url": null,
330 | "utc_offset": null,
331 | "verified": false
332 | }
333 | }""", {
334 | # Basic tweet info
335 | 'tweet_id': 458121938375806432,
336 | 'text': "Blah blah blah blah blah blah blah blah!",
337 | 'truncated': False,
338 | 'lang': "en",
339 |
340 | # Basic user info
341 | 'user_id': 687069798,
342 | 'user_screen_name': 'some_screen_name',
343 | 'user_name': 'some_user_name',
344 | 'user_verified': False,
345 |
346 | # Timing parameters
347 | 'created_at': datetime(2014, 2, 11, hour=18, minute=43, second=27, microsecond=0),
348 | 'user_utc_offset': None,
349 | 'user_time_zone': "Pacific Time (US & Canada)",
350 |
351 | # none, low, or medium
352 | 'filter_level': 'medium',
353 |
354 | # Geo parameters
355 | 'latitude': 34.983424651,
356 | 'longitude': -118.722583202,
357 | 'user_geo_enabled': True,
358 | 'user_location': "",
359 |
360 | # Engagement - not likely to be very useful for streamed tweets but whatever
361 | 'favorite_count': 0,
362 | 'retweet_count': 0,
363 | 'user_followers_count': 4,
364 | 'user_friends_count': 13,
365 |
366 | 'in_reply_to_status_id': None,
367 | 'retweeted_status_id': None
368 | })
369 |
370 | # A captured tweet (anonymized)
371 | # This example is a retweet
372 | TweetCreateFromJsonTest.add_test('retweet', r"""{
373 | "contributors": null,
374 | "coordinates": null,
375 | "created_at": "Tue Feb 11 18:43:27 +0000 2014",
376 | "entities": {
377 | "hashtags": [],
378 | "symbols": [],
379 | "urls": [],
380 | "user_mentions": [
381 | {
382 | "id": 600695731,
383 | "id_str": "600695731",
384 | "indices": [
385 | 3,
386 | 12
387 | ],
388 | "name": "somebody",
389 | "screen_name": "somebody124"
390 | }
391 | ]
392 | },
393 | "favorite_count": 0,
394 | "favorited": false,
395 | "filter_level": "medium",
396 | "geo": null,
397 | "id": 664439253345490274,
398 | "id_str": "664439253345490274",
399 | "in_reply_to_screen_name": null,
400 | "in_reply_to_status_id": null,
401 | "in_reply_to_status_id_str": null,
402 | "in_reply_to_user_id": null,
403 | "in_reply_to_user_id_str": null,
404 | "lang": "en",
405 | "place": null,
406 | "retweet_count": 0,
407 | "retweeted": false,
408 | "retweeted_status": {
409 | "contributors": null,
410 | "coordinates": null,
411 | "created_at": "Tue Feb 11 18:28:05 +0000 2014",
412 | "entities": {
413 | "hashtags": [],
414 | "symbols": [],
415 | "urls": [],
416 | "user_mentions": []
417 | },
418 | "favorite_count": 12,
419 | "favorited": false,
420 | "geo": null,
421 | "id": 552293876248595761,
422 | "id_str": "552293876248595761",
423 | "in_reply_to_screen_name": null,
424 | "in_reply_to_status_id": null,
425 | "in_reply_to_status_id_str": null,
426 | "in_reply_to_user_id": null,
427 | "in_reply_to_user_id_str": null,
428 | "lang": "en",
429 | "place": null,
430 | "retweet_count": 10,
431 | "retweeted": false,
432 | "source": "Twitter for iPhone",
433 | "text": "I am an amazing tweet blah blah blah blah blah blah blah",
434 | "truncated": false,
435 | "user": {
436 | "contributors_enabled": false,
437 | "created_at": "Thu Jan 26 21:45:50 +0000 2012",
438 | "default_profile": false,
439 | "default_profile_image": false,
440 | "description": "my user description goes here",
441 | "favourites_count": 12772,
442 | "follow_request_sent": null,
443 | "followers_count": 5201,
444 | "following": null,
445 | "friends_count": 836,
446 | "geo_enabled": false,
447 | "id": 557753453,
448 | "id_str": "557753453",
449 | "is_translation_enabled": false,
450 | "is_translator": false,
451 | "lang": "en",
452 | "listed_count": 10,
453 | "location": "some place",
454 | "name": "my name",
455 | "notifications": null,
456 | "profile_background_color": "090A0A",
457 | "profile_background_image_url": "http://pbs.twimg.com/profile_background_images/fake_fake_fake.jpeg",
458 | "profile_background_image_url_https": "https://pbs.twimg.com/profile_background_images/fake_fake_fake.jpeg",
459 | "profile_background_tile": true,
460 | "profile_banner_url": "https://pbs.twimg.com/profile_banners/fake_fake_fake",
461 | "profile_image_url": "http://pbs.twimg.com/profile_images/fake_fake_fake.jpeg",
462 | "profile_image_url_https": "https://pbs.twimg.com/profile_images/fake_fake_fake.jpeg",
463 | "profile_link_color": "2CC7C7",
464 | "profile_sidebar_border_color": "000000",
465 | "profile_sidebar_fill_color": "E6E4E4",
466 | "profile_text_color": "404040",
467 | "profile_use_background_image": false,
468 | "protected": false,
469 | "screen_name": "my_screen_name",
470 | "statuses_count": 15670,
471 | "time_zone": "Central Time (US & Canada)",
472 | "url": null,
473 | "utc_offset": -21600,
474 | "verified": false
475 | }
476 | },
477 | "source": "Twitter for iPhone",
478 | "text": "RT @my_screen_name: I am an amazing tweet blah blah blah blah blah blah blah",
479 | "truncated": false,
480 | "user": {
481 | "contributors_enabled": false,
482 | "created_at": "Fri Nov 13 23:51:33 +0000 2009",
483 | "default_profile": false,
484 | "default_profile_image": false,
485 | "description": "An inspiring quote, #belieber",
486 | "favourites_count": 6009,
487 | "follow_request_sent": null,
488 | "followers_count": 442,
489 | "following": null,
490 | "friends_count": 380,
491 | "geo_enabled": true,
492 | "id": 165087803,
493 | "id_str": "165087803",
494 | "is_translation_enabled": false,
495 | "is_translator": false,
496 | "lang": "en",
497 | "listed_count": 2,
498 | "location": "",
499 | "name": "My Real Name",
500 | "notifications": null,
501 | "profile_background_color": "642D8B",
502 | "profile_background_image_url": "http://abs.twimg.com/images/themes/theme10/bg.gif",
503 | "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme10/bg.gif",
504 | "profile_background_tile": true,
505 | "profile_banner_url": "https://pbs.twimg.com/profile_banners/fake_fake_fake",
506 | "profile_image_url": "http://pbs.twimg.com/profile_images/fake_fake_fake.jpeg",
507 | "profile_image_url_https": "https://pbs.twimg.com/profile_images/fake_fake_fake.jpeg",
508 | "profile_link_color": "FF0000",
509 | "profile_sidebar_border_color": "65B0DA",
510 | "profile_sidebar_fill_color": "7AC3EE",
511 | "profile_text_color": "3D1957",
512 | "profile_use_background_image": true,
513 | "protected": false,
514 | "screen_name": "screen_name",
515 | "statuses_count": 8006,
516 | "time_zone": "Central Time (US & Canada)",
517 | "url": null,
518 | "utc_offset": -21600,
519 | "verified": false
520 | }
521 | }""", {
522 | # Basic tweet info
523 | 'tweet_id': 664439253345490274,
524 | 'text': "RT @my_screen_name: I am an amazing tweet blah blah blah blah blah blah blah",
525 | 'truncated': False,
526 | 'lang': "en",
527 |
528 | # Basic user info
529 | 'user_id': 165087803,
530 | 'user_screen_name': "screen_name",
531 | 'user_name': 'My Real Name',
532 | 'user_verified': False,
533 |
534 | # Timing parameters
535 | 'created_at': datetime(2014, 2, 11, hour=18, minute=43, second=27, microsecond=0),
536 | 'user_utc_offset': -21600,
537 | 'user_time_zone': "Central Time (US & Canada)",
538 |
539 | # none, low, or medium
540 | 'filter_level': 'medium',
541 |
542 | # Geo parameters
543 | 'latitude': None,
544 | 'longitude': None,
545 | 'user_geo_enabled': True,
546 | 'user_location': "",
547 |
548 | # Engagement - not likely to be very useful for streamed tweets but whatever
549 | 'favorite_count': 0,
550 | 'retweet_count': 0,
551 | 'user_followers_count': 442,
552 | 'user_friends_count': 380,
553 |
554 | 'in_reply_to_status_id': None,
555 | 'retweeted_status_id': 552293876248595761
556 | })
557 |
558 |
559 | # A captured tweet (anonymized) whose tweet-level and user-level counts are all -1,
560 | # the case described at https://dev.twitter.com/docs/streaming-apis/processing#Missing_counts
561 | # The expected fields below record each of those counts as None rather than -1.
562 | TweetCreateFromJsonTest.add_test('negative_counts', r"""{
563 | "contributors": null,
564 | "coordinates": null,
565 | "created_at": "Tue Feb 11 18:43:27 +0000 2014",
566 | "entities": {
567 | "hashtags": [],
568 | "symbols": [],
569 | "urls": [],
570 | "user_mentions": [
571 | {
572 | "id": 600695731,
573 | "id_str": "600695731",
574 | "indices": [
575 | 3,
576 | 12
577 | ],
578 | "name": "somebody",
579 | "screen_name": "somebody124"
580 | }
581 | ]
582 | },
583 | "favorite_count": -1,
584 | "favorited": false,
585 | "filter_level": "medium",
586 | "geo": null,
587 | "id": 664439253345490274,
588 | "id_str": "664439253345490274",
589 | "in_reply_to_screen_name": null,
590 | "in_reply_to_status_id": null,
591 | "in_reply_to_status_id_str": null,
592 | "in_reply_to_user_id": null,
593 | "in_reply_to_user_id_str": null,
594 | "lang": "en",
595 | "place": null,
596 | "retweet_count": -1,
597 | "retweeted": false,
598 | "retweeted_status": null,
599 | "source": "Twitter for iPhone",
600 | "text": "RT @my_screen_name: I am an amazing tweet blah blah blah blah blah blah blah",
601 | "truncated": false,
602 | "user": {
603 | "contributors_enabled": false,
604 | "created_at": "Fri Nov 13 23:51:33 +0000 2009",
605 | "default_profile": false,
606 | "default_profile_image": false,
607 | "description": "An inspiring quote, #belieber",
608 | "favourites_count": -1,
609 | "follow_request_sent": null,
610 | "followers_count": -1,
611 | "following": null,
612 | "friends_count": -1,
613 | "geo_enabled": true,
614 | "id": 165087803,
615 | "id_str": "165087803",
616 | "is_translation_enabled": false,
617 | "is_translator": false,
618 | "lang": "en",
619 | "listed_count": -1,
620 | "location": "",
621 | "name": "My Real Name",
622 | "notifications": null,
623 | "profile_background_color": "642D8B",
624 | "profile_background_image_url": "http://abs.twimg.com/images/themes/theme10/bg.gif",
625 | "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme10/bg.gif",
626 | "profile_background_tile": true,
627 | "profile_banner_url": "https://pbs.twimg.com/profile_banners/fake_fake_fake",
628 | "profile_image_url": "http://pbs.twimg.com/profile_images/fake_fake_fake.jpeg",
629 | "profile_image_url_https": "https://pbs.twimg.com/profile_images/fake_fake_fake.jpeg",
630 | "profile_link_color": "FF0000",
631 | "profile_sidebar_border_color": "65B0DA",
632 | "profile_sidebar_fill_color": "7AC3EE",
633 | "profile_text_color": "3D1957",
634 | "profile_use_background_image": true,
635 | "protected": false,
636 | "screen_name": "screen_name",
637 | "statuses_count": -1,
638 | "time_zone": "Central Time (US & Canada)",
639 | "url": null,
640 | "utc_offset": -21600,
641 | "verified": false
642 | }
643 | }""", {
644 | # Basic tweet info (taken from the top-level "id", "text", "truncated" and "lang" keys)
645 | 'tweet_id': 664439253345490274,
646 | 'text': "RT @my_screen_name: I am an amazing tweet blah blah blah blah blah blah blah",
647 | 'truncated': False,
648 | 'lang': "en",
649 |
650 | # Basic user info (values match the nested "user" object in the payload)
651 | 'user_id': 165087803,
652 | 'user_screen_name': "screen_name",
653 | 'user_name': 'My Real Name',
654 | 'user_verified': False,
655 |
656 | # Timing parameters - created_at is the payload's "Tue Feb 11 18:43:27 +0000 2014" as a naive datetime
657 | 'created_at': datetime(2014, 2, 11, hour=18, minute=43, second=27, microsecond=0),
658 | 'user_utc_offset': -21600,
659 | 'user_time_zone': "Central Time (US & Canada)",
660 |
661 | # filter_level is one of: none, low, or medium
662 | 'filter_level': 'medium',
663 |
664 | # Geo parameters - "coordinates" and "geo" are null in the payload, so no lat/long is expected
665 | 'latitude': None,
666 | 'longitude': None,
667 | 'user_geo_enabled': True,
668 | 'user_location': "",
669 |
670 | # Engagement - the payload carries -1 for each of these counts; None is the expected stored value
671 | 'favorite_count': None,
672 | 'retweet_count': None,
673 | 'user_followers_count': None,
674 | 'user_friends_count': None,
675 |
676 | 'in_reply_to_status_id': None,
677 | 'retweeted_status_id': None
678 | })
679 |
--------------------------------------------------------------------------------