├── .gitignore ├── LICENSE ├── README.rst ├── bootstrap.py ├── buildout.cfg ├── setup.py └── src └── jellyroll ├── __init__.py ├── admin.py ├── evolutions ├── __init__.py ├── bookmark_urls_max_length.sql ├── codecommit_add_new_revision.sql ├── codecommit_drop_old_revision.sql ├── item_id_to_text.sql ├── item_tags_max_length.sql ├── item_url_max_length.sql ├── photo_add_farm_id.sql └── photo_id_to_string.sql ├── fixtures ├── bookmarks.json ├── codecommits.json ├── initial_data.json ├── photos.json ├── tracks.json ├── videos.json └── websearches.json ├── management ├── __init__.py └── commands │ ├── __init__.py │ └── jellyroll_update.py ├── managers.py ├── models.py ├── providers ├── __init__.py ├── delicious.py ├── flickr.py ├── gitscm.py ├── gsearch.py ├── lastfm.py ├── latitude.py ├── svn.py ├── twitter.py ├── utils │ ├── __init__.py │ └── anyetree.py └── youtube.py ├── templates └── jellyroll │ ├── base.html │ ├── calendar │ ├── day.html │ ├── month.html │ ├── today.html │ └── year.html │ └── snippets │ ├── item.html │ └── item.txt ├── templatetags ├── __init__.py └── jellyroll.py ├── tests ├── __init__.py ├── providers │ ├── __init__.py │ ├── test_delicious.py │ ├── test_flickr.py │ └── test_latitude.py ├── test_items.py ├── test_misc.py ├── test_tags.py └── test_views.py ├── testsettings.py ├── urls ├── __init__.py ├── calendar.py └── tags.py └── views ├── __init__.py ├── calendar.py └── tags.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | .installed.cfg 4 | bin 5 | develop-eggs 6 | dist 7 | downloads 8 | eggs 9 | parts 10 | src/*.egg-info 11 | coverage -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | [This is the new BSD license.] 2 | 3 | Copyright (c) 2010 Jacob Kaplan-Moss. All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of this project nor the names of its contributors may 16 | be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Prerequisites 2 | ------------- 3 | 4 | Required by setup.py: 5 | 6 | * django-tagging (0.3pre) 7 | * Django 1.1+ 8 | * PIL 9 | * python-dateutil 10 | * pytz 11 | * httplib2 12 | 13 | Optional 14 | -------- 15 | 16 | * GitPython (for Git support) 17 | * pysvn (for SVN support) 18 | * feedparser (for YouTube support) 19 | 20 | Installation 21 | ------------ 22 | 23 | You need to set up which providers you are going to use, e.g. 24 | 25 | :: 26 | 27 | JELLYROLL_PROVIDERS = ( 28 | 'jellyroll.providers.delicious', 29 | 'jellyroll.providers.flickr', 30 | ) -------------------------------------------------------------------------------- /bootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ############################################################################## 3 | # 4 | # Copyright (c) 2006 Zope Corporation and Contributors. 5 | # All Rights Reserved. 6 | # 7 | # This software is subject to the provisions of the Zope Public License, 8 | # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. 9 | # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED 10 | # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 11 | # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS 12 | # FOR A PARTICULAR PURPOSE. 13 | # 14 | ############################################################################## 15 | """Bootstrap a buildout-based project 16 | 17 | Simply run this script in a directory containing a buildout.cfg. 18 | The script accepts buildout command-line options, so you can 19 | use the -c option to specify an alternate configuration file. 
20 | 21 | $Id$ 22 | """ 23 | 24 | import os, shutil, sys, tempfile, urllib2 25 | 26 | tmpeggs = tempfile.mkdtemp() 27 | 28 | is_jython = sys.platform.startswith('java') 29 | 30 | try: 31 | import pkg_resources 32 | except ImportError: 33 | ez = {} 34 | exec urllib2.urlopen('http://peak.telecommunity.com/dist/ez_setup.py' 35 | ).read() in ez 36 | ez['use_setuptools'](to_dir=tmpeggs, download_delay=0) 37 | 38 | import pkg_resources 39 | 40 | if sys.platform == 'win32': 41 | def quote(c): 42 | if ' ' in c: 43 | return '"%s"' % c # work around spawn lamosity on windows 44 | else: 45 | return c 46 | else: 47 | def quote (c): 48 | return c 49 | 50 | cmd = 'from setuptools.command.easy_install import main; main()' 51 | ws = pkg_resources.working_set 52 | 53 | if is_jython: 54 | import subprocess 55 | 56 | assert subprocess.Popen([sys.executable] + ['-c', quote(cmd), '-mqNxd', 57 | quote(tmpeggs), 'zc.buildout'], 58 | env=dict(os.environ, 59 | PYTHONPATH= 60 | ws.find(pkg_resources.Requirement.parse('setuptools')).location 61 | ), 62 | ).wait() == 0 63 | 64 | else: 65 | assert os.spawnle( 66 | os.P_WAIT, sys.executable, quote (sys.executable), 67 | '-c', quote (cmd), '-mqNxd', quote (tmpeggs), 'zc.buildout', 68 | dict(os.environ, 69 | PYTHONPATH= 70 | ws.find(pkg_resources.Requirement.parse('setuptools')).location 71 | ), 72 | ) == 0 73 | 74 | ws.add_entry(tmpeggs) 75 | ws.require('zc.buildout') 76 | import zc.buildout.buildout 77 | zc.buildout.buildout.main(sys.argv[1:] + ['bootstrap']) 78 | shutil.rmtree(tmpeggs) 79 | -------------------------------------------------------------------------------- /buildout.cfg: -------------------------------------------------------------------------------- 1 | [buildout] 2 | parts = python django-1.1 django-1.2 coverage 3 | develop = . 
def read(fname):
    """
    Return the full contents of ``fname`` as a string.

    ``fname`` is resolved relative to the directory containing this file,
    not the current working directory.

    The handle is closed explicitly (try/finally rather than ``with`` so the
    function stays valid on very old interpreters) instead of being left for
    the garbage collector.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    handle = open(path)
    try:
        return handle.read()
    finally:
        handle.close()
class ItemAdmin(admin.ModelAdmin):
    """Admin options for the generic Item index."""
    date_hierarchy = 'timestamp'
    list_display = ('timestamp', 'object_str')
    list_filter = ('content_type', 'timestamp')
    search_fields = ('object_str', 'tags')

class BookmarkAdmin(admin.ModelAdmin):
    """Admin options for Bookmark."""
    list_display = ('url', 'description')
    search_fields = ('url', 'description', 'thumbnail')

class TrackAdmin(admin.ModelAdmin):
    """Admin options for Track."""
    list_display = ('track_name', 'artist_name')
    search_fields = ('artist_name', 'track_name')

class PhotoAdmin(admin.ModelAdmin):
    """Admin options for Photo."""
    list_display = ('title', 'photo_id','description', 'taken_by')
    search_fields = ('title', 'description', 'taken_by')

class WebSearchResultInline(admin.TabularInline):
    """Tabular inline used to edit WebSearchResult rows from WebSearchAdmin."""
    model = WebSearchResult

class WebSearchAdmin(admin.ModelAdmin):
    """
    Admin options for WebSearch, with its results editable inline.

    This is the only definition of WebSearchAdmin: a second, later class of
    the same name (without ``inlines``) used to shadow this one, so the
    registered admin silently lost the inline.
    """
    list_display = ('query',)
    inlines = [WebSearchResultInline]

class MessageAdmin(admin.ModelAdmin):
    """Admin options for Message."""
    list_display = ('message',)

class VideoAdmin(admin.ModelAdmin):
    """Admin options for Video."""
    list_display = ('title',)

class CodeRepositoryAdmin(admin.ModelAdmin):
    """Admin options for CodeRepository, using a custom form for ``url``."""
    list_display = ('name', 'type', 'url')
    prepopulated_fields = {"slug": ("name",)}

    class CodeRepositoryForm(django.forms.ModelForm):
        class Meta:
            model = CodeRepository

        # Override the URL field to be more permissive
        url = django.forms.CharField(required=True, max_length=100)

    form = CodeRepositoryForm

class CodeCommitAdmin(admin.ModelAdmin):
    """Admin options for CodeCommit."""
    list_display = ('__unicode__', 'repository')
    list_filter = ('repository',)
    search_fields = ('message',)

admin.site.register(Item, ItemAdmin)
admin.site.register(Bookmark, BookmarkAdmin)
admin.site.register(Track, TrackAdmin)
admin.site.register(Photo, PhotoAdmin)
admin.site.register(WebSearch, WebSearchAdmin)
admin.site.register(Message, MessageAdmin)
admin.site.register(Video, VideoAdmin)
admin.site.register(CodeRepository, CodeRepositoryAdmin)
admin.site.register(CodeCommit, CodeCommitAdmin)
'item_tags_max_length', 7 | 'bookmark_urls_max_length', 8 | 'item_url_max_length', 9 | 'track_url_max_length', 10 | 'photo_add_farm_id', 11 | ] 12 | -------------------------------------------------------------------------------- /src/jellyroll/evolutions/bookmark_urls_max_length.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE jellyroll_bookmark ALTER COLUMN url TYPE varchar(1000); 2 | ALTER TABLE jellyroll_bookmark ALTER COLUMN thumbnail_url TYPE varchar(1000); -------------------------------------------------------------------------------- /src/jellyroll/evolutions/codecommit_add_new_revision.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE jellyroll_codecommit ADD COLUMN new_revision VARCHAR(200) DEFAULT ''; 2 | UPDATE jellyroll_codecommit SET new_revision = revision::text; -------------------------------------------------------------------------------- /src/jellyroll/evolutions/codecommit_drop_old_revision.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE jellyroll_codecommit DROP COLUMN revision; 2 | ALTER TABLE jellyroll_codecommit RENAME COLUMN new_revision TO revision; 3 | ALTER TABLE jellyroll_codecommit ALTER COLUMN revision DROP DEFAULT; -------------------------------------------------------------------------------- /src/jellyroll/evolutions/item_id_to_text.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE jellyroll_item ALTER object_id TYPE text; -------------------------------------------------------------------------------- /src/jellyroll/evolutions/item_tags_max_length.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE jellyroll_item ALTER COLUMN tags TYPE varchar(2500); -------------------------------------------------------------------------------- 
/src/jellyroll/evolutions/item_url_max_length.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE jellyroll_item ALTER COLUMN url TYPE varchar(1000); 2 | -------------------------------------------------------------------------------- /src/jellyroll/evolutions/photo_add_farm_id.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE jellyroll_photo ADD COLUMN farm_id smallint unsigned NULL; -------------------------------------------------------------------------------- /src/jellyroll/evolutions/photo_id_to_string.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE jellyroll_photo ALTER photo_id TYPE varchar(50); -------------------------------------------------------------------------------- /src/jellyroll/fixtures/bookmarks.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model" : "jellyroll.bookmark", 4 | "pk" : "1", 5 | "fields" : { 6 | "url" : "http://example.com/", 7 | "description" : "Example" 8 | } 9 | } 10 | ] -------------------------------------------------------------------------------- /src/jellyroll/fixtures/codecommits.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model" : "jellyroll.coderepository", 4 | "pk" : "1", 5 | "fields" : { 6 | "type" : "svn", 7 | "name" : "Django", 8 | "slug" : "django", 9 | "url" : "http://code.djangoproject.com/svn/", 10 | "public_changeset_template" : "http://code.djangoproject.com/changeset/%s", 11 | "username" : "jacob" 12 | } 13 | }, 14 | { 15 | "model" : "jellyroll.codecommit", 16 | "pk" : "1", 17 | "fields" : { 18 | "repository" : 1, 19 | "revision" : 42, 20 | "message" : "Found the meaning of life." 
21 | } 22 | } 23 | ] -------------------------------------------------------------------------------- /src/jellyroll/fixtures/initial_data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model" : "jellyroll.searchengine", 4 | "pk" : "1", 5 | "fields" : { 6 | "name" : "Google", 7 | "home" : "http://www.google.com/", 8 | "search_template" : "http://www.google.com/search?q=%s" 9 | } 10 | }, 11 | { 12 | "model" : "jellyroll.videosource", 13 | "pk" : "1", 14 | "fields" : { 15 | "name" : "Google", 16 | "home" : "http://video.google.com", 17 | "embed_template" : "http://video.google.com/googleplayer.swf?docId=%s&hl=en" 18 | } 19 | }, 20 | { 21 | "model" : "jellyroll.videosource", 22 | "pk" : "2", 23 | "fields" : { 24 | "name" : "YouTube", 25 | "home" : "http://www.youtube.com", 26 | "embed_template" : "http://www.youtube.com/v/%s" 27 | } 28 | } 29 | ] -------------------------------------------------------------------------------- /src/jellyroll/fixtures/photos.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model" : "jellyroll.photo", 4 | "pk" : "1", 5 | "fields" : { 6 | "server_id" : "123", 7 | "secret" : "1234567890", 8 | "taken_by" : "jacobian", 9 | "cc_license" : "http://creativecommons.org/licenses/by/2.0/", 10 | "title" : "Photo taken by me" 11 | } 12 | }, 13 | { 14 | "model" : "jellyroll.photo", 15 | "pk" : "2", 16 | "fields" : { 17 | "server_id" : "123", 18 | "secret" : "1234567890", 19 | "taken_by" : "someoneelse", 20 | "cc_license" : "http://creativecommons.org/licenses/by/2.0/", 21 | "title" : "Photo taken by someone else with a BY license" 22 | } 23 | }, 24 | { 25 | "model" : "jellyroll.photo", 26 | "pk" : "3", 27 | "fields" : { 28 | "server_id" : "123", 29 | "secret" : "1234567890", 30 | "taken_by" : "someoneelse", 31 | "cc_license" : "", 32 | "title" : "Photo taken by someone else with no license" 33 | } 34 | }, 35 | { 36 | "model" : 
"jellyroll.photo", 37 | "pk" : "4", 38 | "fields" : { 39 | "server_id" : "123", 40 | "secret" : "1234567890", 41 | "taken_by" : "someoneelse", 42 | "cc_license" : "http://creativecommons.org/licenses/by-nc-nd/2.0/", 43 | "title" : "Photo taken by someone else with a NC-ND license" 44 | } 45 | }, 46 | { 47 | "model" : "jellyroll.photo", 48 | "pk" : "5", 49 | "fields" : { 50 | "server_id" : "123", 51 | "secret" : "1234567890", 52 | "taken_by" : "someoneelse", 53 | "cc_license" : "http://creativecommons.org/licenses/by-sa/2.0/", 54 | "title" : "Photo taken by someone else with an SA license" 55 | } 56 | } 57 | ] -------------------------------------------------------------------------------- /src/jellyroll/fixtures/tracks.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model" : "jellyroll.track", 4 | "pk" : "1", 5 | "fields" : { 6 | "artist_name" : "Outkast", 7 | "track_name" : "The Train (feat. Scar & Sleepy Brown)" 8 | } 9 | } 10 | ] -------------------------------------------------------------------------------- /src/jellyroll/fixtures/videos.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model" : "jellyroll.video", 4 | "pk" : "1", 5 | "fields" : { 6 | "source" : 1, 7 | "title" : "Crazy Frog Christmas Funny Video", 8 | "url" : "http://video.google.com/videoplay?docid=-1182786924290841590" 9 | } 10 | }, 11 | { 12 | "model" : "jellyroll.video", 13 | "pk" : "2", 14 | "fields" : { 15 | "source" : 2, 16 | "title" : "Speed Painting with Ketchup and French Fries", 17 | "url" : "http://youtube.com/watch?v=1gvGDsIYrrQ" 18 | } 19 | } 20 | ] -------------------------------------------------------------------------------- /src/jellyroll/fixtures/websearches.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model" : "jellyroll.websearch", 4 | "pk" : "1", 5 | "fields" : { 6 | "engine" : 1, 7 | "query" : "test" 8 | 
} 9 | }, 10 | { 11 | "model" : "jellyroll.websearchresult", 12 | "pk" : "1", 13 | "fields" : { 14 | "search" : 1, 15 | "title" : "Test Central Home", 16 | "url" : "http://www.test.com/" 17 | } 18 | } 19 | ] -------------------------------------------------------------------------------- /src/jellyroll/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobian/jellyroll/02751b3108b6f6ae732a801d42ca3c85cc759978/src/jellyroll/management/__init__.py -------------------------------------------------------------------------------- /src/jellyroll/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobian/jellyroll/02751b3108b6f6ae732a801d42ca3c85cc759978/src/jellyroll/management/commands/__init__.py -------------------------------------------------------------------------------- /src/jellyroll/management/commands/jellyroll_update.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import optparse 3 | import jellyroll.providers 4 | from django.core.management.base import BaseCommand 5 | 6 | class Command(BaseCommand): 7 | option_list = BaseCommand.option_list + ( 8 | optparse.make_option( 9 | "-p", "--provider", 10 | dest="providers", 11 | action="append", 12 | help="Only use certain provider(s)." 13 | ), 14 | optparse.make_option( 15 | "-l", "--list-providers", 16 | action="store_true", 17 | help="Display a list of active data providers." 
class ItemManager(models.Manager):
    """
    Manager that builds and maintains Items for instances of other models.
    """

    def __init__(self):
        super(ItemManager, self).__init__()
        # Model classes registered through follow_model(), keyed by their
        # lowercased class name.
        self.models_by_name = {}

    def _save_without_signal(self, instance):
        """
        Save ``instance`` while this manager's post_save handler is detached.

        Detaching avoids re-entering create_or_update() from the save it
        performs itself. The handler is re-attached in a ``finally`` block so
        that a failing ``save()`` cannot leave the model unfollowed.
        """
        sender = type(instance)
        try:
            signals.post_save.disconnect(self.create_or_update, sender=sender)
        except Exception:
            # Deliberate best effort: if detaching failed, leave the signal
            # wiring exactly as it was.
            reconnect = False
        else:
            reconnect = True
        try:
            instance.save()
        finally:
            if reconnect:
                signals.post_save.connect(self.create_or_update, sender=sender)

    def create_or_update(self, instance, timestamp=None, url=None, tags="", source="INTERACTIVE", source_id="", **kwargs):
        """
        Create or update an Item from some instance.

        ``instance`` is saved first if it has no primary key yet. Returns the
        saved Item, or ``None`` when the instance has a false ``jellyrollable``
        attribute.

        * ``timestamp`` -- replaced by ``instance.timestamp`` whenever the
          instance has that attribute. If the result is ``None``, a new Item
          gets the current time and an existing Item keeps its timestamp.
        * ``url`` -- falls back to ``instance.url``; only applied when the
          Item is created.
        * ``tags`` -- falls back to the value of the first TagField found on
          the instance.
        * ``source`` / ``source_id`` -- stored on the Item on every call.

        Extra keyword arguments are accepted and ignored, so this method can
        be connected directly as a post_save handler (see follow_model()).
        """
        # If the instance hasn't already been saved, save it first, with the
        # post-save handler detached (otherwise we could get an infinite loop).
        if instance._get_pk_val() is None:
            self._save_without_signal(instance)

        # Make sure the item "should" be registered.
        if not getattr(instance, "jellyrollable", True):
            return

        # Check to see if the timestamp is being updated, possibly pulling
        # the timestamp from the instance.
        if hasattr(instance, "timestamp"):
            timestamp = instance.timestamp
        if timestamp is None:
            update_timestamp = False
            timestamp = datetime.datetime.now()
        else:
            update_timestamp = True

        # Ditto for tags.
        if not tags:
            for f in instance._meta.fields:
                if isinstance(f, TagField):
                    tags = getattr(instance, f.attname)
                    break

        if not url:
            if hasattr(instance, 'url'):
                url = instance.url

        # Create the Item object.
        ctype = ContentType.objects.get_for_model(instance)
        item, created = self.get_or_create(
            content_type = ctype,
            object_id = force_unicode(instance._get_pk_val()),
            defaults = dict(
                timestamp = timestamp,
                source = source,
                source_id = source_id,
                tags = tags,
                url = url,
            )
        )
        item.tags = tags
        item.source = source
        item.source_id = source_id
        if update_timestamp:
            item.timestamp = timestamp

        # Save and return the item.
        item.save()
        return item

    def follow_model(self, model):
        """
        Follow a particular model class, updating associated Items automatically.
        """
        self.models_by_name[model.__name__.lower()] = model
        signals.post_save.connect(self.create_or_update, sender=model)

    def get_for_model(self, model):
        """
        Return a QuerySet of only items of a certain type.
        """
        return self.filter(content_type=ContentType.objects.get_for_model(model))

    def get_last_update_of_model(self, model, **kwargs):
        """
        Return the last time a given model's items were updated. Returns the
        epoch (``datetime.datetime.fromtimestamp(0)``) if the items were never
        updated. Any keyword arguments are applied as extra queryset filters.
        """
        qs = self.get_for_model(model)
        if kwargs:
            qs = qs.filter(**kwargs)
        try:
            return qs.order_by('-timestamp')[0].timestamp
        except IndexError:
            return datetime.datetime.fromtimestamp(0)
class Bookmark(models.Model):
    """
    A saved link. The field layout follows del.icio.us; the thumbnail field
    was added for ma.gnolia users.
    """

    url = models.URLField(
        max_length=1000,
        unique=True,
    )
    description = models.CharField(max_length=255)
    extended = models.TextField(blank=True)
    thumbnail = models.ImageField(
        blank=True,
        upload_to="img/jellyroll/bookmarks/%Y/%m",
    )
    thumbnail_url = models.URLField(
        blank=True,
        max_length=1000,
        verify_exists=False,
    )

    def __unicode__(self):
        # A bookmark is represented by the link it points at.
        return self.url
class Photo(models.Model):
    """
    A photo someone took. This person could be you, in which case you can
    obviously do whatever you want with it. However, it could also have been
    taken by someone else, so in that case there's a few fields for storing the
    object's rights.

    The model is based on Flickr, and won't work with anything else :(
    """

    # Key Flickr info
    photo_id = models.CharField(unique=True, primary_key=True, max_length=50)
    farm_id = models.PositiveSmallIntegerField(null=True)
    server_id = models.PositiveSmallIntegerField()
    secret = models.CharField(max_length=30, blank=True)

    # Rights metadata
    taken_by = models.CharField(max_length=100, blank=True)
    cc_license = models.URLField(blank=True, choices=CC_LICENSES)

    # Main metadata
    title = models.CharField(max_length=250)
    description = models.TextField(blank=True)
    comment_count = models.PositiveIntegerField(max_length=5, default=0)

    # Date metadata
    date_uploaded = models.DateTimeField(blank=True, null=True)
    date_updated = models.DateTimeField(blank=True, null=True)

    # EXIF metadata, stored as a JSON string and exposed as a dict.
    _exif = models.TextField(blank=True)
    def _set_exif(self, d):
        self._exif = simplejson.dumps(d)
    def _get_exif(self):
        if self._exif:
            return simplejson.loads(self._exif)
        else:
            return {}
    exif = property(_get_exif, _set_exif, "Photo EXIF data, as a dict.")

    def _get_farm(self):
        # Host prefix such as "farm3."; empty when no farm id is stored.
        if self.farm_id:
            return ''.join(["farm", str(self.farm_id), "."])
        return ''
    farm = property(_get_farm)

    def __unicode__(self):
        return self.title

    def url(self):
        return "http://www.flickr.com/photos/%s/%s/" % (self.taken_by, self.photo_id)
    url = property(url)

    def timestamp(self):
        return self.date_uploaded
    timestamp = property(timestamp)

    ### Image URLs ###

    def get_image_url(self, size=None):
        """
        Return the static image URL for this photo.

        ``size`` may be one of 'm', 's', 't', 'b' or 'o' to get a URL with
        that size suffix; any other value gives the URL with no suffix.
        """
        if size in ('m', 's', 't', 'b', 'o'):
            return "http://%sstatic.flickr.com/%s/%s_%s_%s.jpg" % \
                (self.farm, self.server_id, self.photo_id, self.secret, size)
        else:
            return "http://%sstatic.flickr.com/%s/%s_%s.jpg" % \
                (self.farm, self.server_id, self.photo_id, self.secret)

    image_url = property(lambda self: self.get_image_url())
    square_url = property(lambda self: self.get_image_url('s'))
    thumbnail_url = property(lambda self: self.get_image_url('t'))
    small_url = property(lambda self: self.get_image_url('m'))
    large_url = property(lambda self: self.get_image_url('b'))
    original_url = property(lambda self: self.get_image_url('o'))

    ### Rights ###

    def license_code(self):
        """
        Return the license segment of the CC license URL (e.g. "by-nc-nd"),
        or None when the photo has no CC license.
        """
        if not self.cc_license:
            return None
        path = urlparse.urlparse(self.cc_license)[2]
        return path.split("/")[2]
    license_code = property(license_code)

    def taken_by_me(self):
        return self.taken_by == getattr(settings, "FLICKR_USERNAME", "")
    taken_by_me = property(taken_by_me)

    def can_republish(self):
        """
        Is it OK to republish this photo, or must it be linked only?
        """

        # If I took the photo, then it's always OK to republish.
        if self.taken_by_me:
            return True

        # If the photo has no CC license, then it's never OK to republish.
        elif self.license_code is None:
            return False

        # If the settings flags this site as "commercial" and it's an NC
        # license, then no republish for you.
        elif getattr(settings, "SITE_IS_COMMERCIAL", False) and "nc" in self.license_code:
            return False

        # Otherwise, we're OK to republish it.
        else:
            return True
    can_republish = property(can_republish)

    def derivative_ok(self):
        """Is it OK to produce derivative works?"""
        # My own photo can be republishable with no CC license at all, in
        # which case license_code is None; treat that as "no restrictions"
        # instead of raising TypeError on the membership test.
        return self.can_republish and "nd" not in (self.license_code or "")
    derivative_ok = property(derivative_ok)

    def must_share_alike(self):
        """Must I share derivative works?"""
        # Same None guard as derivative_ok: no license code means no
        # share-alike clause.
        return self.can_republish and "sa" in (self.license_code or "")
    must_share_alike = property(must_share_alike)
class CodeCommit(models.Model):
    """
    One change checked into a CodeRepository.
    """
    repository = models.ForeignKey(CodeRepository, related_name="commits")
    revision = models.CharField(max_length=200)
    message = models.TextField()

    class Meta:
        ordering = ["-revision"]

    def __unicode__(self):
        summary = text.truncate_words(self.message, 10)
        return "[%s] %s" % (self.format_revision(), summary)

    def format_revision(self):
        """
        Return the revision in a compact, readable form: integer revisions
        are rendered as plain numbers, anything else is cut to its first
        seven characters.
        """
        rev = self.revision
        try:
            numeric = int(rev)
        except ValueError:
            # Not a number, e.g. a hash: keep only the leading characters.
            return rev[:7]
        return str(numeric)

    @property
    def url(self):
        """
        The repository's public changeset template filled in with this
        revision, or an empty string when the repository has no template.
        """
        template = self.repository.public_changeset_template
        if not template:
            return ""
        return template % self.revision
def active_providers():
    """
    Return a dict of {name: module} of active, enabled providers.

    Each entry of ``settings.JELLYROLL_PROVIDERS`` is either a dotted module
    path or a path ending in ``.*``, which is expanded with ``expand_star``.
    Modules that fail to import are logged and skipped; modules whose
    ``enabled()`` returns a false value are left out of the result.
    """
    providers = {}
    for provider in settings.JELLYROLL_PROVIDERS:
        if provider.endswith('.*'):
            to_load = expand_star(provider)
        else:
            to_load = [provider]
        for p in to_load:
            try:
                # The non-empty fromlist makes __import__ return the module
                # named by the full dotted path instead of the top package.
                mod = __import__(p, '', '', [''])
            except ImportError as e:
                log.error("Couldn't import provider %r: %s" % (p, e))
                # Move on to the next provider. Falling through would use
                # ``mod`` while it is unbound, or while it still refers to
                # the previously imported provider.
                continue
            if mod.enabled():
                providers[p] = mod
    return providers
class DeliciousClient(object):
    """
    A super-minimal delicious client :)

    Attribute access builds the API path: ``client.posts.get`` is a new
    client whose ``method`` is ``"v1/posts/get"``. Calling a client requests
    that path with the keyword arguments as query parameters and returns
    whatever ``utils.getxml`` returns.
    """

    # Time of the most recent API call. Stored on the class, so it is shared
    # by every client, including the ones created by __getattr__.
    lastcall = 0

    def __init__(self, username, password, method='v1'):
        self.username, self.password = username, password
        self.method = method

    def __getattr__(self, method):
        # Only reached for names that are not real attributes: extend the
        # path by one segment and hand back a new client for it.
        return DeliciousClient(self.username, self.password, '%s/%s' % (self.method, method))

    def __repr__(self):
        # The format string needs a placeholder; an empty one raises
        # TypeError as soon as the client is printed or logged.
        return "<DeliciousClient: %s>" % self.method

    def __call__(self, **params):
        # Yahoo's rule is "no calls quicker than every 1 second"; waiting
        # until 2 seconds have passed since the last call stays within it.
        delta = time.time() - DeliciousClient.lastcall
        if delta < 2:
            time.sleep(2 - delta)
        DeliciousClient.lastcall = time.time()
        url = ("https://api.del.icio.us/%s?" % self.method) + urllib.urlencode(params)
        return utils.getxml(url, username=self.username, password=self.password)
class FlickrError(Exception):
    """
    Error carrying the ``code`` and ``message`` of a failed Flickr call.
    """

    def __init__(self, code, message):
        self.code = code
        self.message = message

    def __str__(self):
        details = (self.code, self.message)
        return 'FlickrError %s: %s' % details
def update():
    """
    Import the public photos of ``settings.FLICKR_USER_ID`` from Flickr.

    Photos are fetched page by page and passed to ``_handle_photo``. The run
    ends when the pages are exhausted or as soon as a photo is found whose
    "date taken" is older than the last recorded update of the Photo model.
    """
    flickr = FlickrClient(settings.FLICKR_API_KEY)

    # Preload the list of licenses as {license id: license URL}
    license_info = flickr.photos.licenses.getInfo()
    licenses = dict((l["id"], smart_unicode(l["url"])) for l in license_info["licenses"]["license"])

    # Handle update by pages until we see photos we've already handled
    last_update_date = Item.objects.get_last_update_of_model(Photo)
    page = 1
    while True:
        log.debug("Fetching page %s of photos", page)
        resp = flickr.people.getPublicPhotos(user_id=settings.FLICKR_USER_ID, extras="license,date_taken", per_page="500", page=str(page))
        photos = resp["photos"]
        if page > photos["pages"]:
            log.debug("Ran out of photos; stopping.")
            break

        for photodict in photos["photo"]:
            timestamp = utils.parsedate(str(photodict["datetaken"]))
            if timestamp < last_update_date:
                log.debug("Hit an old photo (taken %s; last update was %s); stopping.", timestamp, last_update_date)
                # Leave the function, not just this page: a plain ``break``
                # only ended the inner loop, so every remaining page was
                # still downloaded.
                return

            photo_id = utils.safeint(photodict["id"])
            license = licenses[photodict["license"]]
            secret = smart_unicode(photodict["secret"])
            _handle_photo(flickr, photo_id, secret, license, timestamp)

        page += 1
_convert_tags(info["tags"]), 138 | source = __name__, 139 | ) 140 | _handle_photo = transaction.commit_on_success(_handle_photo) 141 | 142 | def _convert_exif(exif): 143 | converted = {} 144 | for e in exif["photo"]["exif"]: 145 | key = smart_unicode(e["label"]) 146 | val = e.get("clean", e["raw"])["_content"] 147 | val = smart_unicode(val) 148 | converted[key] = val 149 | return converted 150 | 151 | def _convert_tags(tags): 152 | return " ".join(set(t["_content"] for t in tags["tag"] if not t["machine_tag"])) 153 | -------------------------------------------------------------------------------- /src/jellyroll/providers/gitscm.py: -------------------------------------------------------------------------------- 1 | import re 2 | import time 3 | import logging 4 | import datetime 5 | import shutil 6 | import tempfile 7 | from unipath import FSPath as Path 8 | from django.db import transaction 9 | from django.utils.encoding import smart_unicode 10 | from jellyroll.models import Item, CodeRepository, CodeCommit 11 | from jellyroll.providers import utils 12 | 13 | try: 14 | import git 15 | except ImportError: 16 | git = None 17 | 18 | 19 | log = logging.getLogger("jellyroll.providers.gitscm") 20 | 21 | # 22 | # Public API 23 | # 24 | def enabled(): 25 | ok = git is not None 26 | if not ok: 27 | log.warn("The GIT provider is not available because the GitPython module " 28 | "isn't installed.") 29 | return ok 30 | 31 | def update(): 32 | for repository in CodeRepository.objects.filter(type="git"): 33 | _update_repository(repository) 34 | 35 | # 36 | # Private API 37 | # 38 | 39 | def _update_repository(repository): 40 | source_identifier = "%s:%s" % (__name__, repository.url) 41 | last_update_date = Item.objects.get_last_update_of_model(CodeCommit, source=source_identifier) 42 | log.info("Updating changes from %s since %s", repository.url, last_update_date) 43 | 44 | # Git chokes on the 1969-12-31 sentinal returned by 45 | # get_last_update_of_model, so fix that up. 
@transaction.commit_on_success
def _handle_revision(repository, commit):
    """
    Store one git commit as a CodeCommit and, if it is new, as an Item.

    Returns the result of ``Item.objects.create_or_update`` for a newly
    created CodeCommit, and ``None`` when the commit was already stored.
    """
    log.debug("Handling [%s] from %s", commit.id[:7], repository.url)
    ci, created = CodeCommit.objects.get_or_create(
        revision = commit.id,
        repository = repository,
        defaults = {"message": smart_unicode(commit.message)}
    )
    if not created:
        # Already recorded on an earlier run; nothing more to do.
        return None

    # stored as UTC
    seconds = time.mktime(commit.committed_date)
    if utils.JELLYROLL_ADJUST_DATETIME:
        # The adjusted value has to become the Item's timestamp. It used to
        # be returned from here, so no Item was created for the commit
        # whenever the setting was on.
        # NOTE(review): assumes utc_to_local_timestamp returns a datetime
        # suitable for Item.timestamp -- confirm against providers.utils.
        timestamp = utils.utc_to_local_timestamp(seconds)
    else:
        timestamp = datetime.datetime.fromtimestamp(seconds)

    return Item.objects.create_or_update(
        instance = ci,
        timestamp = timestamp,
        source = "%s:%s" % (__name__, repository.url),
    )
import feedparser 3 | import urlparse 4 | import logging 5 | from django.conf import settings 6 | from django.contrib.contenttypes.models import ContentType 7 | from django.db import transaction 8 | from django.utils import tzinfo 9 | from django.utils.encoding import smart_unicode 10 | from jellyroll.models import Item, SearchEngine, WebSearch, WebSearchResult 11 | from jellyroll.models import VideoSource, Video 12 | from jellyroll.providers import utils 13 | 14 | RSS_URL = "https://%s:%s@www.google.com/searchhistory/?output=rss" 15 | VIDEO_TAG_URL = "http://video.google.com/tags?docid=%s" 16 | 17 | # Monkeypatch feedparser to understand smh:query_guid elements 18 | feedparser._FeedParserMixin._start_smh_query_guid = lambda self, attrs: self.push("query_guid", 1) 19 | 20 | log = logging.getLogger("jellyroll.providers.gsearch") 21 | 22 | # 23 | # Public API 24 | # 25 | 26 | def enabled(): 27 | ok = hasattr(settings, 'GOOGLE_USERNAME') and hasattr(settings, 'GOOGLE_PASSWORD') 28 | if not ok: 29 | log.warn('The Google Search provider is not available because the ' 30 | 'GOOGLE_USERNAME and/or GOOGLE_PASSWORD settings are ' 31 | 'undefined.') 32 | return ok 33 | 34 | def update(): 35 | feed = feedparser.parse(RSS_URL % (settings.GOOGLE_USERNAME, settings.GOOGLE_PASSWORD)) 36 | for entry in feed.entries: 37 | if entry.tags[0].term == "web query": 38 | _handle_query(entry) 39 | elif entry.tags[0].term == "web result": 40 | _handle_result(entry) 41 | elif entry.tags[0].term == "video result": 42 | _handle_video(entry) 43 | 44 | # 45 | # Private API 46 | # 47 | 48 | # Shortcut 49 | CT = ContentType.objects.get_for_model 50 | 51 | def _handle_query(entry): 52 | engine = SearchEngine.objects.get(name="Google") 53 | guid = smart_unicode(urlparse.urlsplit(entry.guid)[2].replace("/searchhistory/", "")) 54 | query = smart_unicode(entry.title) 55 | timestamp = datetime.datetime(tzinfo=tzinfo.FixedOffset(0), *entry.updated_parsed[:6]) 56 | 57 | log.debug("Handling Google query 
for %r", query) 58 | try: 59 | item = Item.objects.get( 60 | content_type = CT(WebSearch), 61 | source = __name__, 62 | source_id = guid 63 | ) 64 | except Item.DoesNotExist: 65 | item = Item.objects.create_or_update( 66 | instance = WebSearch(engine=engine, query=query), 67 | timestamp = timestamp, 68 | source = __name__, 69 | source_id = guid, 70 | ) 71 | _handle_query = transaction.commit_on_success(_handle_query) 72 | 73 | def _handle_result(entry): 74 | guid = smart_unicode(entry.query_guid) 75 | title = smart_unicode(entry.title) 76 | url = smart_unicode(entry.link) 77 | 78 | log.debug("Adding search result: %r" % url) 79 | try: 80 | item = Item.objects.get( 81 | content_type = CT(WebSearch), 82 | source = __name__, 83 | source_id = guid 84 | ) 85 | except Item.DoesNotExist: 86 | log.debug("Skipping unknown query GUID: %r" % guid) 87 | return 88 | 89 | WebSearchResult.objects.get_or_create( 90 | search = item.object, 91 | url = url, 92 | defaults = {'title' : title}, 93 | ) 94 | _handle_result = transaction.commit_on_success(_handle_result) 95 | 96 | def _handle_video(entry): 97 | vs = VideoSource.objects.get(name="Google") 98 | url = smart_unicode(entry.link) 99 | title = smart_unicode(entry.title) 100 | timestamp = datetime.datetime(tzinfo=tzinfo.FixedOffset(0), *entry.updated_parsed[:6]) 101 | 102 | log.debug("Adding viewed video: %r" % title) 103 | vid, created = Video.objects.get_or_create( 104 | source = vs, 105 | url = url, 106 | defaults = {'title' : title}, 107 | ) 108 | return Item.objects.create_or_update( 109 | instance = vid, 110 | timestamp = timestamp, 111 | source = __name__, 112 | ) 113 | _handle_video = transaction.commit_on_success(_handle_video) 114 | -------------------------------------------------------------------------------- /src/jellyroll/providers/lastfm.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import hashlib 3 | import logging 4 | from django.conf import settings 
from django.db import transaction
from django.template.defaultfilters import slugify
from django.utils.functional import memoize
from django.utils.http import urlquote
from django.utils.encoding import smart_str, smart_unicode
from httplib2 import HttpLib2Error
from jellyroll.models import Item, Track
from jellyroll.providers import utils

#
# API URLs
#

RECENT_TRACKS_URL = "http://ws.audioscrobbler.com/1.0/user/%s/recenttracks.xml?limit=100"
TRACK_TAGS_URL = "http://ws.audioscrobbler.com/1.0/track/%s/%s/toptags.xml"
ARTIST_TAGS_URL = "http://ws.audioscrobbler.com/1.0/artist/%s/toptags.xml"

#
# Public API
#

log = logging.getLogger("jellyroll.providers.lastfm")

def enabled():
    """Return True when ``settings.LASTFM_USERNAME`` is defined; warn otherwise."""
    ok = hasattr(settings, 'LASTFM_USERNAME')
    if not ok:
        log.warn('The Last.fm provider is not available because the '
                 'LASTFM_USERNAME settings is undefined.')
    return ok

def update():
    """
    Fetch the recent-tracks feed for ``settings.LASTFM_USERNAME`` and store
    every track that is not already recorded.
    """
    last_update_date = Item.objects.get_last_update_of_model(Track)
    log.debug("Last update date: %s", last_update_date)

    xml = utils.getxml(RECENT_TRACKS_URL % settings.LASTFM_USERNAME)
    for track in xml.getiterator("track"):
        artist = track.find('artist')
        artist_name = smart_unicode(artist.text)
        artist_mbid = artist.get('mbid')
        track_name = smart_unicode(track.find('name').text)
        # An empty <mbid/> element has text None. Keep it as None so that
        # _handle_track stores '' rather than the literal string u"None"
        # that smart_unicode(None) would produce.
        track_mbid = track.find('mbid').text
        if track_mbid is not None:
            track_mbid = smart_unicode(track_mbid)
        url = smart_unicode(track.find('url').text)

        # date delivered as UTC
        uts = int(track.find('date').get('uts'))
        if utils.JELLYROLL_ADJUST_DATETIME:
            timestamp = utils.utc_to_local_timestamp(uts)
        else:
            timestamp = datetime.datetime.fromtimestamp(uts)

        if not _track_exists(artist_name, track_name, timestamp):
            tags = _tags_for_track(artist_name, track_name)
            _handle_track(artist_name, artist_mbid, track_name, track_mbid, url, timestamp, tags)

57 | # 58 | # Private API 59 | # 60 | 61 | def _tags_for_track(artist_name, track_name): 62 | """ 63 | Get the top tags for a track. Also fetches tags for the artist. Only 64 | includes tracks that break a certain threshold of usage, defined by 65 | settings.LASTFM_TAG_USAGE_THRESHOLD (which defaults to 15). 66 | """ 67 | 68 | urls = [ 69 | ARTIST_TAGS_URL % (urlquote(artist_name)), 70 | TRACK_TAGS_URL % (urlquote(artist_name), urlquote(track_name)), 71 | ] 72 | tags = set() 73 | for url in urls: 74 | tags.update(_tags_for_url(url)) 75 | 76 | def _tags_for_url(url): 77 | tags = set() 78 | try: 79 | xml = utils.getxml(url) 80 | except HttpLib2Error, e: 81 | if e.code == 408: 82 | return "" 83 | else: 84 | raise 85 | except SyntaxError: 86 | return "" 87 | for t in xml.getiterator("tag"): 88 | count = utils.safeint(t.find("count").text) 89 | if count >= getattr(settings, 'LASTFM_TAG_USAGE_THRESHOLD', 15): 90 | tag = slugify(smart_unicode(t.find("name").text)) 91 | tags.add(tag[:50]) 92 | 93 | return tags 94 | 95 | # Memoize tags to avoid unnecessary API calls. 
96 | _tag_cache = {} 97 | _tags_for_url = memoize(_tags_for_url, _tag_cache, 1) 98 | 99 | @transaction.commit_on_success 100 | def _handle_track(artist_name, artist_mbid, track_name, track_mbid, url, timestamp, tags): 101 | t = Track( 102 | artist_name = artist_name, 103 | track_name = track_name, 104 | url = url, 105 | track_mbid = track_mbid is not None and track_mbid or '', 106 | artist_mbid = artist_mbid is not None and artist_mbid or '', 107 | ) 108 | if not _track_exists(artist_name, track_name, timestamp): 109 | log.debug("Saving track: %r - %r", artist_name, track_name) 110 | return Item.objects.create_or_update( 111 | instance = t, 112 | timestamp = timestamp, 113 | tags = tags, 114 | source = __name__, 115 | source_id = _source_id(artist_name, track_name, timestamp), 116 | ) 117 | 118 | def _source_id(artist_name, track_name, timestamp): 119 | return hashlib.md5(smart_str(artist_name) + smart_str(track_name) + str(timestamp)).hexdigest() 120 | 121 | def _track_exists(artist_name, track_name, timestamp): 122 | id = _source_id(artist_name, track_name, timestamp) 123 | try: 124 | Item.objects.get(source=__name__, source_id=id) 125 | except Item.DoesNotExist: 126 | return False 127 | else: 128 | return True 129 | 130 | -------------------------------------------------------------------------------- /src/jellyroll/providers/latitude.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provide location from Google Latitude. 3 | 4 | Requires that you've turned on public location at 5 | http://www.google.com/latitude/apps/badge. 
6 | """ 7 | 8 | import datetime 9 | import logging 10 | from django.conf import settings 11 | from django.db import transaction 12 | from jellyroll.models import Location, Item 13 | from jellyroll.providers import utils 14 | 15 | log = logging.getLogger("jellyroll.providers.latitude") 16 | 17 | # 18 | # Public API 19 | # 20 | def enabled(): 21 | ok = hasattr(settings, 'GOOGLE_LATITUDE_USER_ID') 22 | if not ok: 23 | log.warn('The Latitude provider is not available because the ' 24 | 'GOOGLE_LATITUDE_USER_ID settings is undefined.') 25 | return ok 26 | 27 | def update(): 28 | last_update_date = Item.objects.get_last_update_of_model(Location) 29 | log.debug("Last update date: %s", last_update_date) 30 | _update_location(settings.GOOGLE_LATITUDE_USER_ID, since=last_update_date) 31 | 32 | # 33 | # Private API 34 | # 35 | 36 | @transaction.commit_on_success 37 | def _update_location(user_id, since): 38 | json = utils.getjson('http://www.google.com/latitude/apps/badge/api?user=%s&type=json' % user_id) 39 | feature = json['features'][0] 40 | 41 | lat, lng = map(str, feature['geometry']['coordinates']) 42 | name = feature['properties']['reverseGeocode'] 43 | timestamp = datetime.datetime.fromtimestamp(feature['properties']['timeStamp']) 44 | if timestamp > since: 45 | log.debug("New location: %s", name) 46 | loc = Location(latitude=lat, longitude=lng, name=name) 47 | return Item.objects.create_or_update( 48 | instance = loc, 49 | timestamp = timestamp, 50 | source = __name__, 51 | source_id = str(feature['properties']['timeStamp']), 52 | ) -------------------------------------------------------------------------------- /src/jellyroll/providers/svn.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | import datetime 4 | from django.db import transaction 5 | from django.utils.encoding import smart_unicode 6 | from jellyroll.models import Item, CodeRepository, CodeCommit 7 | from jellyroll.providers import 
utils 8 | 9 | 10 | try: 11 | import pysvn 12 | except ImportError: 13 | pysvn = None 14 | 15 | log = logging.getLogger("jellyroll.providers.svn") 16 | 17 | # 18 | # Public API 19 | # 20 | def enabled(): 21 | ok = pysvn is not None 22 | if not ok: 23 | log.warn("The SVN provider is not available because the pysvn module " 24 | "isn't installed.") 25 | return ok 26 | 27 | def update(): 28 | for repository in CodeRepository.objects.filter(type="svn"): 29 | _update_repository(repository) 30 | 31 | # 32 | # Private API 33 | # 34 | 35 | def _update_repository(repository): 36 | source_identifier = "%s:%s" % (__name__, repository.url) 37 | last_update_date = Item.objects.get_last_update_of_model(CodeCommit, source=source_identifier) 38 | log.info("Updating changes from %s since %s", repository.url, last_update_date) 39 | rev = pysvn.Revision(pysvn.opt_revision_kind.date, time.mktime(last_update_date.timetuple())) 40 | c = pysvn.Client() 41 | for revision in reversed(c.log(repository.url, revision_end=rev)): 42 | if revision.author == repository.username: 43 | _handle_revision(repository, revision) 44 | 45 | def _handle_revision(repository, r): 46 | log.debug("Handling [%s] from %s" % (r.revision.number, repository.url)) 47 | ci, created = CodeCommit.objects.get_or_create( 48 | revision = str(r.revision.number), 49 | repository = repository, 50 | defaults = {"message": smart_unicode(r.message)} 51 | ) 52 | if created: 53 | return Item.objects.create_or_update( 54 | instance = ci, 55 | timestamp = datetime.datetime.fromtimestamp(r.date), 56 | source = "%s:%s" % (__name__, repository.url), 57 | ) 58 | _handle_revision = transaction.commit_on_success(_handle_revision) 59 | -------------------------------------------------------------------------------- /src/jellyroll/providers/twitter.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import datetime 3 | import logging 4 | import dateutil 5 | import re 6 | from 
django.conf import settings 7 | from django.db import transaction 8 | from django.template.defaultfilters import slugify 9 | from django.utils.functional import memoize 10 | from django.utils.http import urlquote 11 | from django.utils.encoding import smart_str, smart_unicode 12 | from httplib2 import HttpLib2Error 13 | from jellyroll.providers import utils 14 | from jellyroll.models import Item, Message, ContentLink 15 | 16 | 17 | # 18 | # API URLs 19 | # 20 | 21 | RECENT_STATUSES_URL = "http://twitter.com/statuses/user_timeline/%s.rss" 22 | USER_URL = "http://twitter.com/%s" 23 | 24 | # 25 | # Public API 26 | # 27 | 28 | log = logging.getLogger("jellyroll.providers.twitter") 29 | 30 | def enabled(): 31 | return True 32 | 33 | def update(): 34 | last_update_date = Item.objects.get_last_update_of_model(Message) 35 | log.debug("Last update date: %s", last_update_date) 36 | 37 | xml = utils.getxml(RECENT_STATUSES_URL % settings.TWITTER_USERNAME) 38 | for status in xml.getiterator("item"): 39 | message = status.find('title') 40 | message_text = smart_unicode(message.text) 41 | url = smart_unicode(status.find('link').text) 42 | 43 | # pubDate delivered as UTC 44 | timestamp = dateutil.parser.parse(status.find('pubDate').text) 45 | if utils.JELLYROLL_ADJUST_DATETIME: 46 | timestamp = utils.utc_to_local_datetime(timestamp) 47 | 48 | if not _status_exists(message_text, url, timestamp): 49 | _handle_status(message_text, url, timestamp) 50 | 51 | # 52 | # GLOBAL CLUTTER 53 | # 54 | 55 | TWITTER_TRANSFORM_MSG = False 56 | TWITTER_RETWEET_TXT = "Forwarding from %s: " 57 | try: 58 | TWITTER_TRANSFORM_MSG = settings.TWITTER_TRANSFORM_MSG 59 | TWITTER_RETWEET_TXT = settings.TWITTER_RETWEET_TXT 60 | except AttributeError: 61 | pass 62 | 63 | if TWITTER_TRANSFORM_MSG: 64 | USER_LINK_TPL = '%s' 65 | TAG_RE = re.compile(r'(?P\#\w+)') 66 | USER_RE = re.compile(r'(?P@\w+)') 67 | RT_RE = re.compile(r'RT\s+(?P@\w+)') 68 | USERNAME_RE = re.compile(r'^%s:'%settings.TWITTER_USERNAME) 69 | 
70 | # modified from django.forms.fields.url_re 71 | URL_RE = re.compile( 72 | r'https?://' 73 | r'(?:(?:[A-Z0-9-]+\.)+[A-Z]{2,6}|' 74 | r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' 75 | r'(?::\d+)?' 76 | r'(?:/\S+|/?)', re.IGNORECASE) 77 | 78 | def _transform_retweet(matchobj): 79 | if '%s' in TWITTER_RETWEET_TXT: 80 | return TWITTER_RETWEET_TXT % matchobj.group('username') 81 | return TWITTER_RETWEET_TXT 82 | 83 | def _transform_user_ref_to_link(matchobj): 84 | user = matchobj.group('username')[1:] 85 | link = USER_URL % user 86 | return USER_LINK_TPL % \ 87 | (link,user,''.join(['@',user])) 88 | 89 | def _parse_message(message_text): 90 | """ 91 | Parse out some semantics for teh lulz. 92 | 93 | """ 94 | links = list() 95 | tags = "" 96 | 97 | # remove newlines 98 | message_text = message_text.replace('\n','') 99 | # generate link list for ContentLink 100 | links = [ link for link in URL_RE.findall(message_text) ] 101 | link_ctr = 1 102 | link_dict = {} 103 | for link in URL_RE.finditer(message_text): 104 | link_dict[link.group(0)] = link_ctr 105 | link_ctr += 1 106 | generate_link_num = lambda obj: "[%d]"%link_dict[obj.group(0)] 107 | # remove URLs referenced in message content 108 | if not hasattr(settings, 'TWITTER_REMOVE_LINKS') or settings.TWITTER_REMOVE_LINKS == True: 109 | message_text = URL_RE.sub(generate_link_num,message_text) 110 | # remove leading username 111 | message_text = USERNAME_RE.sub('',message_text) 112 | # check for RT-type retweet syntax 113 | message_text = RT_RE.sub(_transform_retweet,message_text) 114 | # replace @user references with links to their timeline 115 | message_text = USER_RE.sub(_transform_user_ref_to_link,message_text) 116 | # generate tags list 117 | tags = ' '.join( [tag[1:] for tag in TAG_RE.findall(message_text)] ) 118 | # extract defacto #tag style tweet tags 119 | if not hasattr(settings, 'TWITTER_REMOVE_TAGS') or settings.TWITTER_REMOVE_TAGS == True: 120 | message_text = TAG_RE.sub('',message_text) 121 | 122 | return 
(message_text.strip(),links,tags) 123 | 124 | log.info("Enabling message transforms") 125 | else: 126 | _parse_message = lambda msg: (msg,list(),"") 127 | log.info("Disabling message transforms") 128 | 129 | # 130 | # Private API 131 | # 132 | 133 | @transaction.commit_on_success 134 | def _handle_status(message_text, url, timestamp): 135 | message_text, links, tags = _parse_message(message_text) 136 | 137 | t = Message( 138 | message = message_text, 139 | ) 140 | 141 | if not _status_exists(message_text, url, timestamp): 142 | log.debug("Saving message: %r", message_text) 143 | item = Item.objects.create_or_update( 144 | instance = t, 145 | timestamp = timestamp, 146 | source = __name__, 147 | source_id = _source_id(message_text, url, timestamp), 148 | url = url, 149 | tags = tags, 150 | ) 151 | item.save() 152 | 153 | for link in links: 154 | l = ContentLink( 155 | url = link, 156 | identifier = link, 157 | ) 158 | l.save() 159 | t.links.add(l) 160 | 161 | def _source_id(message_text, url, timestamp): 162 | return hashlib.md5(smart_str(message_text) + smart_str(url) + str(timestamp)).hexdigest() 163 | 164 | def _status_exists(message_text, url, timestamp): 165 | id = _source_id(message_text, url, timestamp) 166 | try: 167 | Item.objects.get(source=__name__, source_id=id) 168 | except Item.DoesNotExist: 169 | return False 170 | else: 171 | return True 172 | -------------------------------------------------------------------------------- /src/jellyroll/providers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import httplib2 2 | import dateutil.parser 3 | import dateutil.tz 4 | from django.utils import simplejson 5 | from django.utils.encoding import force_unicode 6 | from django.conf import settings 7 | from jellyroll.providers.utils.anyetree import etree 8 | 9 | DEFAULT_HTTP_HEADERS = { 10 | "User-Agent" : "Jellyroll/1.0 (http://github.com/jacobian/jellyroll/tree/master)" 11 | } 12 | 13 | # 14 | # URL 
fetching sugar 15 | # 16 | 17 | def getxml(url, **kwargs): 18 | """Fetch and parse some XML. Returns an ElementTree""" 19 | xml = fetch_resource(url, **kwargs) 20 | return etree.fromstring(xml) 21 | 22 | def getjson(url, **kwargs): 23 | """Fetch and parse some JSON. Returns the deserialized JSON.""" 24 | json = fetch_resource(url, **kwargs) 25 | return simplejson.loads(json) 26 | 27 | def fetch_resource(url, method="GET", body=None, username=None, password=None, headers=None): 28 | h = httplib2.Http(timeout=15) 29 | h.force_exception_to_status_code = True 30 | 31 | if username is not None or password is not None: 32 | h.add_credentials(username, password) 33 | 34 | if headers is None: 35 | headers = DEFAULT_HTTP_HEADERS.copy() 36 | 37 | response, content = h.request(url, method, body, headers) 38 | return content 39 | 40 | # 41 | # Date handling utils 42 | # 43 | 44 | def parsedate(s): 45 | """ 46 | Convert a string into a (local, naive) datetime object. 47 | """ 48 | dt = dateutil.parser.parse(s) 49 | if dt.tzinfo: 50 | dt = dt.astimezone(dateutil.tz.tzlocal()).replace(tzinfo=None) 51 | return dt 52 | 53 | def safeint(s): 54 | """Always returns an int. Returns 0 on failure.""" 55 | try: 56 | return int(force_unicode(s)) 57 | except (ValueError, TypeError): 58 | return 0 59 | 60 | 61 | JELLYROLL_ADJUST_DATETIME = False 62 | if hasattr(settings,'JELLYROLL_ADJUST_DATETIME'): 63 | JELLYROLL_ADJUST_DATETIME = settings.JELLYROLL_ADJUST_DATETIME 64 | 65 | if JELLYROLL_ADJUST_DATETIME: 66 | try: 67 | import pytz 68 | except ImportError: 69 | import logging 70 | log = logging.getLogger('jellyroll.providers.utils') 71 | log.error("Cannot import pytz package and consequently, all datetime objects will be naive. 
" 72 | "In this particular case, e.g., all commit dates will be expressed in UTC.") 73 | 74 | import datetime 75 | import time 76 | 77 | UTC = pytz.timezone('UTC') 78 | LOCAL = pytz.timezone(settings.TIME_ZONE) 79 | 80 | def utc_to_local_datetime(dt): 81 | """ 82 | Map datetime as UTC object to it's localtime counterpart. 83 | """ 84 | return dt.astimezone(LOCAL) 85 | 86 | def utc_to_local_timestamp(ts, orig_tz=UTC): 87 | """ 88 | Convert a timestamp object into a tz-aware datetime object. 89 | """ 90 | timestamp = datetime.datetime.fromtimestamp(ts,tz=orig_tz) 91 | return timestamp.astimezone(LOCAL) 92 | 93 | def utc_to_local_timestruct(ts, orig_tz=UTC): 94 | """ 95 | Convert a timestruct object into a tz-aware datetime object. 96 | """ 97 | return utc_to_local_timestamp(time.mktime(ts),orig_tz) 98 | -------------------------------------------------------------------------------- /src/jellyroll/providers/utils/anyetree.py: -------------------------------------------------------------------------------- 1 | """ 2 | Get an Etree library. Usage:: 3 | 4 | >>> from anyetree import etree 5 | 6 | Returns some etree library. 
Looks for (in order of decreasing preference): 7 | 8 | * ``lxml.etree`` (http://cheeseshop.python.org/pypi/lxml/) 9 | * ``xml.etree.cElementTree`` (built into Python 2.5) 10 | * ``cElementTree`` (http://effbot.org/zone/celementtree.htm) 11 | * ``xml.etree.ElementTree`` (built into Python 2.5) 12 | * ``elementree.ElementTree (http://effbot.org/zone/element-index.htm) 13 | """ 14 | 15 | __all__ = ['etree'] 16 | 17 | SEARCH_PATHS = [ 18 | "lxml.etree", 19 | "xml.etree.cElementTree", 20 | "cElementTree", 21 | "xml.etree.ElementTree", 22 | "elementtree.ElementTree", 23 | ] 24 | 25 | etree = None 26 | 27 | for name in SEARCH_PATHS: 28 | try: 29 | etree = __import__(name, '', '', ['']) 30 | break 31 | except ImportError: 32 | continue 33 | 34 | if etree is None: 35 | raise ImportError("No suitable ElementTree implementation found.") -------------------------------------------------------------------------------- /src/jellyroll/providers/youtube.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import feedparser 4 | from django.conf import settings 5 | from django.db import transaction 6 | from django.utils.encoding import smart_unicode, smart_str 7 | from django.utils.encoding import DjangoUnicodeDecodeError 8 | from jellyroll.models import Item, VideoSource, Video 9 | from jellyroll.providers import utils 10 | 11 | TAG_SCHEME = 'http://gdata.youtube.com/schemas/2007/keywords.cat' 12 | FEED_URL = 'http://gdata.youtube.com/feeds/api/users/%s/favorites?v=2&start-index=%s&max-results=%s' 13 | 14 | log = logging.getLogger("jellyroll.providers.youtube") 15 | 16 | # 17 | # Public API 18 | # 19 | def enabled(): 20 | ok = hasattr(settings, "YOUTUBE_USERNAME") 21 | if not ok: 22 | log.warn('The Youtube provider is not available because the ' 23 | 'YOUTUBE_USERNAME settings is undefined undefined.') 24 | return ok 25 | 26 | def update(): 27 | start_index = 1 28 | max_results = 50 29 | while True: 30 | 
log.debug("Fetching videos %s - %s" % (start_index, start_index+max_results-1)) 31 | feed = feedparser.parse(FEED_URL % (settings.YOUTUBE_USERNAME, start_index, max_results)) 32 | for entry in feed.entries: 33 | if 'link' in entry: 34 | url = entry.link 35 | elif 'yt_videoid' in entry: 36 | url = 'http://www.youtube.com/watch?v=%s' % entry.yt_videoid 37 | else: 38 | log.error("Video '%s' appears to have no link" % (entry.tite)) 39 | continue 40 | 41 | _handle_video( 42 | title = entry.title, 43 | url = url, 44 | tags = " ".join(t['term'] for t in entry.tags if t['scheme'] == TAG_SCHEME), 45 | timestamp = datetime.datetime(*entry.published_parsed[:6]), 46 | ) 47 | if len(feed.entries) < max_results: 48 | log.debug("Ran out of results; finishing.") 49 | break 50 | 51 | start_index += max_results 52 | # 53 | # Private API 54 | # 55 | 56 | @transaction.commit_on_success 57 | def _handle_video(title, url, tags, timestamp): 58 | log.debug("Handling video: %s" % smart_str(title)) 59 | source = VideoSource.objects.get(name="YouTube") 60 | 61 | # For some strange reason sometimes the YouTube API returns 62 | # corrupted titles... 
63 | try: 64 | title = smart_unicode(title) 65 | except DjangoUnicodeDecodeError: 66 | return 67 | 68 | vid, created = Video.objects.get_or_create( 69 | url = url, 70 | defaults = { 71 | 'title': title, 72 | 'source': source 73 | } 74 | ) 75 | if created: 76 | return Item.objects.create_or_update( 77 | instance = vid, 78 | timestamp = timestamp, 79 | tags = tags, 80 | source = __name__, 81 | ) 82 | -------------------------------------------------------------------------------- /src/jellyroll/templates/jellyroll/base.html: -------------------------------------------------------------------------------- 1 | {# Dummy Jellyroll base template so that tests will work #} 2 | 3 | 4 | 5 | 6 | {% block title %}Jellyroll{% endblock %} 7 | 8 | 9 | {% block content %}{% endblock %} 10 | 11 | 12 | -------------------------------------------------------------------------------- /src/jellyroll/templates/jellyroll/calendar/day.html: -------------------------------------------------------------------------------- 1 | {% extends "jellyroll/base.html" %} 2 | 3 | {# Example template for a the calendar day view #} 4 | 5 | {% block title %}Items in {{ day|date:"j F Y" }}{% endblock %} 6 | 7 | {% block content %} 8 |

Items {% if is_today %}today{% else %}on {{ day|date:"j F Y" }}{% endif %}

9 |

10 | {% if previous %}← {{ previous|date:"j F Y" }}{% endif %} 11 | {% if next %}{{ next|date:"j F Y" }} →{% endif %} 12 |

13 |
    14 | {% for item in items %} 15 |
  • {{ item }}
  • 16 | {% endfor %} 17 |
18 | {% endblock %} -------------------------------------------------------------------------------- /src/jellyroll/templates/jellyroll/calendar/month.html: -------------------------------------------------------------------------------- 1 | {% extends "jellyroll/base.html" %} 2 | 3 | {# Example template for the calendar month view #} 4 | 5 | {% block title %}Items in {{ month|date:"F Y" }}{% endblock %} 6 | 7 | {% block content %} 8 |

Items in {{ month|date:"F Y" }}

9 |

10 | {% if previous %}← {{ previous|date:"F Y" }}{% endif %} 11 | {% if next %}{{ next|date:"F Y" }} →{% endif %} 12 |

13 |
    14 | {% for item in items %} 15 |
  • {{ item.timestamp|date:"F j" }}: {{ item }}
  • 16 | {% endfor %} 17 |
18 | {% endblock %} -------------------------------------------------------------------------------- /src/jellyroll/templates/jellyroll/calendar/today.html: -------------------------------------------------------------------------------- 1 | {% extends "jellyroll/calendar/day.html" %} 2 | 3 | {# Example template for the calendar "today" view #} 4 | 5 | {% block title %}Items for today{% endblock %} -------------------------------------------------------------------------------- /src/jellyroll/templates/jellyroll/calendar/year.html: -------------------------------------------------------------------------------- 1 | {% extends "jellyroll/base.html" %} 2 | 3 | {# Example template for the calendar year view #} 4 | 5 | {% block title %}Items in {{ year }}{% endblock %} 6 | 7 | {% block content %} 8 |

Items in {{ year }}

9 |

10 | {% if previous %}← {{ previous }}{% endif %} 11 | {% if next %}{{ next }} →{% endif %} 12 |

13 |
    14 | {% for item in items %} 15 |
  • {{ item.timestamp|date:"F j" }}: {{ item }}
  • 16 | {% endfor %} 17 |
18 | {% endblock %} -------------------------------------------------------------------------------- /src/jellyroll/templates/jellyroll/snippets/item.html: -------------------------------------------------------------------------------- 1 |
2 |

3 | {% if item.url %} 4 | {{ object }} 5 | {% else %} 6 | {{ object }} 7 | {% endif %} 8 | {% if object.permalink %} 9 | # 10 | {% endif %} 11 |

12 |

{{ item.timestamp|date:"N jS, Y, P" }}

13 |

{{ item.tags }}

14 |
-------------------------------------------------------------------------------- /src/jellyroll/templates/jellyroll/snippets/item.txt: -------------------------------------------------------------------------------- 1 | {{ object }} -------------------------------------------------------------------------------- /src/jellyroll/templatetags/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobian/jellyroll/02751b3108b6f6ae732a801d42ca3c85cc759978/src/jellyroll/templatetags/__init__.py -------------------------------------------------------------------------------- /src/jellyroll/templatetags/jellyroll.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import dateutil.parser 3 | import urllib 4 | from django import template 5 | from django.db import models 6 | from django.template.loader import render_to_string 7 | from django.contrib.contenttypes.models import ContentType 8 | 9 | try: 10 | from collections import defaultdict 11 | except ImportError: 12 | defaultdict = None 13 | 14 | 15 | # Hack until relative imports 16 | Item = models.get_model("jellyroll", "item") 17 | 18 | register = template.Library() 19 | 20 | def jellyrender(parser, token): 21 | """ 22 | Render a jellyroll ``Item`` by passing it through a snippet template. 23 | 24 | :: 25 | 26 | {% jellyrender [using