├── qsstats ├── models.py ├── tests.py └── __init__.py ├── setup.py ├── LICENSE └── README.rst /qsstats/models.py: -------------------------------------------------------------------------------- 1 | # Hello, testrunner! 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from distutils.core import setup 3 | 4 | setup( 5 | name='django-qsstats', 6 | version='0.3.1', 7 | description='A django microframework that eases the generation of aggregate data for querysets.', 8 | author='Matt Croydon', 9 | author_email='mcroydon@gmail.com', 10 | url='http://github.com/mcroydon/django-qsstats/', 11 | packages=['qsstats'], 12 | requires=['dateutil(>=1.4.1)'], 13 | ) 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010, Matt Croydon 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the tastypie nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL MATT CROYDON BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /qsstats/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | from django.contrib.auth.models import User 3 | from qsstats import QuerySetStats, InvalidInterval, DateFieldMissing, QuerySetMissing 4 | import datetime 5 | 6 | class QuerySetStatsTestCase(TestCase): 7 | def test_basic_today(self): 8 | # We'll be making sure that this user is found 9 | u1 = User.objects.create_user('u1', 'u1@example.com') 10 | # And that this user is not 11 | u2 = User.objects.create_user('u2', 'u2@example.com') 12 | u2.is_active = False 13 | u2.save() 14 | 15 | # Create a QuerySet and QuerySetStats 16 | qs = User.objects.filter(is_active=True) 17 | qss = QuerySetStats(qs, 'date_joined') 18 | 19 | # We should only see a single user 20 | self.assertEqual(qss.this_day(), 1) 21 | 22 | def test_time_series(self): 23 | today = datetime.date.today() 24 | seven_days_ago = today - datetime.timedelta(days=7) 25 | for j in range(1,8): 26 | for i in range(0,j): 27 | u = User.objects.create_user('p-%s-%s' % (j, i), 'p%s-%s@example.com' % (j, i)) 28 | u.date_joined = today - datetime.timedelta(days=i) 29 | u.save() 30 | qs = User.objects.all() 31 | qss = QuerySetStats(qs, 'date_joined') 32 | time_series = qss.time_series(seven_days_ago, today) 33 | self.assertEqual([t[1] for t in time_series], [0, 1, 2, 3, 4, 5, 6]) 34 | 35 | def test_until(self): 36 | today = datetime.date.today() 37 | yesterday = today - datetime.timedelta(days=1) 38 | now = datetime.datetime.now() 39 | 40 | u = User.objects.create_user('u', 'u@example.com') 41 | u.date_joined = today 42 | u.save() 43 | 44 | qs = User.objects.all() 45 | qss = QuerySetStats(qs, 'date_joined') 46 | 47 | self.assertEqual(qss.until(now), 1) 48 | self.assertEqual(qss.until(today), 1) 49 | self.assertEqual(qss.until(yesterday), 0) 50 | self.assertEqual(qss.until_now(), 1) 51 | 52 | def test_after(self): 53 | today = datetime.date.today() 54 | tomorrow = today + datetime.timedelta(days=1) 55 | now = datetime.datetime.now() 56 | 57 | u = User.objects.create_user('u', 'u@example.com') 58 | u.date_joined = today 59 | u.save() 60 | 61 | qs = User.objects.all() 62 | qss = QuerySetStats(qs, 'date_joined') 63 | 64 | self.assertEqual(qss.after(today), 1) 65 | self.assertEqual(qss.after(now), 0) 66 | u.date_joined=tomorrow 67 | u.save() 68 | self.assertEqual(qss.after(now), 1) 69 | 70 | # MC_TODO: aggregate_field tests 71 | 72 | def test_query_set_missing(self): 73 | qss = QuerySetStats(date_field='foo') 74 | for method in ['this_day', 'this_month', 'this_year']: 75 | self.assertRaises(QuerySetMissing, getattr(qss, method)) 76 | 77 | def test_date_field_missing(self): 78 | qss = QuerySetStats(User.objects.all()) 79 | for method in ['this_day', 'this_month', 'this_year']: 80 | self.assertRaises(DateFieldMissing, getattr(qss, method)) 81 | 82 | def test_invalid_interval(self): 83 | qss = QuerySetStats(User.objects.all(), 'date_joined') 84 | def _invalid(): 85 | qss.time_series(qss.today, qss.today, interval='monkeys') 86 | self.assertRaises(InvalidInterval, _invalid) 87 | -------------------------------------------------------------------------------- /qsstats/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Matt Croydon' 2 | __version__ = (0, 3, 1) 3 | 4 | from dateutil.relativedelta import relativedelta 5 | from django.conf import settings 6 | from django.db.models import Count 7 | import datetime 8 | 9 | class InvalidInterval(Exception): 10 | pass 11 | 12 | class InvalidOperator(Exception): 13 | pass 14 | 15 | class DateFieldMissing(Exception): 16 | pass 17 | 18 | class QuerySetMissing(Exception): 19 | pass 20 | 21 | class QuerySetStats(object): 22 | """ 23 | Generates statistics about a queryset using Django aggregates. QuerySetStats 24 | is able to handle snapshots of data (for example this day, week, month, or 25 | year) or generate time series data suitable for graphing. 26 | """ 27 | def __init__(self, qs=None, date_field=None, aggregate_field=None, aggregate_class=None, operator=None): 28 | self.qs = qs 29 | self.date_field = date_field 30 | self.aggregate_field = aggregate_field or getattr(settings, 'QUERYSETSTATS_DEFAULT_AGGREGATE_FIELD', 'id') 31 | self.aggregate_class = aggregate_class or getattr(settings, 'QUERYSETSTATS_DEFAULT_AGGREGATE_CLASS', Count) 32 | self.operator = operator or getattr(settings, 'QUERYSETSTATS_DEFAULT_OPERATOR', 'lte') 33 | 34 | # MC_TODO: Danger in caching this? 35 | self.update_today() 36 | 37 | # Aggregates for a specific period of time 38 | 39 | def for_day(self, dt, date_field=None, aggregate_field=None, aggregate_class=None): 40 | date_field = date_field or self.date_field 41 | aggregate_class = aggregate_class or self.aggregate_class 42 | aggregate_field = aggregate_field or self.aggregate_field 43 | 44 | self.check_date_field(date_field) 45 | self.check_qs() 46 | 47 | kwargs = { 48 | '%s__year' % date_field : dt.year, 49 | '%s__month' % date_field : dt.month, 50 | '%s__day' % date_field : dt.day, 51 | } 52 | agg = self.qs.filter(**kwargs).aggregate(agg=aggregate_class(aggregate_field)) 53 | return agg['agg'] 54 | 55 | def this_day(self, date_field=None, aggregate_field=None, aggregate_class=None): 56 | return self.for_day(self.today, date_field, aggregate_field, aggregate_class) 57 | 58 | def for_month(self, dt, date_field=None, aggregate_field=None, aggregate_class=None): 59 | date_field = date_field or self.date_field 60 | aggregate_class = aggregate_class or self.aggregate_class 61 | aggregate_field = aggregate_field or self.aggregate_field 62 | 63 | self.check_date_field(date_field) 64 | self.check_qs() 65 | 66 | first_day = datetime.date(year=dt.year, month=dt.month, day=1) 67 | last_day = first_day + relativedelta(day=31) 68 | return self.get_aggregate(first_day, last_day, date_field, aggregate_field, aggregate_class) 69 | 70 | def this_month(self, date_field=None, aggregate_field=None, aggregate_class=None): 71 | return self.for_month(self.today, date_field, aggregate_class) 72 | 73 | def for_year(self, dt, date_field=None, aggregate_field=None, aggregate_class=None): 74 | date_field = date_field or self.date_field 75 | aggregate_class = aggregate_class or self.aggregate_class 76 | aggregate_field = aggregate_field or self.aggregate_field 77 | 78 | self.check_date_field(date_field) 79 | self.check_qs() 80 | 81 | first_day = datetime.date(year=dt.year, month=1, day=1) 82 | last_day = datetime.date(year=dt.year, month=12, day=31) 83 | return self.get_aggregate(first_day, last_day, date_field, aggregate_field, aggregate_class) 84 | 85 | def this_year(self, date_field=None, aggregate_field=None, aggregate_class=None): 86 | return self.for_year(self.today, date_field, aggregate_field, aggregate_class) 87 | 88 | # Aggregate over time intervals 89 | 90 | def time_series(self, start_date, end_date, interval='days', date_field=None, aggregate_field=None, aggregate_class=None): 91 | if interval not in ('years', 'months', 'weeks', 'days'): 92 | raise InvalidInterval('Inverval not supported.') 93 | 94 | date_field = date_field or self.date_field 95 | aggregate_class = aggregate_class or self.aggregate_class 96 | aggregate_field = aggregate_field or self.aggregate_field 97 | 98 | self.check_date_field(date_field) 99 | self.check_qs() 100 | 101 | stat_list = [] 102 | dt = start_date 103 | while dt < end_date: 104 | # MC_TODO: Less hacky way of doing this? 105 | method = getattr(self, 'for_%s' % interval.rstrip('s')) 106 | stat_list.append((dt, method(dt, date_field=date_field, aggregate_field=aggregate_field, aggregate_class=aggregate_class))) 107 | dt = dt + relativedelta(**{interval : 1}) 108 | return stat_list 109 | 110 | # Aggregate totals using a date or datetime as a pivot 111 | 112 | def until(self, dt, date_field=None, aggregate_field=None, aggregate_class=None): 113 | return self.pivot(dt, 'lte', date_field, aggregate_field, aggregate_class) 114 | 115 | def until_now(self, date_field=None, aggregate_field=None, aggregate_class=None): 116 | return self.pivot(datetime.datetime.now(), 'lte', date_field, aggregate_field, aggregate_class) 117 | 118 | def after(self, dt, date_field=None, aggregate_field=None, aggregate_class=None): 119 | return self.pivot(dt, 'gte', date_field, aggregate_field, aggregate_class) 120 | 121 | def after_now(self, date_field=None, aggregate_field=None, aggregate_class=None): 122 | return self.pivot(datetime.datetime.now(), 'gte', date_field, aggregate_field, aggregate_class) 123 | 124 | def pivot(self, dt, operator=None, date_field=None, aggregate_field=None, aggregate_class=None): 125 | date_field = date_field or self.date_field 126 | aggregate_class = aggregate_class or self.aggregate_class 127 | aggregate_field = aggregate_field or self.aggregate_field 128 | operator = operator or self.operator 129 | 130 | self.check_date_field(date_field) 131 | self.check_qs() 132 | if operator not in ['lt', 'lte', 'gt', 'gte']: 133 | raise InvalidOperator("Please provide a valid operator.") 134 | 135 | kwargs = {'%s__%s' % (date_field, operator) : dt} 136 | 137 | total = self.qs.filter(**kwargs).aggregate(total=aggregate_class(aggregate_field)) 138 | return total['total'] 139 | 140 | # Utility functions 141 | def update_today(self): 142 | self.today = datetime.date.today() 143 | 144 | def get_aggregate(self, first_day, last_day, date_field, aggregate_field, aggregate_class): 145 | kwargs = {'%s__range' % date_field : (first_day, last_day)} 146 | agg = self.qs.filter(**kwargs).aggregate(agg=aggregate_class(aggregate_field)) 147 | return agg['agg'] 148 | 149 | def check_date_field(self, date_field): 150 | if not date_field: 151 | raise DateFieldMissing("Please provide a date_field.") 152 | 153 | def check_qs(self): 154 | if not self.qs: 155 | raise QuerySetMissing("Please provide a queryset.") 156 | 157 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ============================================== 2 | django-qsstats: QuerySet statistics for Django 3 | ============================================== 4 | 5 | The goal of django-qsstats is to be a microframework to make 6 | repetitive tasks such as generating aggregate statistics of querysets 7 | over time easier. It's probably overkill for the task at hand, but yay 8 | microframeworks! 9 | 10 | Requirements 11 | ============ 12 | 13 | * `python-dateutil `_ 14 | * `django `_ 1.1+ 15 | 16 | License 17 | ======= 18 | 19 | Liensed under a BSD-style license. 20 | 21 | Examples 22 | ======== 23 | 24 | How many users signed up today? this month? this year? 25 | ------------------------------------------------------ 26 | 27 | :: 28 | 29 | from django.contrib.auth import User 30 | import qsstats 31 | 32 | qs = User.objects.all() 33 | qss = qsstats.QuerySetStats(qs, 'date_joined') 34 | 35 | print '%s new accounts today.' % qss.this_day() 36 | print '%s new accounts this month.' % qss.this_month() 37 | print '%s new accounts this year.' % qss.this_year() 38 | print '%s new accounts until now.' % qss.until_now() 39 | 40 | This might print something like:: 41 | 42 | 5 new accounts today. 43 | 27 new accounts this month. 44 | 377 new accounts this year. 45 | 409 new accounts until now. 46 | 47 | Aggregating time-series data suitable for graphing 48 | -------------------------------------------------- 49 | 50 | :: 51 | 52 | from django.contrib.auth import User 53 | import datetime, qsstats 54 | 55 | qs = User.objects.all() 56 | qss = qsstats.QuerySetStats(qs, 'date_joined') 57 | 58 | today = datetime.date.today() 59 | seven_days_ago = today - datetime.timedelta(days=7) 60 | 61 | time_series = qss.time_series(seven_days_ago, today) 62 | print 'New users in the last 7 days: %s' % [t[1] for t in time_series] 63 | 64 | This might print something like:: 65 | 66 | New users in the last 7 days: [3, 10, 7, 4, 12, 9, 11] 67 | 68 | Please see qsstats/tests.py for similar usage examples. 69 | 70 | API 71 | === 72 | 73 | The ``QuerySetStats`` object 74 | ---------------------------- 75 | 76 | In order to provide maximum flexibility, the ``QuerySetStats`` object 77 | can be instantiated with as little or as much information as you like. 78 | All keword arguments are optional but ``DateFieldMissing`` and 79 | ``QuerySetMissing`` will be raised if you try to use ``QuerySetStats`` 80 | without providing enough information. 81 | 82 | ``qs`` 83 | The queryset to operate on. 84 | 85 | Default: ``None`` 86 | 87 | ``date_field`` 88 | The date field within the queryset to use. 89 | 90 | Default: ``None`` 91 | 92 | ``aggregate_field`` 93 | The field to use for aggregate data. Can be set system-wide with 94 | the setting ``QUERYSETSTATS_DEFAULT_AGGREGATE_FIELD`` or set when 95 | instantiating or calling one of the methods. 96 | 97 | Default: ``'id'`` 98 | 99 | ``aggregate_class`` 100 | The aggregate class to be called during aggregation operations. Can 101 | be set system-wide with the setting ``QUERYSETSTATS_DEFAULT_AGGREGATE_CLASS`` 102 | or set when instantiating or calling one of the methods. 103 | 104 | Default: ``Count`` 105 | 106 | ``operator`` 107 | The default operator to use for the ``pivot`` function. Can be set 108 | system-wide with the setting ``QUERYSETSTATS_DEFAULT_OPERATOR`` or 109 | set when calling ``pivot``. 110 | 111 | Default: ``'lte'`` 112 | 113 | 114 | All of the documented methods take a standard set of keyword arguments that override any information already stored within the ``QuerySetStats`` object. These keyword arguments are ``date_field``, ``aggregate_field``, ``aggregate_class``. 115 | 116 | Once you have a ``QuerySetStats`` object instantiated, you can receive a single aggregate result by using the following methods: 117 | 118 | ``for_day`` 119 | Positional arguments: ``dt``, a ``datetime.datetime`` or ``datetime.date`` object 120 | to filter the queryset to this day. 121 | 122 | ``this_day`` 123 | A wrapper around ``for_day`` that provides aggregate information for ``datetime.date.today()``. It takes no positional arguments. 124 | 125 | ``for_month`` 126 | Positional arguments: ``dt``, a ``datetime.datetime`` or ``datetime.date`` object to filter the queryset to this month. 127 | 128 | ``this_month`` 129 | A wrapper around ``for_month`` that uses ``dateutil.relativedelta`` to provide aggregate information for this current month. 130 | 131 | ``for_year`` 132 | Positional arguments: ``dt``, a ``datetime.datetime`` or ``datetime.date`` object to filter the queryset to this year. 133 | 134 | ``this_year`` 135 | A wrapper around ``for_year`` that uses ``dateutil.relativedelta`` to provide aggregate information for this current year. 136 | 137 | ``QuerySetStats`` also provides a method for returning aggregated 138 | time-series data which may be extremely using in plotting data: 139 | 140 | ``time_series`` 141 | Positional arguments: ``start_date`` and ``end_date``, each a ``datetime.date`` or ``datetime.datetime`` object used in marking the start and stop of the time series data. 142 | 143 | Keyword arguments: In addition to the standard ``date_field``, 144 | ``aggregate_field``, and ``aggregate_class`` keyword argument, 145 | ``time_series`` takes an optional ``interval`` keyword argument 146 | used to mark which interval to use while calculating aggregate 147 | data between ``start_date`` and ``end_date``. This argument 148 | defaults to ``'days'`` and can accept ``'years'``, ``'months'``, 149 | ``'weeks'``, or ``'days'``. It will raise ``InvalidInterval`` 150 | otherwise. 151 | 152 | This methods returns a list of tuples. The first item in each 153 | tuple is a ``datetime.date`` object for the current inverval. The 154 | second item is the result of the aggregate operation. For 155 | example:: 156 | 157 | [(datetime.date(2010, 3, 28), 12), (datetime.date(2010, 3, 29), 0), ...] 158 | 159 | Formatting of date information is left as an exercise to the user and may 160 | vary depending on interval used. 161 | 162 | ``until`` 163 | Provide aggregate information until a given date or time, filtering the 164 | queryset using ``lte``. 165 | 166 | Positional arguments: ``dt`` a ``datetime.date`` or ``datetime.datetime`` 167 | object to be used for filtering the queryset since. 168 | 169 | Keyword arguments: ``date_field``, ``aggregate_field``, ``aggregate_class``. 170 | 171 | ``until_now`` 172 | Aggregate information until now. 173 | 174 | Positional arguments: ``dt`` a ``datetime.date`` or ``datetime.datetime`` 175 | object to be used for filtering the queryset since (using ``lte``). 176 | 177 | Keyword arguments: ``date_field``, ``aggregate_field``, ``aggregate_class``. 178 | 179 | ``after`` 180 | Aggregate information after a given date or time, filtering the queryset 181 | using ``gte``. 182 | 183 | Positional arguments: ``dt`` a ``datetime.date`` or ``datetime.datetime`` 184 | object to be used for filtering the queryset since. 185 | 186 | Keyword arguments: ``date_field``, ``aggregate_field``, ``aggregate_class``. 187 | 188 | ``after_now`` 189 | Aggregate information after now. 190 | 191 | Positional arguments: ``dt`` a ``datetime.date`` or ``datetime.datetime`` 192 | object to be used for filtering the queryset since (using ``gte``). 193 | 194 | Keyword arguments: ``date_field``, ``aggregate_field``, ``aggregate_class``. 195 | 196 | ``pivot`` 197 | Used by ``since``, ``after``, and ``until_now`` but potentially useful if 198 | you would like to specify your own operator instead of the defaults. 199 | 200 | Positional arguments: ``dt`` a ``datetime.date`` or ``datetime.datetime`` 201 | object to be used for filtering the queryset since (using ``lte``). 202 | 203 | Keyword arguments: ``operator``, ``date_field``, ``aggregate_field``, ``aggregate_class``. 204 | 205 | Raises ``InvalidOperator`` if the operator provided is not one of ``'lt'``, 206 | ``'lte'``, ``gt`` or ``gte``. 207 | 208 | Testing 209 | ======= 210 | 211 | If you'd like to test ``django-qsstats`` against your local configuration, add 212 | ``qsstats`` to your ``INSTALLED_APPS`` and run ``./manage.py test qsstats``. The test suite assumes that ``django.contrib.auth`` is installed. 213 | 214 | TODO 215 | ==== 216 | 217 | * There's a bunch of boilerplate that I'm sure could be reduced. 218 | * Clearer documentation and usage examples. 219 | * More test coverage. 220 | --------------------------------------------------------------------------------