├── .gitignore ├── LICENSE ├── README.rst ├── dbutils ├── __init__.py ├── helpers.py └── querysets.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | dist/ 3 | *.egg-info 4 | build/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2011 DISQUS 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | (README needs mucho love) 2 | 3 | attach_foreignkey 4 | ================= 5 | 6 | Shortcut method which handles a pythonic LEFT OUTER JOIN. 7 | 8 | :: 9 | 10 | from dbutils.helpers import attach_foreignkey 11 | 12 | qs = list(Model.objects.all()) 13 | 14 | attach_foreignkey(qs, Model.author) 15 | 16 | queryset_to_dict 17 | ================ 18 | 19 | Shortcut method which stores a group of results as a dictionary 20 | by the key you specify (or primary key by default). 21 | 22 | :: 23 | 24 | from dbutils.helpers import queryset_to_dict 25 | 26 | qs = Model.objects.all() 27 | 28 | queryset_to_dict(qs, 'author_id') 29 | 30 | 31 | SkinnyQuerySet 32 | ============== 33 | 34 | A QuerySet which eliminates the in-memory result cache. 35 | 36 | :: 37 | 38 | from dbutils.querysets import SkinnyQuerySet 39 | 40 | for foo in SkinnyQuerySet(Model): 41 | print foo 42 | 43 | 44 | RangeQuerySet 45 | ============= 46 | 47 | (See also: RangeQuerySetWrapper) 48 | 49 | Iterates through a result set using MIN/MAX on primary key and stepping through. 50 | 51 | Very efficient, but ORDER BY statements will not work. 52 | 53 | :: 54 | 55 | from dbutils.querysets import RangeQuerySet 56 | 57 | for foo in RangeQuerySet(Model): 58 | print foo 59 | 60 | 61 | IterableQuerySetWrapper 62 | ======================= 63 | 64 | Iterates through a QuerySet using limit and offset. 65 | 66 | For efficiency use ``RangeQuerySetWrapper``. 67 | 68 | :: 69 | 70 | from dbutils.querysets import IterableQuerySetWrapper 71 | 72 | for foo in IterableQuerySetWrapper(Model.objects.all()): 73 | print foo -------------------------------------------------------------------------------- /dbutils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/disqus/django-db-utils/b4b6872804d297cf3485ec8ca7cf97068e7c344e/dbutils/__init__.py -------------------------------------------------------------------------------- /dbutils/helpers.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | 4 | def queryset_to_dict(qs, key='pk', singular=True): 5 | """ 6 | Given a queryset will transform it into a dictionary based on ``key``. 7 | """ 8 | if singular: 9 | result = {} 10 | for u in qs: 11 | result.setdefault(getattr(u, key), u) 12 | else: 13 | result = defaultdict(list) 14 | for u in qs: 15 | result[getattr(u, key)].append(u) 16 | return result 17 | 18 | 19 | def distinct(l): 20 | """ 21 | Given an iterable will return a list of all distinct values. 22 | """ 23 | return list(set(l)) 24 | 25 | 26 | from django.db.models.fields.related import SingleRelatedObjectDescriptor 27 | 28 | 29 | def attach_foreignkey(objects, field, related=[], database='default'): 30 | """ 31 | Shortcut method which handles a pythonic LEFT OUTER JOIN. 32 | 33 | ``attach_foreignkey(posts, Post.thread)`` 34 | 35 | Works with both ForeignKey and OneToOne (reverse) lookups. 36 | """ 37 | is_foreignkey = isinstance(field, SingleRelatedObjectDescriptor) 38 | 39 | if not is_foreignkey: 40 | field = field.field 41 | accessor = '_%s_cache' % field.name 42 | model = field.rel.to 43 | lookup = 'pk' 44 | column = field.column 45 | key = lookup 46 | else: 47 | accessor = field.cache_name 48 | field = field.related.field 49 | model = field.model 50 | lookup = field.name 51 | column = 'pk' 52 | key = field.column 53 | 54 | # Ensure values are unique, do not contain already present values, and are not missing 55 | # values specified in select_related 56 | values = distinct(getattr(o, column) for o in objects if (related or getattr(o, accessor, False) is False)) 57 | if not values: 58 | return 59 | 60 | qs = model.objects.filter(**{'%s__in' % lookup: values})\ 61 | .using(database) 62 | if related: 63 | qs = qs.select_related(*related) 64 | queryset = queryset_to_dict(qs, key=key) 65 | for o in objects: 66 | setattr(o, accessor, queryset.get(getattr(o, column))) 67 | 68 | 69 | def attach_foreignkeys(*object_sets, **kwargs): 70 | """ 71 | Shortcut method which handles a pythonic LEFT OUTER JOIN. Allows you to attach the same object type 72 | to multiple different sets of data. 73 | 74 | ``attach_foreignkeys((posts, Post.author), (threads, Thread.creator), related=['profile'])`` 75 | 76 | Works with only ForeignKeys 77 | """ 78 | 79 | related = kwargs.get('related', []) 80 | database = kwargs.get('database', 'default') 81 | 82 | values = set() 83 | 84 | model = None 85 | 86 | for objects, field in object_sets: 87 | if not model: 88 | model = field.field.rel.to 89 | elif model != field.field.rel.to: 90 | raise ValueError('You cannot attach foreign keys that do not reference the same models (%s != %s)' % (model, field.field.rel.to)) 91 | # Ensure values are unique, do not contain already present values, and are not missing 92 | # values specified in select_related 93 | values.update(distinct(getattr(o, field.field.column) for o in objects if (related or getattr(o, '_%s_cache' % field.field.name, False) is False))) 94 | 95 | if not values: 96 | return 97 | 98 | qs = model.objects.filter(pk__in=values).using(database) 99 | if related: 100 | qs = qs.select_related(*related) 101 | queryset = queryset_to_dict(qs) 102 | 103 | for objects, field in object_sets: 104 | for o in objects: 105 | setattr(o, '_%s_cache' % field.field.name, queryset.get(getattr(o, field.field.column))) 106 | -------------------------------------------------------------------------------- /dbutils/querysets.py: -------------------------------------------------------------------------------- 1 | from django.db.models.manager import Manager 2 | from django.db.models.query import QuerySet 3 | from django.db.models.fields import AutoField, IntegerField 4 | from django.db.models import Min, Max 5 | 6 | from dbutils.helpers import attach_foreignkey 7 | 8 | 9 | class QuerySetDoubleIteration(Exception): 10 | "A QuerySet was iterated over twice, you probably want to list() it." 11 | pass 12 | 13 | 14 | # "Skinny" here means we use iterator by default, rather than 15 | # ballooning in memory. 16 | class SkinnyManager(Manager): 17 | def get_query_set(self): 18 | return SkinnyQuerySet(self.model, using=self._db) 19 | 20 | 21 | class SkinnyQuerySet(QuerySet): 22 | """ 23 | A QuerySet which eliminates the in-memory result cache. 24 | """ 25 | def __len__(self): 26 | if getattr(self, 'has_run_before', False): 27 | raise TypeError("SkinnyQuerySet doesn't support __len__ after __iter__, if you *only* need a count you should use .count(), if you need to reuse the results you should coerce to a list and then len() that.") 28 | return super(SkinnyQuerySet, self).__len__() 29 | 30 | def __iter__(self): 31 | if self._result_cache is not None: 32 | # __len__ must have been run 33 | return iter(self._result_cache) 34 | 35 | has_run_before = getattr(self, 'has_run_before', False) 36 | if has_run_before: 37 | raise QuerySetDoubleIteration("This SkinnyQuerySet has already been iterated over once, you should assign it to a list if you want to reuse the data.") 38 | self.has_run_before = True 39 | 40 | return self.iterator() 41 | 42 | def list(self): 43 | return list(self) 44 | 45 | 46 | class InvalidQuerySetError(ValueError): 47 | pass 48 | 49 | 50 | class IterableQuerySetWrapper(object): 51 | """ 52 | Iterates through a QuerySet using limit and offset. 53 | 54 | For efficiency use ``RangeQuerySetWrapper``. 55 | """ 56 | def __init__(self, queryset, step=10000, limit=None): 57 | self.limit = limit 58 | if limit: 59 | self.step = min(limit, step) 60 | else: 61 | self.step = step 62 | self.queryset = queryset 63 | 64 | def __iter__(self): 65 | at = 0 66 | 67 | results = list(self.queryset[at:(at + self.step)]) 68 | while results and (not self.limit or at < self.limit): 69 | for result in results: 70 | yield result 71 | at += self.step 72 | results = list(self.queryset[at:(at + self.step)]) 73 | 74 | def iterator(self): 75 | return iter(self) 76 | 77 | 78 | 79 | class RangeQuerySet(SkinnyQuerySet): 80 | """ 81 | See ``RangeQuerySetWrapper`` 82 | """ 83 | def __init__(self, model, step=10000, sorted=False, *args, **kwargs): 84 | super(SkinnyQuerySet, self).__init__(model, *args, **kwargs) 85 | self.step = step 86 | self.sorted = sorted 87 | 88 | def iterator(self, bypass=False): 89 | # Only execute if low mark is 0 90 | if not bypass and self.query.low_mark == 0 and not\ 91 | (self.query.order_by or self.query.extra_order_by): 92 | # Clear the actual limit/offset 93 | high_mark = self.query.high_mark 94 | self.query.clear_limits() 95 | results = RangeQuerySetWrapper(self, step=self.step, limit=high_mark, sorted=self.sorted) 96 | elif not bypass: 97 | results = IterableQuerySetWrapper(self, step=self.step) 98 | else: 99 | results = super(RangeQuerySet, self).iterator() 100 | for result in results: 101 | yield result 102 | 103 | 104 | class RangeQuerySetWrapper(object): 105 | """ 106 | Iterates through a queryset by chunking results by ``step`` and using GREATER THAN 107 | and LESS THAN queries on the primary key. 108 | 109 | Very efficient, but ORDER BY statements will not work. 110 | """ 111 | 112 | def __init__(self, queryset, step=1000, limit=None, min_id=None, max_id=None, sorted=True, 113 | select_related=[], callbacks=[]): 114 | # Support for slicing 115 | if queryset.query.low_mark == 0 and not\ 116 | (queryset.query.order_by or queryset.query.extra_order_by): 117 | if limit is None: 118 | limit = queryset.query.high_mark 119 | queryset.query.clear_limits() 120 | else: 121 | raise InvalidQuerySetError 122 | 123 | self.limit = limit 124 | if limit: 125 | self.step = min(limit, abs(step)) 126 | self.desc = step < 0 127 | else: 128 | self.step = abs(step) 129 | self.desc = step < 0 130 | self.queryset = queryset 131 | self.min_id, self.max_id = min_id, max_id 132 | # if max_id isnt set we sort by default for optimization 133 | self.sorted = sorted or not max_id 134 | self.select_related = select_related 135 | self.callbacks = callbacks 136 | 137 | def __iter__(self): 138 | pk = self.queryset.model._meta.pk 139 | if not isinstance(pk, (IntegerField, AutoField)): 140 | for result in iter(IterableQuerySetWrapper(self.queryset, self.step, self.limit)): 141 | yield result 142 | else: 143 | max_id = self.max_id 144 | if self.min_id is not None: 145 | at = self.min_id 146 | elif not self.sorted: 147 | at = 0 148 | else: 149 | at = None 150 | 151 | num = 0 152 | limit = self.limit or max_id 153 | 154 | if isinstance(self.queryset, RangeQuerySet): 155 | extra_kwargs = {'bypass': True} 156 | else: 157 | extra_kwargs = {} 158 | 159 | has_results = True 160 | while ((max_id and at <= max_id) or has_results) and (not self.limit or num < self.limit): 161 | start = num 162 | 163 | if at is None: 164 | results = self.queryset 165 | elif self.desc: 166 | results = self.queryset.filter(id__lte=at) 167 | elif not self.desc: 168 | results = self.queryset.filter(id__gte=at) 169 | 170 | # Adjust the sort order if we're stepping through reverse 171 | if self.sorted: 172 | if self.desc: 173 | results = results.order_by('-id') 174 | else: 175 | results = results.order_by('id') 176 | 177 | if self.max_id: 178 | results = results.filter(id__lte=max_id) 179 | 180 | results = results[:self.step].iterator(**extra_kwargs) 181 | if self.select_related: 182 | # we have to pull them all into memory to do the select_related 183 | results = list(results) 184 | for fkey in self.select_related: 185 | if '__' in fkey: 186 | fkey, related = fkey.split('__') 187 | else: 188 | related = [] 189 | attach_foreignkey(results, getattr(self.queryset.model, fkey, related)) 190 | 191 | if self.callbacks: 192 | results = list(results) 193 | for callback in self.callbacks: 194 | callback(results) 195 | 196 | for result in results: 197 | yield result 198 | num += 1 199 | at = result.id 200 | if (max_id and result.id >= max_id) or (limit and num >= limit): 201 | break 202 | 203 | if at is None: 204 | break 205 | 206 | has_results = num > start 207 | if self.desc: 208 | at -= 1 209 | else: 210 | at += 1 211 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | try: 4 | from setuptools import setup, find_packages 5 | from setuptools.command.test import test 6 | except ImportError: 7 | from ez_setup import use_setuptools 8 | use_setuptools() 9 | from setuptools import setup, find_packages 10 | from setuptools.command.test import test 11 | 12 | 13 | class mytest(test): 14 | def run(self, *args, **kwargs): 15 | from runtests import runtests 16 | runtests() 17 | 18 | setup( 19 | name='django-db-utils', 20 | version='0.1', 21 | author='DISQUS', 22 | author_email='opensource@disqus.com', 23 | url='http://github.com/disqus/django-db-utils', 24 | description = 'Utilities for Django Databases', 25 | packages=find_packages(), 26 | zip_safe=False, 27 | install_requires=[ 28 | ], 29 | test_suite = 'dbutils.tests', 30 | include_package_data=True, 31 | cmdclass={"test": mytest}, 32 | classifiers=[ 33 | 'Framework :: Django', 34 | 'Intended Audience :: Developers', 35 | 'Intended Audience :: System Administrators', 36 | 'Operating System :: OS Independent', 37 | 'Topic :: Software Development' 38 | ], 39 | ) --------------------------------------------------------------------------------