├── tests ├── __init__.py └── datatools │ ├── __init__.py │ └── query │ ├── __init__.py │ └── range │ ├── __init__.py │ └── tests.py ├── .gitignore ├── datatools ├── __init__.py ├── models.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── dumpdata.py │ │ └── loaddata.py ├── query │ ├── __init__.py │ └── range.py └── utils.py ├── setup.py ├── README.rst ├── runtests.py └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/datatools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/datatools/query/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/datatools/query/range/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg* 3 | /dist 4 | /build -------------------------------------------------------------------------------- /datatools/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | datatools 3 | ~~~~~~~~~ 4 | 5 | :copyright: (c) 2011 DISQUS. 6 | :license: Apache License 2.0, see LICENSE for more details. 7 | """ 8 | -------------------------------------------------------------------------------- /datatools/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | datatools.models 3 | ~~~~~~~~~~~~~~~~ 4 | 5 | :copyright: (c) 2011 DISQUS. 
6 | :license: Apache License 2.0, see LICENSE for more details. 7 | """ 8 | -------------------------------------------------------------------------------- /datatools/management/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | datatools.management 3 | ~~~~~~~~~~~~~~~~~~~~ 4 | 5 | :copyright: (c) 2011 DISQUS. 6 | :license: Apache License 2.0, see LICENSE for more details. 7 | """ 8 | -------------------------------------------------------------------------------- /datatools/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | datatools.management.commands 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | :copyright: (c) 2011 DISQUS. 6 | :license: Apache License 2.0, see LICENSE for more details. 7 | """ 8 | -------------------------------------------------------------------------------- /datatools/query/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | datatools.query.range 3 | ~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | :copyright: (c) 2011-2012 DISQUS. 6 | :license: Apache License 2.0, see LICENSE for more details. 
7 | """ 8 | 9 | from datatools.query.range import * 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Hack to prevent stupid "TypeError: 'NoneType' object is not callable" error 2 | # in multiprocessing/util.py _exit_function when running `python 3 | # setup.py test` (see 4 | # http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html) 5 | try: 6 | import multiprocessing 7 | except ImportError: 8 | pass 9 | 10 | from setuptools import setup, find_packages 11 | 12 | tests_require = [ 13 | 'Django>=1.2,<1.4', 14 | 'django-nose', 15 | 'nose', 16 | ] 17 | 18 | tests_require = [ 19 | ] 20 | 21 | setup( 22 | name='django-data-tools', 23 | version='0.1.0', 24 | description='', 25 | author='David Cramer', 26 | author_email='dcramer@gmail.com', 27 | url='https://github.com/dcramer/django-data-tools', 28 | tests_require=tests_require, 29 | test_suite='runtests.runtests', 30 | license='Apache License 2.0', 31 | packages=find_packages(), 32 | zip_safe=False, 33 | # test_suite='runtests.runtests', 34 | include_package_data=True, 35 | ) 36 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | django-data-tools 2 | ================= 3 | 4 | A set of utilities and improvements for managing data (fixtures specifically) in Django. 5 | 6 | Install 7 | ------- 8 | 9 | :: 10 | 11 | INSTALLED_APPS = ( 12 | # ... 13 | 'datatools', 14 | ) 15 | 16 | Commands 17 | -------- 18 | 19 | dumpdata 20 | ~~~~~~~~ 21 | 22 | An improved version of the ``manage.py dumpdata`` command: 23 | 24 | * Adds a --limit option to specify the maximum number of objects per model to fetch. 25 | * Adds a --sort option to specify ascending or descending order for serialization. 
26 | * Automatically follows the dependency graph for ForeignKeys and ManyToManyFields. 27 | 28 | :: 29 | 30 | # Retrieve the latest 10000 thread objects with all their required dependencies 31 | python manage.py dumpdata forums.thread --limit=10000 --sort=desc 32 | 33 | Utilities 34 | --------- 35 | 36 | RangeQuerySetWrapper 37 | ~~~~~~~~~~~~~~~~~~~~ 38 | 39 | Efficient iteration over a large collection of database objects, using a standard range 40 | pattern on the primary key. 41 | 42 | :: 43 | 44 | from datatools.query import RangeQuerySetWrapper 45 | 46 | qs = RangeQuerySetWrapper(Model.objects.all(), limit=100000) 47 | for obj in qs: 48 | print "Got %r!" % obj 49 | -------------------------------------------------------------------------------- /runtests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | from os.path import dirname, abspath, splitext 4 | from os import listdir 5 | from optparse import OptionParser 6 | 7 | where_am_i = dirname(abspath(__file__)) 8 | 9 | sys.path.insert(0, where_am_i) 10 | 11 | # adding eggs to path 12 | files = listdir(where_am_i) 13 | 14 | for file in files: 15 | name, extension = splitext(file) 16 | if extension == ".egg": 17 | sys.path.insert(0, file) 18 | 19 | 20 | from django.conf import settings 21 | 22 | if not settings.configured: 23 | settings.configure( 24 | DATABASE_ENGINE='sqlite3', 25 | DATABASES={ 26 | 'default': { 27 | 'ENGINE': 'django.db.backends.sqlite3', 28 | 'TEST_NAME': ':memory:', 29 | }, 30 | }, 31 | INSTALLED_APPS=[ 32 | 'django.contrib.contenttypes', 33 | 'django.contrib.auth', 34 | ], 35 | ROOT_URLCONF='', 36 | DEBUG=False, 37 | ) 38 | 39 | from django_nose import NoseTestSuiteRunner 40 | 41 | 42 | def runtests(*test_args, **kwargs): 43 | if not test_args: 44 | test_args = ['tests'] 45 | 46 | kwargs.setdefault('interactive', False) 47 | 48 | test_runner = NoseTestSuiteRunner(**kwargs) 49 | 50 | failures = 
test_runner.run_tests(test_args) 51 | sys.exit(failures) 52 | 53 | if __name__ == '__main__': 54 | parser = OptionParser() 55 | parser.add_option('--verbosity', dest='verbosity', action='store', default=1, type=int) 56 | parser.add_options(NoseTestSuiteRunner.options) 57 | (options, args) = parser.parse_args() 58 | 59 | runtests(*args, **options.__dict__) 60 | -------------------------------------------------------------------------------- /datatools/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | datatools.utils 3 | ~~~~~~~~~~~~~~~ 4 | 5 | :copyright: (c) 2011-2012 DISQUS. 6 | :license: Apache License 2.0, see LICENSE for more details. 7 | """ 8 | 9 | from collections import defaultdict 10 | from django.db.models.fields.related import SingleRelatedObjectDescriptor 11 | 12 | 13 | def distinct(l): 14 | """ 15 | Given an iterable will return a list of all distinct values. 16 | """ 17 | return list(set(l)) 18 | 19 | 20 | def queryset_to_dict(qs, key='pk', singular=True): 21 | """ 22 | Given a queryset will transform it into a dictionary based on ``key``. 23 | """ 24 | if singular: 25 | result = {} 26 | for u in qs: 27 | result.setdefault(getattr(u, key), u) 28 | else: 29 | result = defaultdict(list) 30 | for u in qs: 31 | result[getattr(u, key)].append(u) 32 | return result 33 | 34 | 35 | def attach_foreignkey(objects, field, related=[], database=None): 36 | """ 37 | Shortcut method which handles a pythonic LEFT OUTER JOIN. 38 | 39 | ``attach_foreignkey(posts, Post.thread)`` 40 | 41 | Works with both ForeignKey and OneToOne (reverse) lookups. 
42 | """ 43 | 44 | if not objects: 45 | return 46 | 47 | if database is None: 48 | database = list(objects)[0]._state.db 49 | 50 | is_foreignkey = isinstance(field, SingleRelatedObjectDescriptor) 51 | 52 | if not is_foreignkey: 53 | field = field.field 54 | accessor = '_%s_cache' % field.name 55 | model = field.rel.to 56 | lookup = 'pk' 57 | column = field.column 58 | key = lookup 59 | else: 60 | accessor = field.cache_name 61 | field = field.related.field 62 | model = field.model 63 | lookup = field.name 64 | column = 'pk' 65 | key = field.column 66 | 67 | objects = [o for o in objects if (related or getattr(o, accessor, False) is False)] 68 | 69 | if not objects: 70 | return 71 | 72 | # Ensure values are unique, do not contain already present values, and are not missing 73 | # values specified in select_related 74 | values = distinct(filter(None, (getattr(o, column) for o in objects))) 75 | if values: 76 | qs = model.objects.filter(**{'%s__in' % lookup: values}) 77 | if database: 78 | qs = qs.using(database) 79 | if related: 80 | qs = qs.select_related(*related) 81 | 82 | queryset = queryset_to_dict(qs, key=key) 83 | else: 84 | queryset = {} 85 | 86 | for o in objects: 87 | setattr(o, accessor, queryset.get(getattr(o, column))) 88 | -------------------------------------------------------------------------------- /tests/datatools/query/range/tests.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from django.contrib.auth.models import User 4 | from django.test import TestCase 5 | from datatools.query.range import RangeQuerySetWrapper 6 | 7 | 8 | class QueryTest(TestCase): 9 | def setUp(self): 10 | for n in xrange(3): 11 | User.objects.create(username=n, email='%s@example.com' % n) 12 | 13 | def test_simple(self): 14 | # number of iterations + 1 for empty result set 15 | seen = set() 16 | with self.assertNumQueries(4): 17 | last = 0 18 | for x, n in enumerate(RangeQuerySetWrapper(User.objects.all(), 
step=1)): 19 | self.assertTrue(n.id not in seen) 20 | seen.add(n.id) 21 | self.assertTrue(n.id > last) 22 | last = n.id 23 | 24 | def test_stepping(self): 25 | # number of iterations + 1 for empty result set 26 | seen = set() 27 | with self.assertNumQueries(2): 28 | last = 0 29 | for x, n in enumerate(RangeQuerySetWrapper(User.objects.all(), step=3)): 30 | self.assertTrue(n.id not in seen) 31 | seen.add(n.id) 32 | self.assertTrue(n.id > last) 33 | last = n.id 34 | 35 | def test_order_by(self): 36 | # number of iterations + 1 for empty result set 37 | seen = set() 38 | with self.assertNumQueries(2): 39 | last = None 40 | for x, n in enumerate(RangeQuerySetWrapper(User.objects.all(), order_by='date_joined')): 41 | self.assertTrue(n.id not in seen) 42 | seen.add(n.id) 43 | if last: 44 | self.assertTrue(n.date_joined > last) 45 | last = n.date_joined 46 | 47 | def test_with_callbacks(self): 48 | def add_crud(r): 49 | r.crud = r.id 50 | 51 | # number of iterations + 1 for empty result set 52 | seen = set() 53 | with self.assertNumQueries(4): 54 | for x, n in enumerate(RangeQuerySetWrapper(User.objects.all(), step=1, callbacks=[ 55 | lambda x: [add_crud(r) for r in x] 56 | ])): 57 | self.assertEquals(getattr(n, 'crud', None), n.id) 58 | self.assertTrue(n.id not in seen) 59 | seen.add(n.id) 60 | 61 | def test_reverse_no_matches(self): 62 | self.assertEquals(len(list(RangeQuerySetWrapper(User.objects.filter(id=9000), step=-1))), 0) 63 | 64 | def test_reverse(self): 65 | # number of iterations + 1 for empty result set 66 | seen = set() 67 | with self.assertNumQueries(4): 68 | last = sys.maxint 69 | for x, n in enumerate(RangeQuerySetWrapper(User.objects.all(), step=-1)): 70 | self.assertTrue(n.id not in seen) 71 | seen.add(n.id) 72 | self.assertTrue(n.id < last) 73 | last = n.id 74 | 75 | def test_reverse_stepping(self): 76 | # number of iterations + 1 for empty result set 77 | seen = set() 78 | with self.assertNumQueries(2): 79 | last = sys.maxint 80 | for x, n in 
enumerate(RangeQuerySetWrapper(User.objects.all(), step=-3)): 81 | self.assertTrue(n.id not in seen) 82 | seen.add(n.id) 83 | self.assertTrue(n.id < last) 84 | last = n.id 85 | -------------------------------------------------------------------------------- /datatools/query/range.py: -------------------------------------------------------------------------------- 1 | """ 2 | datatools.query.range 3 | ~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | :copyright: (c) 2011-2012 DISQUS. 6 | :license: Apache License 2.0, see LICENSE for more details. 7 | """ 8 | 9 | from datatools.utils import attach_foreignkey 10 | 11 | __all__ = ('RangeQuerySetWrapper', 'InvalidQuerySetError') 12 | 13 | 14 | class InvalidQuerySetError(ValueError): 15 | pass 16 | 17 | 18 | class RangeQuerySetWrapper(object): 19 | """ 20 | Iterates through a queryset by chunking results by ``step`` and using GREATER THAN 21 | and LESS THAN queries on the primary key. 22 | """ 23 | 24 | def __init__(self, queryset, step=1000, limit=None, min_id=None, max_id=None, sorted=True, 25 | select_related=[], callbacks=[], order_by='pk'): 26 | # Support for slicing 27 | if queryset.query.low_mark == 0 and not\ 28 | (queryset.query.order_by or queryset.query.extra_order_by): 29 | if limit is None: 30 | limit = queryset.query.high_mark 31 | queryset.query.clear_limits() 32 | else: 33 | raise InvalidQuerySetError 34 | 35 | self.limit = limit 36 | if limit: 37 | self.step = min(limit, abs(step)) 38 | self.desc = step < 0 39 | else: 40 | self.step = abs(step) 41 | self.desc = step < 0 42 | self.queryset = queryset 43 | self.min_value, self.max_value = min_id, max_id 44 | # if max_id isnt set we sort by default for optimization 45 | self.sorted = sorted or not max_id 46 | self.select_related = select_related 47 | self.callbacks = callbacks 48 | self.order_by = order_by 49 | 50 | def __iter__(self): 51 | max_value = self.max_value 52 | if self.min_value is not None: 53 | cur_value = self.min_value 54 | elif not self.sorted: 55 | cur_value = 
0 56 | else: 57 | cur_value = None 58 | 59 | num = 0 60 | limit = self.limit 61 | 62 | queryset = self.queryset 63 | if max_value: 64 | queryset = queryset.filter(**{'%s__lte' % self.order_by: max_value}) 65 | # Adjust the sort order if we're stepping through reverse 66 | if self.sorted: 67 | if self.desc: 68 | queryset = queryset.order_by('-%s' % self.order_by) 69 | else: 70 | queryset = queryset.order_by(self.order_by) 71 | 72 | # we implement basic cursor pagination for columns that are not unique 73 | last_value = None 74 | offset = 0 75 | has_results = True 76 | while ((max_value and cur_value <= max_value) or has_results) and (not self.limit or num < self.limit): 77 | start = num 78 | 79 | if cur_value is None: 80 | results = queryset 81 | elif self.desc: 82 | results = queryset.filter(**{'%s__lte' % self.order_by: cur_value}) 83 | elif not self.desc: 84 | results = queryset.filter(**{'%s__gte' % self.order_by: cur_value}) 85 | 86 | results = results[offset:offset + self.step].iterator() 87 | 88 | # hash maps to pull in select_related columns 89 | if self.select_related: 90 | # we have to pull them all into memory to do the select_related 91 | results = list(results) 92 | for fkey in self.select_related: 93 | if '__' in fkey: 94 | fkey, related = fkey.split('__') 95 | else: 96 | related = [] 97 | attach_foreignkey(results, getattr(self.queryset.model, fkey, related)) 98 | 99 | if self.callbacks: 100 | results = list(results) 101 | for callback in self.callbacks: 102 | callback(results) 103 | 104 | for result in results: 105 | yield result 106 | 107 | num += 1 108 | cur_value = getattr(result, self.order_by) 109 | if cur_value == last_value: 110 | offset += 1 111 | else: 112 | # offset needs to be based at 1 so we dont return a row 113 | # that was already selected 114 | last_value = cur_value 115 | offset = 1 116 | 117 | if (max_value and cur_value >= max_value) or (limit and num >= limit): 118 | break 119 | 120 | if cur_value is None: 121 | break 122 | 123 | 
has_results = num > start 124 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2010 DISQUS 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /datatools/management/commands/dumpdata.py: -------------------------------------------------------------------------------- 1 | """ 2 | datatools.management.commands.dumpdata 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | :copyright: (c) 2011-2012 DISQUS. 6 | :license: Apache License 2.0, see LICENSE for more details. 7 | """ 8 | 9 | from django.core.exceptions import ImproperlyConfigured 10 | from django.core.management.base import BaseCommand, CommandError 11 | from django.core import serializers 12 | from django.db import router, DEFAULT_DB_ALIAS 13 | from django.db.models import ForeignKey 14 | 15 | import itertools 16 | from optparse import make_option 17 | from collections import defaultdict 18 | 19 | 20 | def objects_from_queryset(queryset, using='default'): 21 | """ 22 | Serializes objects from the database. 23 | 24 | Works much like Django's ``manage.py dumpdata``, except that it allows you to 25 | limit and sort the apps that you're pulling in, as well as automatically follow 26 | the dependency graph to pull in related objects. 
27 | """ 28 | # Now collate the objects to be serialized. 29 | objects = [] 30 | 31 | if using: 32 | queryset = queryset.using(using) 33 | 34 | results = list(queryset) 35 | if not results: 36 | return [] 37 | 38 | objs_to_check = [results[:]] 39 | while objs_to_check: 40 | i_objs = objs_to_check.pop(0) 41 | i_model = i_objs[0].__class__ 42 | 43 | # Handle O2M dependencies 44 | for field in (f for f in i_model._meta.fields if isinstance(f, ForeignKey)): 45 | qs = field.rel.to._default_manager 46 | if using: 47 | qs = qs.using(using) 48 | i_res = [o for o 49 | in qs.filter(pk__in=[getattr(r, field.column) for r in i_objs]) 50 | if o not in results] 51 | if i_res: 52 | objs_to_check.append(i_res) 53 | results.extend(i_res) 54 | 55 | # Handle M2M dependencies 56 | # TODO: this could be a lot more efficient on the SQL query 57 | for field in i_model._meta.many_to_many: 58 | i_res = [o for o 59 | in itertools.chain(*[getattr(r, field.name).all() for r in i_objs]) 60 | if o not in results] 61 | if i_res: 62 | objs_to_check.append(i_res) 63 | results.extend(i_res) 64 | 65 | for obj in results: 66 | if obj not in objects: 67 | objects.append(obj) 68 | 69 | return objects 70 | 71 | 72 | class Command(BaseCommand): 73 | option_list = BaseCommand.option_list + ( 74 | make_option('--format', default='json', dest='format', 75 | help='Specifies the output serialization format for fixtures.'), 76 | make_option('--indent', default=None, dest='indent', type='int', 77 | help='Specifies the indent level to use when pretty-printing output'), 78 | make_option('--database', action='store', dest='database', 79 | default=DEFAULT_DB_ALIAS, help='Nominates a specific database to load ' 80 | 'fixtures into. 
Defaults to the "default" database.'), 81 | make_option('-e', '--exclude', dest='exclude', action='append', default=[], 82 | help='App to exclude (use multiple --exclude to exclude multiple apps).'), 83 | make_option('-n', '--natural', action='store_true', dest='use_natural_keys', default=False, 84 | help='Use natural keys if they are available.'), 85 | make_option('-l', '--limit', dest='limit', type='int', default=None, 86 | help='Limit the number of objects per app.'), 87 | make_option('-s', '--sort', dest='sort', default=None, 88 | help='Change the sort order (useful with limit). Defaults to no sorting. Options are \'asc\' and \'desc\''), 89 | ) 90 | help = 'Output the contents of the database as a fixture of the given format.' 91 | args = '[appname appname.ModelName ...]' 92 | 93 | def _get_model_list(self, app_labels, exclude=()): 94 | from django.db.models import get_app, get_apps, get_model, get_models 95 | 96 | excluded_apps = set(get_app(app_label) for app_label in exclude) 97 | 98 | if len(app_labels) == 0: 99 | model_list = [m for m in (get_models(a) for a in get_apps() if a not in excluded_apps)] 100 | try: 101 | # Pre 1.4 days, Django returned models as a flat list 102 | model_list = set(model_list) 103 | except TypeError: 104 | # +1.4, model_list will be a list of lists that needs unfolded 105 | model_list = set(m for _ in model_list for m in _) 106 | else: 107 | model_list = set() 108 | app_labels = list(app_labels) 109 | while app_labels: 110 | label = app_labels.pop(0) 111 | 112 | if label in excluded_apps: 113 | continue 114 | 115 | try: 116 | app_label, model_label = label.split('.') 117 | try: 118 | app = get_app(app_label) 119 | except ImproperlyConfigured: 120 | raise CommandError("Unknown application: %s" % app_label) 121 | 122 | if app_label in excluded_apps: 123 | continue 124 | 125 | model = get_model(app_label, model_label) 126 | if model is None: 127 | raise CommandError("Unknown model: %s.%s" % (app_label, model_label)) 128 | 129 | 
model_list.add(model) 130 | 131 | except ValueError: 132 | # This is just an app - no model qualifier 133 | app_label = label 134 | try: 135 | app = get_app(app_label) 136 | except ImproperlyConfigured: 137 | raise CommandError("Unknown application: %s" % app_label) 138 | model_list.update(get_models(app)) 139 | return model_list 140 | 141 | def _get_query_set(self, model, sort=None, using=None): 142 | qs = model._default_manager 143 | if using: 144 | qs = qs.using(using) 145 | qs = qs.all() 146 | 147 | if sort == 'desc': 148 | qs = qs.order_by('-pk') 149 | elif sort == 'asc': 150 | qs = qs.order_by('pk') 151 | 152 | return qs 153 | 154 | def _can_dump_model(self, model, using=None): 155 | if model._meta.proxy: 156 | return False 157 | if not router.allow_syncdb(using, model): 158 | return False 159 | return True 160 | 161 | def handle(self, *app_labels, **options): 162 | """ 163 | Serializes objects from the database. 164 | 165 | Works much like Django's ``manage.py dumpdata``, except that it allows you to 166 | limit and sort the apps that you're pulling in, as well as automatically follow 167 | the dependency graph to pull in related objects. 168 | """ 169 | # TODO: excluded_apps doesnt correctly handle foo.bar if you're not using app_labels 170 | format = options.get('format', 'json') 171 | indent = options.get('indent', None) 172 | limit = options.get('limit', None) 173 | sort = options.get('sort', None) 174 | using = options.get('database', None) 175 | exclude = options.get('exclude', []) 176 | show_traceback = options.get('traceback', True) 177 | use_natural_keys = options.get('use_natural_keys', False) 178 | 179 | model_list = self._get_model_list(app_labels, exclude) 180 | 181 | # Check that the serialization format exists; this is a shortcut to 182 | # avoid collating all the objects and _then_ failing. 
183 | try: 184 | serializers.get_serializer(format) 185 | except KeyError: 186 | raise CommandError("Unknown serialization format: %s" % format) 187 | 188 | # Now collate the objects to be serialized. 189 | objects = [] 190 | for model in model_list: 191 | if not self._can_dump_model(model, using): 192 | continue 193 | 194 | queryset = self._get_query_set(model, sort, using)[:limit] 195 | 196 | objects.extend(objects_from_queryset(queryset, using=using)) 197 | 198 | objects = sort_dependencies(objects) 199 | 200 | try: 201 | return serializers.serialize(format, objects, indent=indent, 202 | use_natural_keys=use_natural_keys) 203 | except Exception, e: 204 | if show_traceback: 205 | raise 206 | raise CommandError("Unable to serialize database: %s" % e) 207 | 208 | 209 | def sort_dependencies(objects): 210 | """ 211 | Sort a list of instances by their model dependancy graph. 212 | 213 | This is very similar to Django's sort_dependencies method except 214 | for two big differences: 215 | 216 | 1. We graph dependencies unrelated to natural_key. 217 | 2. We take a list of objects, and return a sorted list of objects. 
218 | """ 219 | from django.db.models import get_model 220 | # Process the list of models, and get the list of dependencies 221 | model_dependencies = [] 222 | models = set() 223 | model_list = set() 224 | objs_by_model = defaultdict(list) 225 | for o in objects: 226 | model = o.__class__ 227 | objs_by_model[model].append(o) 228 | model_list.add(model) 229 | 230 | for model in model_list: 231 | models.add(model) 232 | # Add any explicitly defined dependencies 233 | if hasattr(model, 'natural_key'): 234 | deps = getattr(model.natural_key, 'dependencies', []) 235 | if deps: 236 | deps = [get_model(*d.split('.')) for d in deps] 237 | else: 238 | deps = [] 239 | 240 | # Now add a dependency for any FK or M2M relation with 241 | # a model that defines a natural key 242 | for field in itertools.chain(model._meta.fields, model._meta.many_to_many): 243 | if hasattr(field.rel, 'to') and field.rel.to != model: 244 | deps.append(field.rel.to) 245 | model_dependencies.append((model, deps)) 246 | 247 | model_dependencies.reverse() 248 | # Now sort the models to ensure that dependencies are met. This 249 | # is done by repeatedly iterating over the input list of models. 250 | # If all the dependencies of a given model are in the final list, 251 | # that model is promoted to the end of the final list. This process 252 | # continues until the input list is empty, or we do a full iteration 253 | # over the input models without promoting a model to the final list. 254 | # If we do a full iteration without a promotion, that means there are 255 | # circular dependencies in the list. 256 | model_list = [] 257 | while model_dependencies: 258 | skipped = [] 259 | changed = False 260 | while model_dependencies: 261 | model, deps = model_dependencies.pop() 262 | 263 | # If all of the models in the dependency list are either already 264 | # on the final model list, or not on the original serialization list, 265 | # then we've found another model with all it's dependencies satisfied. 
266 | found = True 267 | for candidate in ((d not in models or d in model_list) for d in deps): 268 | if not candidate: 269 | found = False 270 | if found: 271 | model_list.append(model) 272 | changed = True 273 | else: 274 | skipped.append((model, deps)) 275 | if not changed: 276 | raise CommandError("Can't resolve dependencies for %s in serialized app list." % 277 | ', '.join('%s.%s' % (model._meta.app_label, model._meta.object_name) 278 | for model, deps in sorted(skipped, key=lambda obj: obj[0].__name__)) 279 | ) 280 | model_dependencies = skipped 281 | 282 | sorted_results = [] 283 | for model in model_list: 284 | sorted_results.extend(objs_by_model[model]) 285 | 286 | return sorted_results 287 | -------------------------------------------------------------------------------- /datatools/management/commands/loaddata.py: -------------------------------------------------------------------------------- 1 | # This is necessary in Python 2.5 to enable the with statement, in 2.6 2 | # and up it is no longer necessary. 3 | from __future__ import with_statement 4 | 5 | import sys 6 | import os 7 | import gzip 8 | import traceback 9 | import zipfile 10 | from optparse import make_option 11 | 12 | from django.conf import settings 13 | from django.core import serializers 14 | from django.core.management.base import BaseCommand 15 | from django.core.management.color import no_style 16 | from django.db import (connections, router, transaction, DEFAULT_DB_ALIAS, 17 | IntegrityError, DatabaseError) 18 | from django.db.models import get_apps 19 | from django.utils.itercompat import product 20 | 21 | try: 22 | import bz2 23 | has_bz2 = True 24 | except ImportError: 25 | has_bz2 = False 26 | 27 | 28 | class SingleZipReader(zipfile.ZipFile): 29 | def __init__(self, *args, **kwargs): 30 | zipfile.ZipFile.__init__(self, *args, **kwargs) 31 | if settings.DEBUG: 32 | assert len(self.namelist()) == 1, "Zip-compressed fixtures must contain only one file." 
class Command(BaseCommand):
    """
    Install the named fixture(s) in the database.

    A loaddata variant that searches app fixture directories, FIXTURE_DIRS
    and the working directory, supports per-database fixture name suffixes,
    and loads everything in a single transaction.
    """
    help = 'Installs the named fixture(s) in the database.'
    args = "fixture [fixture ...]"

    option_list = BaseCommand.option_list + (
        make_option('--database', action='store', dest='database',
            default=DEFAULT_DB_ALIAS, help='Nominates a specific database to load '
                'fixtures into. Defaults to the "default" database.'),
    )

    def get_app_fixtures(self):
        """
        Return the 'fixtures' directory path for every installed app
        (whether its models live in a package or a models.py module).
        The directories are not checked for existence here.
        """
        app_module_paths = []
        for app in get_apps():
            if hasattr(app, '__path__'):
                # It's a 'models/' subpackage
                for path in app.__path__:
                    app_module_paths.append(path)
            else:
                # It's a models.py module
                app_module_paths.append(app.__file__)

        return [os.path.join(os.path.dirname(path), 'fixtures') for path in app_module_paths]

    def load_fixture(self, fixture_label, using, commit=True):
        """
        Locate and deserialize a single fixture label into the ``using``
        database.

        ``fixture_label`` may carry serialization/compression suffixes
        (e.g. ``users.json.gz``); otherwise every known serializer and
        compression format is tried.  Returns a dict of counters and the
        set of affected models, or None when loading was aborted (the
        error paths roll back and leave transaction management before
        returning when ``commit`` is set).
        """
        fixture_count = 0
        loaded_object_count = 0
        fixture_object_count = 0
        models = set()

        parts = fixture_label.split('.')

        # Peel off a trailing compression suffix (gz/zip/bz2) if present.
        if len(parts) > 1 and parts[-1] in compression_types:
            compression_formats = [parts[-1]]
            parts = parts[:-1]
        else:
            compression_formats = compression_types.keys()

        # Whatever remains is either a bare name (try every serializer
        # format) or name.format (accept only that format).
        if len(parts) == 1:
            fixture_name = parts[0]
            formats = serializers.get_public_serializer_formats()
        else:
            fixture_name, format = '.'.join(parts[:-1]), parts[-1]
            if format in serializers.get_public_serializer_formats():
                formats = [format]
            else:
                formats = []

        if formats:
            if self.verbosity >= 2:
                self.stdout.write("Loading '%s' fixtures...\n" % fixture_name)
        else:
            self.stderr.write(
                self.style.ERROR("Problem installing fixture '%s': %s is not a known serialization format.\n" %
                    (fixture_name, format)))
            if commit:
                transaction.rollback(using=using)
                transaction.leave_transaction_management(using=using)
            return

        # An absolute label is used verbatim; otherwise search app fixture
        # dirs, settings.FIXTURE_DIRS, and '' (i.e. the current directory).
        if os.path.isabs(fixture_name):
            fixture_dirs = [fixture_name]
        else:
            fixture_dirs = self.get_app_fixtures() + list(settings.FIXTURE_DIRS) + ['']

        for fixture_dir in fixture_dirs:
            if self.verbosity >= 2:
                self.stdout.write("Checking %s for fixtures...\n" % humanize(fixture_dir))

            label_found = False
            # Try database-qualified names first ("name.dbalias.format.comp"),
            # then unqualified ones, across every format combination.
            for combo in product([using, None], formats, compression_formats):
                database, format, compression_format = combo
                file_name = '.'.join(
                    p for p in [
                        fixture_name, database, format, compression_format
                    ]
                    if p
                )

                if self.verbosity >= 3:
                    self.stdout.write("Trying %s for %s fixture '%s'...\n" % \
                        (humanize(fixture_dir), file_name, fixture_name))
                full_path = os.path.join(fixture_dir, file_name)
                open_method = compression_types[compression_format]
                try:
                    fixture = open_method(full_path, 'r')
                except IOError:
                    # Candidate file doesn't exist; keep probing.
                    if self.verbosity >= 2:
                        self.stdout.write("No %s fixture '%s' in %s.\n" % \
                            (format, fixture_name, humanize(fixture_dir)))
                    continue
                try:
                    if label_found:
                        # Two files in the same directory matched the label;
                        # ambiguous, so abort the whole load.
                        self.stderr.write(self.style.ERROR("Multiple fixtures named '%s' in %s. Aborting.\n" %
                            (fixture_name, humanize(fixture_dir))))
                        if commit:
                            transaction.rollback(using=using)
                            transaction.leave_transaction_management(using=using)
                        return

                    fixture_count += 1
                    objects_in_fixture = 0
                    loaded_objects_in_fixture = 0
                    if self.verbosity >= 2:
                        self.stdout.write("Installing %s fixture '%s' from %s.\n" % \
                            (format, fixture_name, humanize(fixture_dir)))

                    objects = serializers.deserialize(format, fixture, using=using)

                    for obj in objects:
                        objects_in_fixture += 1
                        # Only save objects the router allows on this database.
                        if router.allow_syncdb(using, obj.object.__class__):
                            loaded_objects_in_fixture += 1
                            models.add(obj.object.__class__)
                            try:
                                obj.save(using=using)
                            except (DatabaseError, IntegrityError), e:
                                msg = "Could not load %(app_label)s.%(object_name)s(pk=%(pk)s): %(error_msg)s" % {
                                    'app_label': obj.object._meta.app_label,
                                    'object_name': obj.object._meta.object_name,
                                    'pk': obj.object.pk,
                                    'error_msg': e
                                }
                                # Py2 three-arg raise: re-raise the same
                                # exception class with an enriched message
                                # while keeping the original traceback.
                                raise e.__class__, e.__class__(msg), sys.exc_info()[2]

                    loaded_object_count += loaded_objects_in_fixture
                    fixture_object_count += objects_in_fixture
                    label_found = True

                except (SystemExit, KeyboardInterrupt):
                    raise
                except Exception:
                    # Best-effort reporting; the caller decides whether to
                    # roll back (see handle()).
                    if self.show_traceback:
                        traceback.print_exc()
                    else:
                        self.stderr.write(
                            self.style.ERROR("Problem installing fixture '%s': %s\n" %
                                (full_path, ''.join(traceback.format_exception(sys.exc_type,
                                    sys.exc_value, sys.exc_traceback)))))
                finally:
                    fixture.close()

                # If the fixture we loaded contains 0 objects, assume that an
                # error was encountered during fixture loading.
                if objects_in_fixture == 0:
                    self.stderr.write(
                        self.style.ERROR("No fixture data found for '%s'. (File format may be invalid.)\n" %
                            (fixture_name)))
                    if commit:
                        transaction.rollback(using=using)
                        transaction.leave_transaction_management(using=using)
                    return

        return {
            'fixture_count': fixture_count,
            'loaded_object_count': loaded_object_count,
            'fixture_object_count': fixture_object_count,
            'models': models,
        }

    def handle(self, *fixture_labels, **options):
        """
        Load every fixture label inside one transaction with constraint
        checks disabled, validate constraints afterwards, then reset
        database sequences if anything was installed.
        """
        self.verbosity = int(options.get('verbosity', 1))
        self.show_traceback = options.get('traceback', False)

        using = options.get('database')

        connection = connections[using]
        self.style = no_style()

        if not len(fixture_labels):
            self.stderr.write(
                self.style.ERROR("No database fixture specified. Please provide the path of at least one fixture in the command line.\n")
            )
            return

        # commit is a stealth option - it isn't really useful as
        # a command line option, but it can be useful when invoking
        # loaddata from within another script.
        # If commit=True, loaddata will use its own transaction;
        # if commit=False, the data load SQL will become part of
        # the transaction in place when loaddata was invoked.
        commit = options.get('commit', True)

        # Keep a count of the installed objects and fixtures
        fixture_count = 0
        loaded_object_count = 0
        fixture_object_count = 0
        models = set()

        # Get a cursor (even though we don't need one yet). This has
        # the side effect of initializing the test database (if
        # it isn't already initialized).
        cursor = connection.cursor()

        # Start transaction management. All fixtures are installed in a
        # single transaction to ensure that all references are resolved.
        if commit:
            transaction.commit_unless_managed(using=using)
            transaction.enter_transaction_management(using=using)
            transaction.managed(True, using=using)

        try:
            with connection.constraint_checks_disabled():
                for fixture_label in fixture_labels:
                    result = self.load_fixture(fixture_label, using=using, commit=commit)
                    if not result:
                        # load_fixture already rolled back and reported.
                        continue

                    fixture_count += result['fixture_count']
                    loaded_object_count += result['loaded_object_count']
                    fixture_object_count += result['fixture_object_count']
                    models |= result['models']

            # Since we disabled constraint checks, we must manually check for
            # any invalid keys that might have been added
            table_names = [model._meta.db_table for model in models]
            connection.check_constraints(table_names=table_names)

        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            # NOTE(review): silently swallows the error after rollback;
            # load_fixture has already written diagnostics to stderr.
            if commit:
                transaction.rollback(using=using)
                transaction.leave_transaction_management(using=using)
            return

        # If we found even one object in a fixture, we need to reset the
        # database sequences.
        if loaded_object_count > 0:
            sequence_sql = connection.ops.sequence_reset_sql(self.style, models)
            if sequence_sql:
                if self.verbosity >= 2:
                    self.stdout.write("Resetting sequences\n")
                for line in sequence_sql:
                    cursor.execute(line)

        if commit:
            transaction.commit(using=using)
            transaction.leave_transaction_management(using=using)

        if self.verbosity >= 1:
            if fixture_object_count == loaded_object_count:
                self.stdout.write("Installed %d object(s) from %d fixture(s)\n" % (
                    loaded_object_count, fixture_count))
            else:
                self.stdout.write("Installed %d object(s) (of %d) from %d fixture(s)\n" % (
                    loaded_object_count, fixture_object_count, fixture_count))

        # Close the DB connection. This is required as a workaround for an
        # edge case in MySQL: if the same connection is used to
        # create tables, load data, and query, the query can return
        # incorrect results. See Django #7572, MySQL #37735.
        if commit:
            connection.close()