├── .gitignore
├── LICENSE
├── Pipfile
├── Pipfile.lock
├── README.md
├── _config.yml
├── docker-compose.yml
├── setup.cfg
└── shop
├── manage.py
└── shop
├── __init__.py
├── data.py
├── migrations
├── 0001_initial.py
└── __init__.py
├── models.py
├── settings.py
├── urls.py
└── wsgi.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 2-Clause License
2 |
3 | Copyright (c) 2017, Josh Smeaton
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.python.org/simple"
3 | verify_ssl = true
4 |
5 | [packages]
6 | Django = "*"
7 | ipython = "*"
8 | flake8 = "*"
9 | isort = "*"
10 | psycopg2 = "*"
11 | Faker = "*"
12 | django-extensions = "*"
13 |
--------------------------------------------------------------------------------
/Pipfile.lock:
--------------------------------------------------------------------------------
1 | {
2 | "_meta": {
3 | "hash": {
4 | "sha256": "0d4442e575b9fd0d7d1e0b89d85ec1cd81a5c1322a300d2f84665250d123c94e"
5 | },
6 | "requires": {},
7 | "sources": [
8 | {
9 | "url": "https://pypi.python.org/simple",
10 | "verify_ssl": true
11 | }
12 | ]
13 | },
14 | "default": {
15 | "Django": {
16 | "version": "==1.11.2"
17 | },
18 | "Faker": {
19 | "version": "==0.7.15"
20 | },
21 | "Pygments": {
22 | "version": "==2.2.0"
23 | },
24 | "appnope": {
25 | "version": "==0.1.0"
26 | },
27 | "decorator": {
28 | "version": "==4.0.11"
29 | },
30 | "django-extensions": {
31 | "version": "==1.7.9"
32 | },
33 | "flake8": {
34 | "version": "==3.3.0"
35 | },
36 | "ipython": {
37 | "version": "==6.1.0"
38 | },
39 | "ipython_genutils": {
40 | "version": "==0.2.0"
41 | },
42 | "isort": {
43 | "version": "==4.2.13"
44 | },
45 | "jedi": {
46 | "version": "==0.10.2"
47 | },
48 | "mccabe": {
49 | "version": "==0.6.1"
50 | },
51 | "pexpect": {
52 | "version": "==4.2.1"
53 | },
54 | "pickleshare": {
55 | "version": "==0.7.4"
56 | },
57 | "prompt_toolkit": {
58 | "version": "==1.0.14"
59 | },
60 | "psycopg2": {
61 | "version": "==2.7.1"
62 | },
63 | "ptyprocess": {
64 | "version": "==0.5.1"
65 | },
66 | "pycodestyle": {
67 | "version": "==2.3.1"
68 | },
69 | "pyflakes": {
70 | "version": "==1.5.0"
71 | },
72 | "python-dateutil": {
73 | "version": "==2.6.0"
74 | },
75 | "pytz": {
76 | "version": "==2017.2"
77 | },
78 | "setuptools": {
79 | "version": "==36.0.1"
80 | },
81 | "simplegeneric": {
82 | "version": "==0.8.1"
83 | },
84 | "six": {
85 | "version": "==1.10.0"
86 | },
87 | "traitlets": {
88 | "version": "==4.3.2"
89 | },
90 | "wcwidth": {
91 | "version": "==0.1.7"
92 | }
93 | },
94 | "develop": {}
95 | }
96 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Optimising Django Queries
2 |
3 | Django ORM basics, with some tips and tricks, for writing optimal queries.
4 |
5 | ## [](#getting-started)Getting Started
6 |
7 | ### [](#setup)Setup
8 |
9 | Make sure you have python 3.6 installed, and is the default python3 on your
10 | system. There's a docker-compose file included for running postgres in a docker
11 | container if you don't want to install postgres locally.
12 |
13 | Make sure you edit the `DATABASES` setting in `shop/shop/settings.py` if you
14 | aren't running `postgres` using `Docker Toolbelt for OSX`.
15 |
16 | ```bash
17 | xcode-select --install # OSX only - skip if xcode is already installed
18 | docker-compose up -d db
19 |
20 | pip install pipenv
21 | git clone git@github.com:jarshwah/optimising-django-queries.git
22 | cd optimising-django-queries
23 | pipenv --three install
24 | pipenv shell # activates the virtual environment
25 | cd shop
26 | ./manage.py migrate
27 | ```
28 |
29 | The above commands will install all the dependencies needed.
30 |
31 | ### [](#query-logging)Query Logging
32 |
33 | When developing it's a good idea to have logging configured so that queries
34 | are printed to the console. A neverending stream of SQL queries in your
35 | terminal is your first hint that you might have some performance issues.
36 |
37 | Below is an extremely minimal config you can add (or merge) into your settings
38 | file:
39 |
40 | ```python
41 | DEBUG = True
42 | LOGGING = {
43 | 'version': 1,
44 | 'filters': {
45 | 'require_debug_true': {
46 | '()': 'django.utils.log.RequireDebugTrue',
47 | }
48 | },
49 | 'handlers': {
50 | 'console': {
51 | 'level': 'DEBUG',
52 | 'filters': ['require_debug_true'],
53 | 'class': 'logging.StreamHandler',
54 | }
55 | },
56 | 'loggers': {
57 | 'django.db.backends': {
58 | 'level': 'DEBUG',
59 | 'handlers': ['console'],
60 | }
61 | }
62 | }
63 | ```
64 |
65 | You can also inspect previously executed queries directly from the django
66 | shell, provided you've set `DEBUG = True` in your settings file. Django will
67 | only remember 9000 queries to avoid memory issues.
68 |
69 | ```python
70 | In [1]: from django.db import connection, reset_queries
71 |
72 | In [2]: print(connection.queries)
73 | [
74 | {
75 | 'sql': 'SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price" FROM "shop_product" ORDER BY "shop_product"."id" ASC LIMIT 1',
76 | 'time': '0.001'
77 | }
78 | ]
79 | In [3]: reset_queries()
80 |
81 | In [4]: print(connection.queries)
82 | []
83 | ```
84 |
85 | ## [](#database-relations)Database Relations
86 |
87 | The Django ORM (Object-Relational Mapper) is an abstraction that uses python
88 | objects to represent database tables and the relations between them.
89 |
90 | ### [](#foreign-key-columns)Foreign Keys
91 |
92 | Foreign Keys are the primary way of defining how two tables are related. They
93 | provide a link from one table to the key of another table. These kind of
94 | relationships are commonly called Many-To-One (M-1) because there are Many
95 | products that map to One Category.
96 |
97 | Product
98 |
99 | | id | name | category_id |
100 | |:----------|:----------------|:------------|
101 | | 1 | Samsung 65 inch | 1 |
102 | | 2 | Samsung 75 inch | 1 |
103 | | 3 | Juicer | 2 |
104 |
105 | Category
106 |
107 | | id | name |
108 | |:----------|:------------|
109 | | 1 | TVs |
110 | | 2 | Homeware |
111 |
112 | In the tables above, you can see that product with `id = 3` is a `Juicer`, in
113 | the `category` with `id = 2`. The data is `related` by the `category_id`
114 | foreign key pointing to the `id` field in the `Category` table.
115 |
116 | The SQL to retrieve the `Juicer` with it's `category` would be something like:
117 |
118 | ```sql
119 | SELECT product.id,
120 | product.name,
121 | category.id,
122 | category.name
123 | FROM product
124 | JOIN category
125 | ON product.category_id = category.id
126 | WHERE product.name = 'Juicer';
127 |
128 | 1 | Juicer | 2 | Homeware
129 | ```
130 |
131 | The equivalent `python` code would be:
132 |
133 | ```python
134 | >>> product = Product.objects.get(name='Juicer')
135 | >>> print(product.id, product.name, product.category.id, product.category.name)
136 |
137 | 1 Juicer 2 Homeware
138 | ```
139 |
140 | You can see that accessing a foreign key in python is equivalent to accessing
141 | an attribute or property.
142 |
143 | ### [](#multivalue-relations)Multivalue Relations
144 |
145 | It's also possible to start from `Category` and find all linked `Products`. But
146 | since we represent data as rows, and a `Category` can have multiple `Products`
147 | it means we'll end up repeating the same category multiple times.
148 |
149 | ```sql
150 | SELECT category.id
151 | category.name
152 | product.name
153 | FROM category
154 | JOIN product
155 | ON category.id = product.category_id
156 | WHERE category.name = 'TVs'
157 |
158 | 1 | TVs | Samsung 65 inch
159 | 1 | TVs | Samsung 75 inch
160 | ```
161 |
162 | This is a multivalue relation, as there are multiple links between a category
163 | and the products within that category. This is sometimes referred to as a
164 | One-To-Many (1-M) relationship.
165 |
166 | Since Django wants to avoid returning duplicate data, it represents multivalue
167 | relations as `lists`.
168 |
169 | ```python
170 | >>> category = Category.objects.get(name='TVs')
171 | >>> print(category.id, category.name)
172 |
173 | 1 TVs
174 |
175 | >>> for product in category.product_set.all():
176 | ... print(product.name)
177 |
178 | Samsung 65 inch
179 | Samsung 75 inch
180 | ```
181 |
182 | ### [](#m2m-relations)Many To Many Relations
183 |
184 | There's a third type of relationship in a database called Many To Many (M-M),
185 | which models multiple rows in a table linking to multiple rows in another
186 | table. This is really just a special case of Multivalue relationships, with
187 | an extra table inbetween the two for tracking the links.
188 |
189 | Consider a Product table and a Size table, where multiple products will share
190 | similar sizing.
191 |
192 | Product
193 |
194 | | id | name |
195 | |:----------|:-------|
196 | | 1 | Jumper |
197 | | 2 | Tshirt |
198 |
199 | Size
200 |
201 | | id | name |
202 | |:----------|:---------|
203 | | 1 | Small |
204 | | 2 | Large |
205 |
206 | Product Size
207 |
208 | | id | product_id | size_id |
209 | |:----------|:-----------|:---------
210 | | 1 | 1 | 1 |
211 | | 2 | 2 | 1 |
212 | | 3 | 1 | 2 |
213 |
214 | ```sql
215 | SELECT product.name
216 | size.name
217 | FROM product
218 | JOIN productsize
219 | ON product.id = productsize.product_id
220 | JOIN size
221 | ON productsize.size_id = size.id
222 |
223 | Jumper Small
224 | Jumper Large
225 | Tshirt Small
226 | ```
227 |
228 | The django ORM hides the `ProductSize` mapping table (through relation) by
229 | default, but it's possible to define it yourself. For a many-to-many relation,
230 | django models both sides as lists.
231 |
232 | ```python
233 | >>> product = Product.objects.get(name='Jumper')
234 | >>> for size in product.sizes.all():
235 | ... print(size.name)
236 |
237 | Small
238 | Large
239 |
240 | >>> size = Size.objects.get(name='Large')
241 | >>> for product in size.product_set.all():
242 | ... print(product.name)
243 |
244 | Jumper
245 | ```
246 |
247 | ## [](#orm-models)ORM Models
248 |
249 | Django models represent database tables as classes, rows as instances of those
250 | classes, and columns as attributes.
251 |
252 | Here we'll see three models, with the relationships between them defined as
253 | fields.
254 |
255 | ```python
256 |
257 | class Category(models.Model):
258 | name = models.CharField(max_length=32)
259 |
260 | class Feature(models.Model):
261 | name = models.CharField(max_length=32)
262 | value = models.CharField(max_length=32)
263 | visible = models.BooleanField(default=True)
264 |
265 | class Product(models.Model):
266 | name = models.CharField(max_length=32)
267 | category = models.ForeignKey(Category)
268 | features = models.ManyToManyField(Feature)
269 | price = models.DecimalField(max_digits=6, decimal_places=2)
270 |
271 | ```
272 |
273 | `Product` is the interesting model here, as it has both a `ForeignKey` field
274 | pointing to `Category`, and a `ManyToMany` field pointing to multiple
275 | `Features`.
276 |
277 | ### [](#foreign-key-field)ForeignKey
278 |
279 | As we learned before, foreign keys are represented with simple attribute
280 | access. Given a `product`, we access the category via the `product.category`
281 | field.
282 |
283 | But what is Django doing to get the data from the database? How many queries
284 | does it need to fetch both `product` and `product.category`? In our examples
285 | above, we seen that all of this data can be generated with a single SQL query.
286 |
287 | If we turn on SQL logging (see above), we can see the queries being executed
288 | as we work.
289 |
290 | ```python
291 | In [24]: product = Product.objects.get(pk=1)
292 | (0.001)
293 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price"
294 | FROM "shop_product"
295 | WHERE "shop_product"."id" = 1
296 |
297 | In [25]: category = product.category
298 | (0.001)
299 | SELECT "shop_category"."id", "shop_category"."name"
300 | FROM "shop_category"
301 | WHERE "shop_category"."id" = 2
302 | ```
303 |
304 | Django fetches data linked by `ForeignKey` *on-demand*. The main reason for
305 | doing this, is that you could have an extremely deep hierarchy of ForeignKeys,
306 | but you probably wouldn't want Django to pull all of that data back.
307 |
308 | But there *is* a way to be explicit, and tell Django which relations it should
309 | fetch *eagerly*.
310 |
311 | #### [](#select-related)Select Related
312 |
313 | If you want Django to join the data via SQL and return it in a single query,
314 | you can use [select_related()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#select-related).
315 |
316 | ```python
317 | In [26]: product = Product.objects.select_related('category').get(pk=1)
318 | (0.001)
319 | SELECT
320 | "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price", "shop_category"."id", "shop_category"."name"
321 | FROM "shop_product"
322 | INNER JOIN "shop_category" ON ("shop_product"."category_id" = "shop_category"."id")
323 | WHERE "shop_product"."id" = 1;
324 |
325 | In [27]: category = product.category
326 |
327 | ```
328 |
329 | Here we see that, indeed, a single query is executed to retrieve the product
330 | and the category.
331 |
332 | ### [](#n-plus-1)1 + N queries
333 |
334 | You might have heard the term `1 + N` or `N + 1` queries. It's a common
335 | pitfall when using ORMs to retrieve data from a database.
336 |
337 | Let's say we have 3 products, and we want to show the product name and the
338 | category it came from. First, we'll do it the naive way:
339 |
340 | ```python
341 | In [27]: for p in Product.objects.all():
342 | ...: print(p.name, p.category.name)
343 | ...:
344 | (0.001) SELECT
345 | "shop_product"."id", "shop_product"."name",
346 | "shop_product"."category_id", "shop_product"."price"
347 | FROM "shop_product";
348 | (0.000) SELECT
349 | "shop_category"."id", "shop_category"."name"
350 | FROM "shop_category"
351 | WHERE "shop_category"."id" = 1;
352 |
353 | Samsung 65 inch TVs
354 |
355 | (0.001) SELECT "shop_category"."id", "shop_category"."name"
356 | FROM "shop_category" WHERE "shop_category"."id" = 1;
357 |
358 | Samsung 75 inch TVs
359 |
360 | (0.000) SELECT "shop_category"."id", "shop_category"."name"
361 | FROM "shop_category" WHERE "shop_category"."id" = 2;
362 |
363 | Juicer Homeware
364 | ```
365 |
366 | We executed a single query (1) for the product, then we executed an extra query
367 | for each product to get the category (N). This introduces a lot of latency, as
368 | each query is a new network request, and the database has to recompile and
369 | execute lots of very similar queries.
370 |
371 | Using `select_related` avoids this issue, allowing data for all Products and
372 | Categories to be retrieved once.
373 |
374 | ```python
375 | In [28]: for p in Product.objects.select_related('category').all():
376 | ...: print(p.name, p.category.name)
377 | ...:
378 | (0.005) SELECT "shop_product"."id", "shop_product"."name",
379 | "shop_product"."category_id", "shop_product"."price",
380 | "shop_category"."id", "shop_category"."name"
381 | FROM "shop_product"
382 | INNER JOIN "shop_category"
383 | ON ("shop_product"."category_id" = "shop_category"."id");
384 | Samsung 65 inch TVs
385 | Samsung 75 inch TVs
386 | Juicer Homeware
387 | ```
388 |
389 | ### [](#m2m-field)ManyToManyField
390 |
391 | `ManyToManyField`'s models a list of data. You generally iterate over many
392 | to many fields. Given a `product`, we access its features via the
393 | `product.features` field.
394 |
395 | Remembering that Django tries to avoid returning duplicate data, it must
396 | execute two queries in this instance. One to get the Product, and another to
397 | get the features for that product. Again, with SQL logging activated, we can
398 | see exactly what's happening under the hood.
399 |
400 | ```python
401 | In [29]: for p in Product.objects.all()[0:3]:
402 | ...: print('Product: ', p.name)
403 | ...: for f in p.features.all():
404 | ...: print(f.name, ': ', f.value)
405 | ...: print()
406 | ...:
407 |
408 | (0.001)
409 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price"
410 | FROM "shop_product";
411 |
412 | Product: Samsung 65 inch
413 |
414 | (0.008) SELECT
415 | "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
416 | FROM "shop_feature"
417 | INNER JOIN "shop_product_features"
418 | ON ("shop_feature"."id" = "shop_product_features"."feature_id")
419 | WHERE "shop_product_features"."product_id" = 1 ;
420 |
421 | Supplier : Samsung
422 | Size : 65 inches
423 | Colour : Black
424 |
425 | Product: Samsung 75 inch
426 | (0.001)
427 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
428 | FROM "shop_feature"
429 | INNER JOIN "shop_product_features"
430 | ON ("shop_feature"."id" = "shop_product_features"."feature_id")
431 | WHERE "shop_product_features"."product_id" = 2;
432 |
433 | Supplier : Samsung
434 | Size : 75 inches
435 | Colour : Black
436 |
437 | Product: Juicer
438 | (0.001)
439 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
440 | FROM "shop_feature"
441 | INNER JOIN "shop_product_features"
442 | ON ("shop_feature"."id" = "shop_product_features"."feature_id")
443 | WHERE "shop_product_features"."product_id" = 3;
444 |
445 | Colour : LightSeaGreen
446 | Colour : SpringGreen
447 | Colour : Azure
448 |
449 | ```
450 |
451 | This is a perfect example of a `1 + N` query. We executed `1` query to fetch
452 | products. Then we issued `1` query for each product `N` to get the features.
453 |
454 | Luckily, Django has a way to reduce the number of queries executed when dealing
455 | with lists of related data too.
456 |
457 | #### [](#prefetch-related)Prefetch Related
458 |
459 | The [prefetch_related()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#prefetch-related)
460 | queryset method is what Django provides to help minimise the number of queries
461 | required to get the data we need. It's more complicated than `select_related`
462 | but it comes with a lot of power.
463 |
464 | We can't actually get the data we need with a single query. But we can do it in
465 | two, which is `1 + 1` queries. One for the product, and then a single query to
466 | fetch all of the features for all of the products.
467 |
468 | ```python
469 | In [30]: for p in Product.objects.prefetch_related('features').all()[0:3]:
470 | ...: print('Product: ', p.name)
471 | ...: for f in p.features.all():
472 | ...: print(f.name, ': ', f.value)
473 | ...: print()
474 | ...:
475 | (0.001)
476 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price"
477 | FROM "shop_product";
478 | (0.001)
479 | SELECT
480 | ("shop_product_features"."product_id") AS "_prefetch_related_val_product_id", "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
481 | FROM "shop_feature"
482 | INNER JOIN "shop_product_features"
483 | ON ("shop_feature"."id" = "shop_product_features"."feature_id")
484 | WHERE "shop_product_features"."product_id" IN (1, 2, 3);
485 |
486 | Product: Samsung 65 inch
487 | Supplier : Samsung
488 | Size : 65 inches
489 | Colour : Black
490 |
491 | Product: Samsung 75 inch
492 | Supplier : Samsung
493 | Size : 75 inches
494 | Colour : Black
495 |
496 | Product: Juicer
497 | Colour : LightSeaGreen
498 | Colour : SpringGreen
499 | Colour : Azure
500 | ```
501 |
502 | Prefetching works by issuing the first query, gathering all of the product
503 | `id`s returned, and then issuing the second query for all features that match
504 | the list of product `id`s. It then uses these features as a cache whenever you
505 | attempt to access `product.features.all()`.
506 |
507 | ##### [](#prefetch-related-caveats)Prefetch Related Caveats
508 |
509 | With great power, comes great responsibility. There are a number of ways to
510 | abuse `prefetch_related`, or to skip using the cache you created and execute
511 | new queries.
512 |
513 | **Memory**
514 |
515 | Prefetching can end up using quite a bit of memory if there are lots of
516 | relations. If we were querying for 10,000 products, and each product had 10
517 | features, django would first issue a query to features with a `WHERE` clause
518 | containing 10,000 `id`s which is generally not good for a database. But then
519 | it'd return 100,000 features that it would have to keep cached in memory.
520 |
521 | **Filtering**
522 |
523 | Prefetching only works when you access the many to many field using `.all()`.
524 | But it *is* possible to further filter the many to many field using `.filter()`
525 | and other queryset methods.
526 |
527 | ```python
528 | In [31]: product = Product.objects.prefetch_related('features').get(pk=1)
529 |
530 | (0.001)
531 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price"
532 | FROM "shop_product"
533 | WHERE "shop_product"."id" = 1; args=(1,)
534 | (0.001)
535 | SELECT ("shop_product_features"."product_id") AS "_prefetch_related_val_product_id", "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
536 | FROM "shop_feature"
537 | INNER JOIN "shop_product_features"
538 | ON ("shop_feature"."id" = "shop_product_features"."feature_id")
539 | WHERE "shop_product_features"."product_id" IN (1);
540 |
541 | In [32]: product.features.get(name='Supplier').value
542 |
543 | (0.001)
544 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
545 | FROM "shop_feature"
546 | INNER JOIN "shop_product_features" ON ("shop_feature"."id" = "shop_product_features"."feature_id")
547 | WHERE ("shop_product_features"."product_id" = 1 AND "shop_feature"."name" = 'Supplier');
548 |
549 | Samsung
550 | ```
551 |
552 | Our cache isn't able to interpret extra database operations, so django is
553 | forced to execute another query.
554 |
555 | **iterator()**
556 |
557 | Django provides an [iterator()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#iterator)
558 | queryset method, which allows django to fetch instances as it iterates through
559 | a loop, rather than loading the entire result set in to memory at once. Since
560 | prefetch_related relies on the entire result being available so that it can
561 | collect all the `id`s, `refetch_related` has no effect when used with
562 | `iterator`.
563 |
564 | ### [](#reverse-relations)Reverse Relations
565 |
566 | Given a `ForeignKey` field linking a `product` to a `category`, Django will
567 | setup the reverse link from a `category` to the set (or list) of `products`.
568 | The convention is to name the reverse field `modelname_set`, but this can be
569 | overridden.
570 |
571 | For our examples above, we'd access the products of a category using
572 | `product_set.all()`.
573 |
574 | ```python
575 | In [33]: category = Category.objects.get(pk=1)
576 | (0.001)
577 | SELECT "shop_category"."id", "shop_category"."name"
578 | FROM "shop_category" WHERE "shop_category"."id" = 1;
579 |
580 | In [34]: products = list(category.product_set.all())
581 | (0.001)
582 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price"
583 | FROM "shop_product"
584 | WHERE "shop_product"."category_id" = 1;
585 | ```
586 |
587 | Reverse relations function identically to `ManyToMany` fields, so they can be
588 | prefetched, filtered, or ordered.
589 |
590 | ### [](#prefetch-or-select-related)Prefetch Related vs Select Related
591 |
592 | **select_related**
593 |
594 | Use `select_related()` on `ForeignKey` fields **only**. It has no affect on
595 | `ManyToManyField`s or reverse relations.
596 |
597 | > select_related is used to access a single related object
598 |
599 | **prefetch_related**
600 |
601 | Use `prefetch_related()` on `ManyToManyField`s or reverse relations. It can
602 | also be used on `ForeignKey` fields, but `select_related` is nearly always a
603 | better choice.
604 |
605 | > prefetch_related is used to access multiple related objects
606 |
607 | ## [](#query-counts)Query Count Examples
608 |
609 | Given the following count of objects in the database, how many queries are
610 | executed for each example below?
611 |
612 | - 1000 products
613 | - 5 categories (200 products per category)
614 | - 5000 features (10 features per product, some shared)
615 |
616 | ```python
617 | In [35]: for p in Product.objects.all():
618 | ...: print(p.category.name)
619 | ...: for feature in p.features.all():
620 | ...: print(f'{feature.name}: {feature.value}')
621 | ...:
622 | ```
623 |
624 |
625 | Answer
626 | 2001 - 1 for product, 1000 for category, 1000 for features
627 |
628 |
629 |
630 | ```python
631 | In [36]: for p in Product.objects.select_related('category').all():
632 | ...: print(p.category.name)
633 | ...: for feature in p.features.all():
634 | ...: print(f'{feature.name}: {feature.value}')
635 | ...:
636 | ```
637 |
638 |
639 | Answer
640 | 1001 - 1 for product and category, and 1000 for features
641 |
642 |
643 | ```python
644 | In [37]: for p in Product.objects.prefetch_related('features').all():
645 | ...: print(p.category.name)
646 | ...: for feature in p.features.all():
647 | ...: print(f'{feature.name}: {feature.value}')
648 | ...:
649 | ```
650 |
651 |
652 | Answer
653 | 1002 - 1 for product, 1000 for category, and 1 for features
654 |
655 |
656 | ```python
657 | In [38]: for p in Product.objects.select_related(
658 | ...: 'category'
659 | ...: ).prefetch_related('features').all():
660 | ...: print(p.category.name)
661 | ...: for feature in p.features.all():
662 | ...: print(f'{feature.name}: {feature.value}')
663 | ...:
664 | ```
665 |
666 |
667 | Answer
668 | 2 - 1 for product and category, and 1 for features
669 |
670 |
671 | ```python
672 | In [39]: for p in Product.objects.select_related(
673 | ...: 'category'
674 | ...: ).prefetch_related('features').all():
675 | ...: print(p.category.name)
676 | ...: for feature in p.features.filter(name='Supplier'):
677 | ...: print(f'{feature.name}: {feature.value}')
678 | ...:
679 | ```
680 |
681 |
682 | Answer
683 | 1003 - 1 for product and category, and 1 for features cache, 1000 for features
684 |
685 |
686 | ## [](#query-optimisation)Query Optimisation
687 |
688 | There are a bunch of optimisations you can make at the SQL layer, some of them
689 | obvious, some of them requiring a great deal of knowledge about the specific
690 | database you're using. We're going to focus on the optimisations we can make
691 | at the django ORM level.
692 |
693 | ### [](#query-optimisation-sorting)Sorting
694 |
695 | This one is fairly simple. Don't sort results if they don't absolutely need
696 | to be sorted! Sorting can often take up a large portion of the actual query
697 | time.
698 |
699 | ```sql
700 | postgres=# explain analyze select * from shop_product;
701 | QUERY PLAN
702 | ----------------------------------------------------------------------------------------------------------
703 | Seq Scan on shop_product (cost=0.00..1.30 rows=30 width=104) (actual time=0.011..0.014 rows=30 loops=1)
704 | Planning time: 0.049 ms
705 | Execution time: 0.031 ms
706 |
707 | postgres=# explain analyze select * from shop_product order by price;
708 | QUERY PLAN
709 | ----------------------------------------------------------------------------------------------------------------
710 | Sort (cost=2.04..2.11 rows=30 width=104) (actual time=0.031..0.033 rows=30 loops=1)
711 | Sort Key: price
712 | Sort Method: quicksort Memory: 27kB
713 | -> Seq Scan on shop_product (cost=0.00..1.30 rows=30 width=104) (actual time=0.009..0.013 rows=30 loops=1)
714 | Planning time: 0.060 ms
715 | Execution time: 0.051 ms
716 | ```
717 |
718 | Sometimes, though, sneaky awful ORM developers will add a **default ordering**
719 | to models. So even though we aren't adding ordering ourselves, there is an
720 | implicit ordering added to every single query against that model.
721 |
722 | ```python
723 |
724 | class Feature(models.Model):
725 | name = models.CharField(max_length=32)
726 | value = models.CharField(max_length=32)
727 | visible = models.BooleanField(default=True)
728 |
729 | class Meta:
730 | ordering = ['name']
731 |
732 | In [13]: features = list(Feature.objects.all())
733 | (0.001)
734 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
735 | FROM "shop_feature"
736 | ORDER BY "shop_feature"."name" ASC;
737 | ```
738 |
739 | You can be explicit about removing any default ordering though, which is
740 | especially important when doing any kind of aggregation. Ordering fields are
741 | added to the `GROUP BY`, which can give incorrect results if you weren't
742 | expecting them.
743 |
744 | ```python
745 | In [14]: features = list(Feature.objects.order_by())
746 | (0.001)
747 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
748 | FROM "shop_feature";
749 | ```
750 |
751 | Much better.
752 |
753 | ### [](#filtering-early)Filter Early
754 |
755 | You want your filters (WHERE clauses) to be as restrictive as possible. You
756 | want to return the fewest possible number of rows at each step of a query. This
757 | includes `__in` filters with subqueries!
758 |
759 | Let's write a queryset, and `explain analyze` the SQL that would have been
760 | executed.
761 |
762 | ```python
763 | In [21]: features = Feature.objects.filter(
764 | name='Supplier', product__in=Product.objects.filter(price__gt=500)
765 | )
766 |
767 | In [22]: print(features.query)
768 | SELECT
769 | "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
770 | FROM "shop_feature"
771 | INNER JOIN "shop_product_features"
772 | ON ("shop_feature"."id" = "shop_product_features"."feature_id")
773 | WHERE ("shop_feature"."name" = 'Supplier' AND "shop_product_features"."product_id" IN (
774 | SELECT U0."id" AS Col1
775 | FROM "shop_product" U0 WHERE U0."price" > 500
776 | ))
777 | ORDER BY "shop_feature"."name" ASC
778 | ```
779 |
780 | ```sql
781 | postgres=# explain analyze SELECT
782 | "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible"
783 | FROM "shop_feature"
784 | INNER JOIN "shop_product_features" ON ("shop_feature"."id" = "shop_product_features"."feature_id")
785 | WHERE ("shop_feature"."name" = 'Supplier' AND "shop_product_features"."product_id" IN (
786 | SELECT U0."id" AS Col1
787 | FROM "shop_product" U0 WHERE U0."price" > 500)
788 | )
789 | ORDER BY "shop_feature"."name" ASC;
790 | QUERY PLAN
791 | -----------------------------------------------------------------------------------------------------------------------------
792 | Nested Loop Semi Join (cost=15.29..21.03 rows=1 width=169) (actual time=0.034..0.034 rows=0 loops=1)
793 | -> Hash Join (cost=15.15..20.76 rows=1 width=173) (actual time=0.034..0.034 rows=0 loops=1)
794 | Hash Cond: (shop_product_features.feature_id = shop_feature.id)
795 | -> Seq Scan on shop_product_features (cost=0.00..4.62 rows=262 width=8) (actual time=0.015..0.015 rows=1 loops=1)
796 | -> Hash (cost=15.12..15.12 rows=2 width=169) (actual time=0.011..0.011 rows=0 loops=1)
797 | Buckets: 1024 Batches: 1 Memory Usage: 8kB
798 | -> Seq Scan on shop_feature (cost=0.00..15.12 rows=2 width=169) (actual time=0.011..0.011 rows=0 loops=1)
799 | Filter: ((name)::text = 'Supplier'::text)
800 | Rows Removed by Filter: 35
801 | -> Index Scan using shop_product_pkey on shop_product u0 (cost=0.14..0.20 rows=1 width=4)
802 | Index Cond: (id = shop_product_features.product_id)
803 | Filter: (price > '500'::numeric)
804 | Planning time: 0.313 ms
805 | Execution time: 0.093 ms
806 | (14 rows)
807 | ```
808 |
809 | ### [](#optimising-indexes)Indexes
810 |
811 | Indexes are usually the first thing people think about when trying to
812 | optimise a query. Finding the ideal set of indexes to add to a particular
813 | table is an art form, and always evolving depending on the number and nature
814 | of various queries hitting that table.
815 |
816 | When you add an index, always check to see if it's being used. Simply having
817 | one does not mean it'll be used. `EXPLAIN ANALYZE` a couple of real queries
818 | before and after.
819 |
820 | Good candidates for indexes (all conditions below should be met):
821 |
822 | - A table with rows > 100 or so. A table with fewer rows will probably just
823 | scan each row.
824 | - A column that has lots of different values (cardinality). No use having an
825 | index if it matches 50% of data.
826 | - If 50% of your data is NULL, but the rest is quite varied, then a
827 | `filtered index` might be a good fit.
828 | - If many different queries use the same column in a WHERE clause
829 | - Consider multiple column index if multiple columns exist in a WHERE clause
830 | - Order is important in multiple column indexes. Use the most common column first.
831 | - If there are lots of different WHERE clauses in a single query, an index is
832 | sometimes unlikely to be used.
833 |
834 | ### [](#values-querysets)Values Query Set
835 |
836 | Model objects are very expensive to construct. For large querysets, it can
837 | often add up to a significant percentage of overall time spent in python. Fields
838 | need to be set, signals need to fire, reverse relations need to be set up. There's
839 | a lot of work that goes into building up the model hierarchy.
840 |
841 | If you just need the data from the database, and don't need to access model
842 | properties at all, then use a [values()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#values)
843 | or a [values_list()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#values-list)
844 | method, which returns the data as simple dictionaries or tuples instead.
845 |
846 | ```python
847 | In [25]: %time model_objects = list(Product.objects.all())
848 |
849 | CPU times: user 985 µs, sys: 1.92 ms, total: 2.9 ms
850 | Wall time: 3.09 ms
851 |
852 | In [26]: %time model_objects = list(Product.objects.values())
853 |
854 | CPU times: user 978 µs, sys: 465 µs, total: 1.44 ms
855 | Wall time: 1.65 ms
856 |
857 | In [27]: %time model_objects = list(Product.objects.values('name', 'price'))
858 |
859 | CPU times: user 704 µs, sys: 597 µs, total: 1.3 ms
860 | Wall time: 1.39 ms
861 | ```
862 |
863 | ### [](#queryset-calculations)Calculations
864 |
865 | Some people like to speak about *Fat Models*, meaning that a lot of logic is
866 | encapsulated within methods and properties of the model itself. This can be
867 | a good thing in some cases, unless the method depends on data in a different
868 | table, or a different model depends on the calculation in your method.
869 |
870 | Ideally, we can execute a single query (plus any necessary prefetches) to get
871 | all of the information we need for a particular goal. Sometimes though, we don't
872 | get to choose how the query is constructed, or how it's used once executed.
873 |
874 | If you need to use a model method for a particular calculation, then you have
875 | to have that python object available. You're no longer just depending on the
876 | database.
877 |
878 | Here's an example:
879 |
880 | ```python
881 | def customer_price(self):
882 | return Price.objects.get(
883 | product=self,
884 | start__lte=timezone.now(),
885 | end__isnull=True
886 | ).price
887 | ```
888 |
889 | Now, a bunch of views and other models rely on this `customer_price` method,
890 | so they need to construct a full model object, but it'll also execute a query
891 | every time the price needs to be shown. This can be hugely expensive when
892 | iterating over a large list of products.
893 |
894 | Instead, design your models so that all important questions can be answered
895 | with a `values` queryset.
896 |
897 | [Query expressions](https://docs.djangoproject.com/en/1.11/ref/models/expressions/)
898 | can be extremely helpful by performing calculations across a set of objects,
899 | rather than performing them in python for each object. See my presentation/talk
900 | on [customising sql](https://github.com/jarshwah/customsql_talk/) for more ideas.
901 |
902 | ### [](#advanced-prefetching)Advanced Prefetch Related
903 |
904 | #### Prefetch Objects
905 |
906 | [Prefetch](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#django.db.models.Prefetch)
907 | is a class that gives users greater control over the query executed to fill the
908 | cache.
909 |
910 | In our examples earlier, we seen what happened when you add filtering onto a
911 | ManyToMany field -- it'll skip the cache. But if you only really care about
912 | a subset of the related values, you can filter it in the `Prefetch` object.
913 |
914 | ```python
915 | In [29]: products = (
916 | ...: Product
917 | ...: .objects
918 | ...: .select_related('category')
919 | ...: .prefetch_related(
920 | ...: Prefetch('features', queryset=Feature.objects.filter(name='Supplier'))
921 | ...: )
922 | ...: )
923 |
924 | In [30]: for p in products:
925 | ...: for f in p.features.all():
926 | ...: print(f'{p.name}: {f.value}')
927 | ...:
928 |
929 | Samsung 65 inch: Samsung
930 | Samsung 75 inch: Samsung
931 | Juicer: Breville
932 | ```
933 |
934 | That's only two queries.
935 |
936 | #### Overwriting Inefficient Methods
937 |
938 | Sometimes it's just not possible to avoid adding complex behaviour into our
939 | models. But we can design our methods to be as optimal as possible locally,
940 | while allowing a savvy caller that's using `Prefetch` to take it even further.
941 |
942 | This is a pattern I haven't seen in the wild before, and I was surprised it
943 | worked.
944 |
945 | The concept is to cache the results of the many to many field on first access,
946 | and to perform all filtering from within python. This keeps any access of the
947 | many to many field down to a single query maximum per instance.
948 |
949 | ```python
950 |
951 | from django.utils.functional import cached_property
952 |
953 | class Product(models.Model):
954 | ...
955 | features = models.ManyToManyField(Feature)
956 |
957 | @cached_property
958 | def all_features(self):
959 | return list(self.features.all())
960 |
961 | @property
962 | def visible_features_python(self):
963 | return [feature for feature in self.all_features if feature.visible]
964 |
965 | @property
966 | def invisible_features_python(self):
967 | return [feature for feature in self.all_features if not feature.visible]
968 |
969 | ```
970 |
971 | If `visible_features_python` is accessed, all features will be queried, stored
972 | on the instance, and then only the visible features are returned. If
973 | `invisible_features_python` is accessed after that, we skip the query execution,
974 | and immediately filter the cached values. We've reduced the number of queries
975 | from 2 to 1 for each Product. 500 products will result in 501 queries.
976 |
977 | The great trick here, is that we can use a `Prefetch` objects to store our
978 | cache directly on the `all_features` property, so that any access of visible
979 | or invisible features results in 0 extra queries per product!
980 |
981 | ```python
982 | In [33]: products = (
983 | ...: Product
984 | ...: .objects
985 | ...: .select_related('category')
986 | ...: .prefetch_related(
987 | ...: Prefetch(
988 | ...: 'features',
989 | ...: queryset=Feature.objects.all(),
990 | ...: to_attr='all_features')
991 | ...: )
992 | ...: )
993 |
994 | In [34]: for p in products:
995 | ...: for f in p.visible_features_python:
996 | ...: print(f'{p.name}: {f.value}')
997 | ...:
998 | ```
999 |
1000 | There are a total of 2 queries executed for this query. 1 for the products,
1001 | and 1 for the features prefetch cache.
1002 |
1003 | This pattern allows models to retain their methods and properties, while allowing
1004 | callers to inject ideal caches into a large number of objects for very little
1005 | cost.
1006 |
1007 | ## [](#summary)Summary
1008 |
1009 | We started by going through how the Django ORM maps database concepts to
1010 | python concepts, mainly through relations. How relations work is important
1011 | when trying to optimise query counts with `select_related` and
1012 | `prefetch_related.`
1013 |
1014 | Then we went over some techniques for improving performance of our queries. The
1015 | concepts were:
1016 |
1017 | 1. Reduction in number of queries - avoiding 1 + N queries.
1018 | 2. Utilising data caches to avoid going to the database.
1019 | 3. Making the database do less work:
1020 | - Avoid sorting
1021 | - Filter the data as much as possible
1022 | - Create indexes to efficiently filter
1023 | 4. Prefer performing calculations at the queryset level, rather than the
1024 | model instance level.
1025 | 5. Design your models so that callers can take advantage by providing prefetch
1026 | caches.
1027 |
1028 | What tips and tricks do you use for query performance tuning?
1029 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-slate
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '2'
2 |
3 | services:
4 | db:
5 | image: postgres:latest
6 | environment:
7 | - POSTGRES_USER=shop
8 | - POSTGRES_PASSWORD=shop
9 | ports:
10 | - "25432:5432"
11 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude =
3 | .git,.yml,__pycache__,
4 | max-line-length = 119
5 |
--------------------------------------------------------------------------------
/shop/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
5 | if __name__ == "__main__":
6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "shop.settings")
7 | try:
8 | from django.core.management import execute_from_command_line
9 | except ImportError:
10 | # The above import may fail for some other reason. Ensure that the
11 | # issue is really that Django is missing to avoid masking other
12 | # exceptions on Python 2.
13 | try:
14 | import django
15 | except ImportError:
16 | raise ImportError(
17 | "Couldn't import Django. Are you sure it's installed and "
18 | "available on your PYTHONPATH environment variable? Did you "
19 | "forget to activate a virtual environment?"
20 | )
21 | raise
22 | execute_from_command_line(sys.argv)
23 |
--------------------------------------------------------------------------------
/shop/shop/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jarshwah/optimising-django-queries/5c984c419236fe73f868f2517a9f9ea9e54c6c05/shop/shop/__init__.py
--------------------------------------------------------------------------------
/shop/shop/data.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | from faker import Faker
4 | from django.utils import timezone
5 |
6 |
7 | def create_initial_data(apps, schema_editor):
8 | Category = apps.get_model('shop', 'Category')
9 | Feature = apps.get_model('shop', 'Feature')
10 | Product = apps.get_model('shop', 'Product')
11 | Sale = apps.get_model('shop', 'Sale')
12 |
13 | fake = Faker()
14 |
15 | categories = [
16 | Category(name='Mens'),
17 | Category(name='Womens'),
18 | Category(name='Kids')
19 | ]
20 | categories = Category.objects.bulk_create(categories)
21 |
22 | features = [
23 | Feature(name='Size', value='XS'),
24 | Feature(name='Size', value='S'),
25 | Feature(name='Size', value='M'),
26 | Feature(name='Size', value='L'),
27 | Feature(name='Size', value='XL'),
28 | ]
29 | for _ in range(30):
30 | features.append(
31 | Feature(
32 | name='Colour',
33 | value=fake.color_name(),
34 | visible=fake.boolean(chance_of_getting_true=70)
35 | )
36 | )
37 | features = Feature.objects.bulk_create(features)
38 |
39 | products = []
40 | for _ in range(30):
41 | products.append(
42 | Product(
43 | name=fake.company(),
44 | category=random.choice(categories),
45 | price=fake.pydecimal(left_digits=3, right_digits=2, positive=True),
46 | )
47 | )
48 | products = Product.objects.bulk_create(products)
49 |
50 | for product in products:
51 | product.features.add(*set(random.choices(features, k=10)))
52 |
53 | sales = []
54 | for _ in range(1000):
55 | sales.append(
56 | Sale(
57 | product=random.choice(products),
58 | sale_date=fake.date_time_this_year(before_now=True, after_now=False, tzinfo=timezone.utc)
59 | )
60 | )
61 | Sale.objects.bulk_create(sales)
62 |
--------------------------------------------------------------------------------
/shop/shop/migrations/0001_initial.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Generated by Django 1.11.1 on 2017-05-29 12:09
3 | from __future__ import unicode_literals
4 |
5 | from django.db import migrations, models
6 | from django.utils import timezone
7 | import django.db.models.deletion
8 |
9 | from shop.data import create_initial_data
10 |
11 |
12 | class Migration(migrations.Migration):
13 |
14 | initial = True
15 |
16 | dependencies = [
17 | ]
18 |
19 | operations = [
20 | migrations.CreateModel(
21 | name='Category',
22 | fields=[
23 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
24 | ('name', models.CharField(max_length=32)),
25 | ],
26 | ),
27 | migrations.CreateModel(
28 | name='Feature',
29 | fields=[
30 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
31 | ('name', models.CharField(max_length=32)),
32 | ('value', models.CharField(max_length=32)),
33 | ('visible', models.BooleanField(default=True)),
34 | ],
35 | options={
36 | 'ordering': ['name'],
37 | },
38 | ),
39 | migrations.CreateModel(
40 | name='Product',
41 | fields=[
42 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
43 | ('name', models.CharField(max_length=32)),
44 | ('price', models.DecimalField(decimal_places=2, max_digits=6)),
45 | ('category', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='shop.Category')),
46 | ('features', models.ManyToManyField(to='shop.Feature')),
47 | ],
48 | ),
49 | migrations.CreateModel(
50 | name='Sale',
51 | fields=[
52 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
53 | ('sale_date', models.DateTimeField(default=timezone.now)),
54 | ('product', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='shop.Product')),
55 | ],
56 | ),
57 | migrations.RunPython(create_initial_data, migrations.RunPython.noop)
58 | ]
59 |
--------------------------------------------------------------------------------
/shop/shop/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jarshwah/optimising-django-queries/5c984c419236fe73f868f2517a9f9ea9e54c6c05/shop/shop/migrations/__init__.py
--------------------------------------------------------------------------------
/shop/shop/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 | from django.utils.functional import cached_property as buffered_property
3 | from django.utils import timezone
4 |
5 |
6 | class Category(models.Model):
7 | name = models.CharField(max_length=32)
8 |
9 | def __str__(self):
10 | return self.name
11 |
12 |
13 | class Feature(models.Model):
14 | name = models.CharField(max_length=32)
15 | value = models.CharField(max_length=32)
16 | visible = models.BooleanField(default=True)
17 |
18 | class Meta:
19 | ordering = ['name']
20 |
21 | def __str__(self):
22 | return f'{self.name} = {self.value}'
23 |
24 |
25 | class Product(models.Model):
26 | name = models.CharField(max_length=32)
27 | category = models.ForeignKey(Category)
28 | features = models.ManyToManyField(Feature)
29 | price = models.DecimalField(max_digits=6, decimal_places=2)
30 |
31 | def __str__(self):
32 | return self.name
33 |
34 | @buffered_property
35 | def all_features(self):
36 | return list(self.features.all())
37 |
38 | @property
39 | def visible_features_python(self):
40 | return [feature for feature in self.all_features if feature.visible]
41 |
42 | @property
43 | def invisible_features_python(self):
44 | return [feature for feature in self.all_features if not feature.visible]
45 |
46 | @property
47 | def visible_features_database(self):
48 | return self.features.filter(visible=True)
49 |
50 | @property
51 | def invisible_features_database(self):
52 | return self.features.filter(visible=False)
53 |
54 |
55 | class Sale(models.Model):
56 | product = models.ForeignKey(Product)
57 | sale_date = models.DateTimeField(default=timezone.now)
58 |
--------------------------------------------------------------------------------
/shop/shop/settings.py:
--------------------------------------------------------------------------------
1 | """
2 | Django settings for shop project Django 1.11.1.
3 | """
4 |
5 | import os
6 |
7 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
8 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
9 |
10 | SECRET_KEY = '87hai)pey)ay+z=^@5-oi3fv4gjo99ns2la9tttaqod@ijv!x1'
11 | DEBUG = True
12 | INSTALLED_APPS = [
13 | 'django.contrib.admin',
14 | 'django.contrib.auth',
15 | 'django.contrib.contenttypes',
16 | 'django_extensions',
17 | 'shop',
18 | ]
19 | ROOT_URLCONF = 'shop.urls'
20 |
21 | DATABASES = {
22 | 'default': {
23 | 'ENGINE': 'django.db.backends.postgresql',
24 | 'NAME': 'postgres',
25 | 'USER': 'shop',
26 | 'PASSWORD': 'shop',
27 | 'HOST': '192.168.99.100', # docker-machine IP
28 | 'PORT': '25432'
29 | }
30 | }
31 |
32 | TIME_ZONE = 'UTC'
33 | USE_TZ = True
34 |
35 | LOGGING = {
36 | 'version': 1,
37 | 'filters': {
38 | 'require_debug_true': {
39 | '()': 'django.utils.log.RequireDebugTrue',
40 | }
41 | },
42 | 'handlers': {
43 | 'console': {
44 | 'level': 'DEBUG',
45 | 'filters': ['require_debug_true'],
46 | 'class': 'logging.StreamHandler',
47 | }
48 | },
49 | 'loggers': {
50 | 'django.db.backends': {
51 | 'level': 'DEBUG',
52 | 'handlers': ['console'],
53 | }
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/shop/shop/urls.py:
--------------------------------------------------------------------------------
1 | """shop URL Configuration
2 |
3 | The `urlpatterns` list routes URLs to views. For more information please see:
4 | https://docs.djangoproject.com/en/1.11/topics/http/urls/
5 | Examples:
6 | Function views
7 | 1. Add an import: from my_app import views
8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home')
9 | Class-based views
10 | 1. Add an import: from other_app.views import Home
11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home')
12 | Including another URLconf
13 | 1. Import the include() function: from django.conf.urls import url, include
14 | 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls'))
15 | """
16 | from django.conf.urls import url
17 | from django.contrib import admin
18 |
19 | urlpatterns = [
20 | url(r'^admin/', admin.site.urls),
21 | ]
22 |
--------------------------------------------------------------------------------
/shop/shop/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for shop project.
3 |
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.wsgi import get_wsgi_application
13 |
14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "shop.settings")
15 |
16 | application = get_wsgi_application()
17 |
--------------------------------------------------------------------------------