├── .gitignore ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md ├── _config.yml ├── docker-compose.yml ├── setup.cfg └── shop ├── manage.py └── shop ├── __init__.py ├── data.py ├── migrations ├── 0001_initial.py └── __init__.py ├── models.py ├── settings.py ├── urls.py └── wsgi.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, Josh Smeaton 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.python.org/simple" 3 | verify_ssl = true 4 | 5 | [packages] 6 | Django = "*" 7 | ipython = "*" 8 | flake8 = "*" 9 | isort = "*" 10 | psycopg2 = "*" 11 | Faker = "*" 12 | django-extensions = "*" 13 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "0d4442e575b9fd0d7d1e0b89d85ec1cd81a5c1322a300d2f84665250d123c94e" 5 | }, 6 | "requires": {}, 7 | "sources": [ 8 | { 9 | "url": "https://pypi.python.org/simple", 10 | "verify_ssl": true 11 | } 12 | ] 13 | }, 14 | "default": { 15 | "Django": { 16 | "version": "==1.11.2" 17 | }, 18 | "Faker": { 19 | "version": "==0.7.15" 20 | }, 21 | "Pygments": { 22 | "version": "==2.2.0" 23 | }, 24 | "appnope": { 25 | "version": "==0.1.0" 26 | }, 27 | "decorator": { 28 | "version": "==4.0.11" 29 | }, 30 | "django-extensions": { 31 | "version": "==1.7.9" 32 | }, 33 | "flake8": { 34 | "version": "==3.3.0" 35 | }, 36 | "ipython": { 37 | "version": "==6.1.0" 38 | }, 39 | "ipython_genutils": { 40 | "version": "==0.2.0" 41 | }, 42 | "isort": { 43 | "version": "==4.2.13" 44 | }, 45 | "jedi": { 46 | "version": "==0.10.2" 47 | }, 48 | "mccabe": { 49 | "version": "==0.6.1" 50 | }, 51 | "pexpect": { 52 | "version": "==4.2.1" 53 | }, 54 | "pickleshare": { 55 | "version": "==0.7.4" 56 | }, 57 | "prompt_toolkit": { 58 | "version": "==1.0.14" 59 | }, 60 | "psycopg2": { 61 | "version": "==2.7.1" 62 | }, 63 | "ptyprocess": { 64 | "version": "==0.5.1" 65 | }, 66 | "pycodestyle": { 67 | "version": "==2.3.1" 68 | }, 69 | "pyflakes": { 70 | "version": "==1.5.0" 71 | }, 72 | "python-dateutil": { 73 | "version": "==2.6.0" 74 | }, 75 | "pytz": { 76 | "version": "==2017.2" 77 | }, 78 | "setuptools": { 79 | "version": "==36.0.1" 80 | }, 81 | "simplegeneric": { 82 | "version": "==0.8.1" 83 | }, 84 | "six": { 85 | "version": "==1.10.0" 86 | }, 87 | "traitlets": { 88 | "version": "==4.3.2" 89 | }, 90 | "wcwidth": { 91 | "version": "==0.1.7" 92 | } 93 | }, 94 | "develop": {} 95 | } 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Optimising Django Queries 2 | 3 | Django ORM basics, with some tips and tricks, for writing optimal queries. 4 | 5 | ## [](#getting-started)Getting Started 6 | 7 | ### [](#setup)Setup 8 | 9 | Make sure you have python 3.6 installed, and is the default python3 on your 10 | system. There's a docker-compose file included for running postgres in a docker 11 | container if you don't want to install postgres locally. 12 | 13 | Make sure you edit the `DATABASES` setting in `shop/shop/settings.py` if you 14 | aren't running `postgres` using `Docker Toolbelt for OSX`. 15 | 16 | ```bash 17 | xcode-select --install # OSX only - skip if xcode is already installed 18 | docker-compose up -d db 19 | 20 | pip install pipenv 21 | git clone git@github.com:jarshwah/optimising-django-queries.git 22 | cd optimising-django-queries 23 | pipenv --three install 24 | pipenv shell # activates the virtual environment 25 | cd shop 26 | ./manage.py migrate 27 | ``` 28 | 29 | The above commands will install all the dependencies needed. 30 | 31 | ### [](#query-logging)Query Logging 32 | 33 | When developing it's a good idea to have logging configured so that queries 34 | are printed to the console. A neverending stream of SQL queries in your 35 | terminal is your first hint that you might have some performance issues. 36 | 37 | Below is an extremely minimal config you can add (or merge) into your settings 38 | file: 39 | 40 | ```python 41 | DEBUG = True 42 | LOGGING = { 43 | 'version': 1, 44 | 'filters': { 45 | 'require_debug_true': { 46 | '()': 'django.utils.log.RequireDebugTrue', 47 | } 48 | }, 49 | 'handlers': { 50 | 'console': { 51 | 'level': 'DEBUG', 52 | 'filters': ['require_debug_true'], 53 | 'class': 'logging.StreamHandler', 54 | } 55 | }, 56 | 'loggers': { 57 | 'django.db.backends': { 58 | 'level': 'DEBUG', 59 | 'handlers': ['console'], 60 | } 61 | } 62 | } 63 | ``` 64 | 65 | You can also inspect previously executed queries directly from the django 66 | shell, provided you've set `DEBUG = True` in your settings file. Django will 67 | only remember 9000 queries to avoid memory issues. 68 | 69 | ```python 70 | In [1]: from django.db import connection, reset_queries 71 | 72 | In [2]: print(connection.queries) 73 | [ 74 | { 75 | 'sql': 'SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price" FROM "shop_product" ORDER BY "shop_product"."id" ASC LIMIT 1', 76 | 'time': '0.001' 77 | } 78 | ] 79 | In [3]: reset_queries() 80 | 81 | In [4]: print(connection.queries) 82 | [] 83 | ``` 84 | 85 | ## [](#database-relations)Database Relations 86 | 87 | The Django ORM (Object-Relational Mapper) is an abstraction that uses python 88 | objects to represent database tables and the relations between them. 89 | 90 | ### [](#foreign-key-columns)Foreign Keys 91 | 92 | Foreign Keys are the primary way of defining how two tables are related. They 93 | provide a link from one table to the key of another table. These kind of 94 | relationships are commonly called Many-To-One (M-1) because there are Many 95 | products that map to One Category. 96 | 97 | Product 98 | 99 | | id | name | category_id | 100 | |:----------|:----------------|:------------| 101 | | 1 | Samsung 65 inch | 1 | 102 | | 2 | Samsung 75 inch | 1 | 103 | | 3 | Juicer | 2 | 104 | 105 | Category 106 | 107 | | id | name | 108 | |:----------|:------------| 109 | | 1 | TVs | 110 | | 2 | Homeware | 111 | 112 | In the tables above, you can see that product with `id = 3` is a `Juicer`, in 113 | the `category` with `id = 2`. The data is `related` by the `category_id` 114 | foreign key pointing to the `id` field in the `Category` table. 115 | 116 | The SQL to retrieve the `Juicer` with it's `category` would be something like: 117 | 118 | ```sql 119 | SELECT product.id, 120 | product.name, 121 | category.id, 122 | category.name 123 | FROM product 124 | JOIN category 125 | ON product.category_id = category.id 126 | WHERE product.name = 'Juicer'; 127 | 128 | 1 | Juicer | 2 | Homeware 129 | ``` 130 | 131 | The equivalent `python` code would be: 132 | 133 | ```python 134 | >>> product = Product.objects.get(name='Juicer') 135 | >>> print(product.id, product.name, product.category.id, product.category.name) 136 | 137 | 1 Juicer 2 Homeware 138 | ``` 139 | 140 | You can see that accessing a foreign key in python is equivalent to accessing 141 | an attribute or property. 142 | 143 | ### [](#multivalue-relations)Multivalue Relations 144 | 145 | It's also possible to start from `Category` and find all linked `Products`. But 146 | since we represent data as rows, and a `Category` can have multiple `Products` 147 | it means we'll end up repeating the same category multiple times. 148 | 149 | ```sql 150 | SELECT category.id 151 | category.name 152 | product.name 153 | FROM category 154 | JOIN product 155 | ON category.id = product.category_id 156 | WHERE category.name = 'TVs' 157 | 158 | 1 | TVs | Samsung 65 inch 159 | 1 | TVs | Samsung 75 inch 160 | ``` 161 | 162 | This is a multivalue relation, as there are multiple links between a category 163 | and the products within that category. This is sometimes referred to as a 164 | One-To-Many (1-M) relationship. 165 | 166 | Since Django wants to avoid returning duplicate data, it represents multivalue 167 | relations as `lists`. 168 | 169 | ```python 170 | >>> category = Category.objects.get(name='TVs') 171 | >>> print(category.id, category.name) 172 | 173 | 1 TVs 174 | 175 | >>> for product in category.product_set.all(): 176 | ... print(product.name) 177 | 178 | Samsung 65 inch 179 | Samsung 75 inch 180 | ``` 181 | 182 | ### [](#m2m-relations)Many To Many Relations 183 | 184 | There's a third type of relationship in a database called Many To Many (M-M), 185 | which models multiple rows in a table linking to multiple rows in another 186 | table. This is really just a special case of Multivalue relationships, with 187 | an extra table inbetween the two for tracking the links. 188 | 189 | Consider a Product table and a Size table, where multiple products will share 190 | similar sizing. 191 | 192 | Product 193 | 194 | | id | name | 195 | |:----------|:-------| 196 | | 1 | Jumper | 197 | | 2 | Tshirt | 198 | 199 | Size 200 | 201 | | id | name | 202 | |:----------|:---------| 203 | | 1 | Small | 204 | | 2 | Large | 205 | 206 | Product Size 207 | 208 | | id | product_id | size_id | 209 | |:----------|:-----------|:--------- 210 | | 1 | 1 | 1 | 211 | | 2 | 2 | 1 | 212 | | 3 | 1 | 2 | 213 | 214 | ```sql 215 | SELECT product.name 216 | size.name 217 | FROM product 218 | JOIN productsize 219 | ON product.id = productsize.product_id 220 | JOIN size 221 | ON productsize.size_id = size.id 222 | 223 | Jumper Small 224 | Jumper Large 225 | Tshirt Small 226 | ``` 227 | 228 | The django ORM hides the `ProductSize` mapping table (through relation) by 229 | default, but it's possible to define it yourself. For a many-to-many relation, 230 | django models both sides as lists. 231 | 232 | ```python 233 | >>> product = Product.objects.get(name='Jumper') 234 | >>> for size in product.sizes.all(): 235 | ... print(size.name) 236 | 237 | Small 238 | Large 239 | 240 | >>> size = Size.objects.get(name='Large') 241 | >>> for product in size.product_set.all(): 242 | ... print(product.name) 243 | 244 | Jumper 245 | ``` 246 | 247 | ## [](#orm-models)ORM Models 248 | 249 | Django models represent database tables as classes, rows as instances of those 250 | classes, and columns as attributes. 251 | 252 | Here we'll see three models, with the relationships between them defined as 253 | fields. 254 | 255 | ```python 256 | 257 | class Category(models.Model): 258 | name = models.CharField(max_length=32) 259 | 260 | class Feature(models.Model): 261 | name = models.CharField(max_length=32) 262 | value = models.CharField(max_length=32) 263 | visible = models.BooleanField(default=True) 264 | 265 | class Product(models.Model): 266 | name = models.CharField(max_length=32) 267 | category = models.ForeignKey(Category) 268 | features = models.ManyToManyField(Feature) 269 | price = models.DecimalField(max_digits=6, decimal_places=2) 270 | 271 | ``` 272 | 273 | `Product` is the interesting model here, as it has both a `ForeignKey` field 274 | pointing to `Category`, and a `ManyToMany` field pointing to multiple 275 | `Features`. 276 | 277 | ### [](#foreign-key-field)ForeignKey 278 | 279 | As we learned before, foreign keys are represented with simple attribute 280 | access. Given a `product`, we access the category via the `product.category` 281 | field. 282 | 283 | But what is Django doing to get the data from the database? How many queries 284 | does it need to fetch both `product` and `product.category`? In our examples 285 | above, we seen that all of this data can be generated with a single SQL query. 286 | 287 | If we turn on SQL logging (see above), we can see the queries being executed 288 | as we work. 289 | 290 | ```python 291 | In [24]: product = Product.objects.get(pk=1) 292 | (0.001) 293 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price" 294 | FROM "shop_product" 295 | WHERE "shop_product"."id" = 1 296 | 297 | In [25]: category = product.category 298 | (0.001) 299 | SELECT "shop_category"."id", "shop_category"."name" 300 | FROM "shop_category" 301 | WHERE "shop_category"."id" = 2 302 | ``` 303 | 304 | Django fetches data linked by `ForeignKey` *on-demand*. The main reason for 305 | doing this, is that you could have an extremely deep hierarchy of ForeignKeys, 306 | but you probably wouldn't want Django to pull all of that data back. 307 | 308 | But there *is* a way to be explicit, and tell Django which relations it should 309 | fetch *eagerly*. 310 | 311 | #### [](#select-related)Select Related 312 | 313 | If you want Django to join the data via SQL and return it in a single query, 314 | you can use [select_related()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#select-related). 315 | 316 | ```python 317 | In [26]: product = Product.objects.select_related('category').get(pk=1) 318 | (0.001) 319 | SELECT 320 | "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price", "shop_category"."id", "shop_category"."name" 321 | FROM "shop_product" 322 | INNER JOIN "shop_category" ON ("shop_product"."category_id" = "shop_category"."id") 323 | WHERE "shop_product"."id" = 1; 324 | 325 | In [27]: category = product.category 326 | 327 | ``` 328 | 329 | Here we see that, indeed, a single query is executed to retrieve the product 330 | and the category. 331 | 332 | ### [](#n-plus-1)1 + N queries 333 | 334 | You might have heard the term `1 + N` or `N + 1` queries. It's a common 335 | pitfall when using ORMs to retrieve data from a database. 336 | 337 | Let's say we have 3 products, and we want to show the product name and the 338 | category it came from. First, we'll do it the naive way: 339 | 340 | ```python 341 | In [27]: for p in Product.objects.all(): 342 | ...: print(p.name, p.category.name) 343 | ...: 344 | (0.001) SELECT 345 | "shop_product"."id", "shop_product"."name", 346 | "shop_product"."category_id", "shop_product"."price" 347 | FROM "shop_product"; 348 | (0.000) SELECT 349 | "shop_category"."id", "shop_category"."name" 350 | FROM "shop_category" 351 | WHERE "shop_category"."id" = 1; 352 | 353 | Samsung 65 inch TVs 354 | 355 | (0.001) SELECT "shop_category"."id", "shop_category"."name" 356 | FROM "shop_category" WHERE "shop_category"."id" = 1; 357 | 358 | Samsung 75 inch TVs 359 | 360 | (0.000) SELECT "shop_category"."id", "shop_category"."name" 361 | FROM "shop_category" WHERE "shop_category"."id" = 2; 362 | 363 | Juicer Homeware 364 | ``` 365 | 366 | We executed a single query (1) for the product, then we executed an extra query 367 | for each product to get the category (N). This introduces a lot of latency, as 368 | each query is a new network request, and the database has to recompile and 369 | execute lots of very similar queries. 370 | 371 | Using `select_related` avoids this issue, allowing data for all Products and 372 | Categories to be retrieved once. 373 | 374 | ```python 375 | In [28]: for p in Product.objects.select_related('category').all(): 376 | ...: print(p.name, p.category.name) 377 | ...: 378 | (0.005) SELECT "shop_product"."id", "shop_product"."name", 379 | "shop_product"."category_id", "shop_product"."price", 380 | "shop_category"."id", "shop_category"."name" 381 | FROM "shop_product" 382 | INNER JOIN "shop_category" 383 | ON ("shop_product"."category_id" = "shop_category"."id"); 384 | Samsung 65 inch TVs 385 | Samsung 75 inch TVs 386 | Juicer Homeware 387 | ``` 388 | 389 | ### [](#m2m-field)ManyToManyField 390 | 391 | `ManyToManyField`'s models a list of data. You generally iterate over many 392 | to many fields. Given a `product`, we access its features via the 393 | `product.features` field. 394 | 395 | Remembering that Django tries to avoid returning duplicate data, it must 396 | execute two queries in this instance. One to get the Product, and another to 397 | get the features for that product. Again, with SQL logging activated, we can 398 | see exactly what's happening under the hood. 399 | 400 | ```python 401 | In [29]: for p in Product.objects.all()[0:3]: 402 | ...: print('Product: ', p.name) 403 | ...: for f in p.features.all(): 404 | ...: print(f.name, ': ', f.value) 405 | ...: print() 406 | ...: 407 | 408 | (0.001) 409 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price" 410 | FROM "shop_product"; 411 | 412 | Product: Samsung 65 inch 413 | 414 | (0.008) SELECT 415 | "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 416 | FROM "shop_feature" 417 | INNER JOIN "shop_product_features" 418 | ON ("shop_feature"."id" = "shop_product_features"."feature_id") 419 | WHERE "shop_product_features"."product_id" = 1 ; 420 | 421 | Supplier : Samsung 422 | Size : 65 inches 423 | Colour : Black 424 | 425 | Product: Samsung 75 inch 426 | (0.001) 427 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 428 | FROM "shop_feature" 429 | INNER JOIN "shop_product_features" 430 | ON ("shop_feature"."id" = "shop_product_features"."feature_id") 431 | WHERE "shop_product_features"."product_id" = 2; 432 | 433 | Supplier : Samsung 434 | Size : 75 inches 435 | Colour : Black 436 | 437 | Product: Juicer 438 | (0.001) 439 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 440 | FROM "shop_feature" 441 | INNER JOIN "shop_product_features" 442 | ON ("shop_feature"."id" = "shop_product_features"."feature_id") 443 | WHERE "shop_product_features"."product_id" = 3; 444 | 445 | Colour : LightSeaGreen 446 | Colour : SpringGreen 447 | Colour : Azure 448 | 449 | ``` 450 | 451 | This is a perfect example of a `1 + N` query. We executed `1` query to fetch 452 | products. Then we issued `1` query for each product `N` to get the features. 453 | 454 | Luckily, Django has a way to reduce the number of queries executed when dealing 455 | with lists of related data too. 456 | 457 | #### [](#prefetch-related)Prefetch Related 458 | 459 | The [prefetch_related()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#prefetch-related) 460 | queryset method is what Django provides to help minimise the number of queries 461 | required to get the data we need. It's more complicated than `select_related` 462 | but it comes with a lot of power. 463 | 464 | We can't actually get the data we need with a single query. But we can do it in 465 | two, which is `1 + 1` queries. One for the product, and then a single query to 466 | fetch all of the features for all of the products. 467 | 468 | ```python 469 | In [30]: for p in Product.objects.prefetch_related('features').all()[0:3]: 470 | ...: print('Product: ', p.name) 471 | ...: for f in p.features.all(): 472 | ...: print(f.name, ': ', f.value) 473 | ...: print() 474 | ...: 475 | (0.001) 476 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price" 477 | FROM "shop_product"; 478 | (0.001) 479 | SELECT 480 | ("shop_product_features"."product_id") AS "_prefetch_related_val_product_id", "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 481 | FROM "shop_feature" 482 | INNER JOIN "shop_product_features" 483 | ON ("shop_feature"."id" = "shop_product_features"."feature_id") 484 | WHERE "shop_product_features"."product_id" IN (1, 2, 3); 485 | 486 | Product: Samsung 65 inch 487 | Supplier : Samsung 488 | Size : 65 inches 489 | Colour : Black 490 | 491 | Product: Samsung 75 inch 492 | Supplier : Samsung 493 | Size : 75 inches 494 | Colour : Black 495 | 496 | Product: Juicer 497 | Colour : LightSeaGreen 498 | Colour : SpringGreen 499 | Colour : Azure 500 | ``` 501 | 502 | Prefetching works by issuing the first query, gathering all of the product 503 | `id`s returned, and then issuing the second query for all features that match 504 | the list of product `id`s. It then uses these features as a cache whenever you 505 | attempt to access `product.features.all()`. 506 | 507 | ##### [](#prefetch-related-caveats)Prefetch Related Caveats 508 | 509 | With great power, comes great responsibility. There are a number of ways to 510 | abuse `prefetch_related`, or to skip using the cache you created and execute 511 | new queries. 512 | 513 | **Memory** 514 | 515 | Prefetching can end up using quite a bit of memory if there are lots of 516 | relations. If we were querying for 10,000 products, and each product had 10 517 | features, django would first issue a query to features with a `WHERE` clause 518 | containing 10,000 `id`s which is generally not good for a database. But then 519 | it'd return 100,000 features that it would have to keep cached in memory. 520 | 521 | **Filtering** 522 | 523 | Prefetching only works when you access the many to many field using `.all()`. 524 | But it *is* possible to further filter the many to many field using `.filter()` 525 | and other queryset methods. 526 | 527 | ```python 528 | In [31]: product = Product.objects.prefetch_related('features').get(pk=1) 529 | 530 | (0.001) 531 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price" 532 | FROM "shop_product" 533 | WHERE "shop_product"."id" = 1; args=(1,) 534 | (0.001) 535 | SELECT ("shop_product_features"."product_id") AS "_prefetch_related_val_product_id", "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 536 | FROM "shop_feature" 537 | INNER JOIN "shop_product_features" 538 | ON ("shop_feature"."id" = "shop_product_features"."feature_id") 539 | WHERE "shop_product_features"."product_id" IN (1); 540 | 541 | In [32]: product.features.get(name='Supplier').value 542 | 543 | (0.001) 544 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 545 | FROM "shop_feature" 546 | INNER JOIN "shop_product_features" ON ("shop_feature"."id" = "shop_product_features"."feature_id") 547 | WHERE ("shop_product_features"."product_id" = 1 AND "shop_feature"."name" = 'Supplier'); 548 | 549 | Samsung 550 | ``` 551 | 552 | Our cache isn't able to interpret extra database operations, so django is 553 | forced to execute another query. 554 | 555 | **iterator()** 556 | 557 | Django provides an [iterator()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#iterator) 558 | queryset method, which allows django to fetch instances as it iterates through 559 | a loop, rather than loading the entire result set in to memory at once. Since 560 | prefetch_related relies on the entire result being available so that it can 561 | collect all the `id`s, `refetch_related` has no effect when used with 562 | `iterator`. 563 | 564 | ### [](#reverse-relations)Reverse Relations 565 | 566 | Given a `ForeignKey` field linking a `product` to a `category`, Django will 567 | setup the reverse link from a `category` to the set (or list) of `products`. 568 | The convention is to name the reverse field `modelname_set`, but this can be 569 | overridden. 570 | 571 | For our examples above, we'd access the products of a category using 572 | `product_set.all()`. 573 | 574 | ```python 575 | In [33]: category = Category.objects.get(pk=1) 576 | (0.001) 577 | SELECT "shop_category"."id", "shop_category"."name" 578 | FROM "shop_category" WHERE "shop_category"."id" = 1; 579 | 580 | In [34]: products = list(category.product_set.all()) 581 | (0.001) 582 | SELECT "shop_product"."id", "shop_product"."name", "shop_product"."category_id", "shop_product"."price" 583 | FROM "shop_product" 584 | WHERE "shop_product"."category_id" = 1; 585 | ``` 586 | 587 | Reverse relations function identically to `ManyToMany` fields, so they can be 588 | prefetched, filtered, or ordered. 589 | 590 | ### [](#prefetch-or-select-related)Prefetch Related vs Select Related 591 | 592 | **select_related** 593 | 594 | Use `select_related()` on `ForeignKey` fields **only**. It has no affect on 595 | `ManyToManyField`s or reverse relations. 596 | 597 | > select_related is used to access a single related object 598 | 599 | **prefetch_related** 600 | 601 | Use `prefetch_related()` on `ManyToManyField`s or reverse relations. It can 602 | also be used on `ForeignKey` fields, but `select_related` is nearly always a 603 | better choice. 604 | 605 | > prefetch_related is used to access multiple related objects 606 | 607 | ## [](#query-counts)Query Count Examples 608 | 609 | Given the following count of objects in the database, how many queries are 610 | executed for each example below? 611 | 612 | - 1000 products 613 | - 5 categories (200 products per category) 614 | - 5000 features (10 features per product, some shared) 615 | 616 | ```python 617 | In [35]: for p in Product.objects.all(): 618 | ...: print(p.category.name) 619 | ...: for feature in p.features.all(): 620 | ...: print(f'{feature.name}: {feature.value}') 621 | ...: 622 | ``` 623 | 624 |
625 | Answer 626 | 2001 - 1 for product, 1000 for category, 1000 for features 627 |
628 | 629 | 630 | ```python 631 | In [36]: for p in Product.objects.select_related('category').all(): 632 | ...: print(p.category.name) 633 | ...: for feature in p.features.all(): 634 | ...: print(f'{feature.name}: {feature.value}') 635 | ...: 636 | ``` 637 | 638 |
639 | Answer 640 | 1001 - 1 for product and category, and 1000 for features 641 |
642 | 643 | ```python 644 | In [37]: for p in Product.objects.prefetch_related('features').all(): 645 | ...: print(p.category.name) 646 | ...: for feature in p.features.all(): 647 | ...: print(f'{feature.name}: {feature.value}') 648 | ...: 649 | ``` 650 | 651 |
652 | Answer 653 | 1002 - 1 for product, 1000 for category, and 1 for features 654 |
655 | 656 | ```python 657 | In [38]: for p in Product.objects.select_related( 658 | ...: 'category' 659 | ...: ).prefetch_related('features').all(): 660 | ...: print(p.category.name) 661 | ...: for feature in p.features.all(): 662 | ...: print(f'{feature.name}: {feature.value}') 663 | ...: 664 | ``` 665 | 666 |
667 | Answer 668 | 2 - 1 for product and category, and 1 for features 669 |
670 | 671 | ```python 672 | In [39]: for p in Product.objects.select_related( 673 | ...: 'category' 674 | ...: ).prefetch_related('features').all(): 675 | ...: print(p.category.name) 676 | ...: for feature in p.features.filter(name='Supplier'): 677 | ...: print(f'{feature.name}: {feature.value}') 678 | ...: 679 | ``` 680 | 681 |
682 | Answer 683 | 1003 - 1 for product and category, and 1 for features cache, 1000 for features 684 |
685 | 686 | ## [](#query-optimisation)Query Optimisation 687 | 688 | There are a bunch of optimisations you can make at the SQL layer, some of them 689 | obvious, some of them requiring a great deal of knowledge about the specific 690 | database you're using. We're going to focus on the optimisations we can make 691 | at the django ORM level. 692 | 693 | ### [](#query-optimisation-sorting)Sorting 694 | 695 | This one is fairly simple. Don't sort results if they don't absolutely need 696 | to be sorted! Sorting can often take up a large portion of the actual query 697 | time. 698 | 699 | ```sql 700 | postgres=# explain analyze select * from shop_product; 701 | QUERY PLAN 702 | ---------------------------------------------------------------------------------------------------------- 703 | Seq Scan on shop_product (cost=0.00..1.30 rows=30 width=104) (actual time=0.011..0.014 rows=30 loops=1) 704 | Planning time: 0.049 ms 705 | Execution time: 0.031 ms 706 | 707 | postgres=# explain analyze select * from shop_product order by price; 708 | QUERY PLAN 709 | ---------------------------------------------------------------------------------------------------------------- 710 | Sort (cost=2.04..2.11 rows=30 width=104) (actual time=0.031..0.033 rows=30 loops=1) 711 | Sort Key: price 712 | Sort Method: quicksort Memory: 27kB 713 | -> Seq Scan on shop_product (cost=0.00..1.30 rows=30 width=104) (actual time=0.009..0.013 rows=30 loops=1) 714 | Planning time: 0.060 ms 715 | Execution time: 0.051 ms 716 | ``` 717 | 718 | Sometimes, though, sneaky awful ORM developers will add a **default ordering** 719 | to models. So even though we aren't adding ordering ourselves, there is an 720 | implicit ordering added to every single query against that model. 721 | 722 | ```python 723 | 724 | class Feature(models.Model): 725 | name = models.CharField(max_length=32) 726 | value = models.CharField(max_length=32) 727 | visible = models.BooleanField(default=True) 728 | 729 | class Meta: 730 | ordering = ['name'] 731 | 732 | In [13]: features = list(Feature.objects.all()) 733 | (0.001) 734 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 735 | FROM "shop_feature" 736 | ORDER BY "shop_feature"."name" ASC; 737 | ``` 738 | 739 | You can be explicit about removing any default ordering though, which is 740 | especially important when doing any kind of aggregation. Ordering fields are 741 | added to the `GROUP BY`, which can give incorrect results if you weren't 742 | expecting them. 743 | 744 | ```python 745 | In [14]: features = list(Feature.objects.order_by()) 746 | (0.001) 747 | SELECT "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 748 | FROM "shop_feature"; 749 | ``` 750 | 751 | Much better. 752 | 753 | ### [](#filtering-early)Filter Early 754 | 755 | You want your filters (WHERE clauses) to be as restrictive as possible. You 756 | want to return the fewest possible number of rows at each step of a query. This 757 | includes `__in` filters with subqueries! 758 | 759 | Let's write a queryset, and `explain analyze` the SQL that would have been 760 | executed. 761 | 762 | ```python 763 | In [21]: features = Feature.objects.filter( 764 | name='Supplier', product__in=Product.objects.filter(price__gt=500) 765 | ) 766 | 767 | In [22]: print(features.query) 768 | SELECT 769 | "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 770 | FROM "shop_feature" 771 | INNER JOIN "shop_product_features" 772 | ON ("shop_feature"."id" = "shop_product_features"."feature_id") 773 | WHERE ("shop_feature"."name" = 'Supplier' AND "shop_product_features"."product_id" IN ( 774 | SELECT U0."id" AS Col1 775 | FROM "shop_product" U0 WHERE U0."price" > 500 776 | )) 777 | ORDER BY "shop_feature"."name" ASC 778 | ``` 779 | 780 | ```sql 781 | postgres=# explain analyze SELECT 782 | "shop_feature"."id", "shop_feature"."name", "shop_feature"."value", "shop_feature"."visible" 783 | FROM "shop_feature" 784 | INNER JOIN "shop_product_features" ON ("shop_feature"."id" = "shop_product_features"."feature_id") 785 | WHERE ("shop_feature"."name" = 'Supplier' AND "shop_product_features"."product_id" IN ( 786 | SELECT U0."id" AS Col1 787 | FROM "shop_product" U0 WHERE U0."price" > 500) 788 | ) 789 | ORDER BY "shop_feature"."name" ASC; 790 | QUERY PLAN 791 | ----------------------------------------------------------------------------------------------------------------------------- 792 | Nested Loop Semi Join (cost=15.29..21.03 rows=1 width=169) (actual time=0.034..0.034 rows=0 loops=1) 793 | -> Hash Join (cost=15.15..20.76 rows=1 width=173) (actual time=0.034..0.034 rows=0 loops=1) 794 | Hash Cond: (shop_product_features.feature_id = shop_feature.id) 795 | -> Seq Scan on shop_product_features (cost=0.00..4.62 rows=262 width=8) (actual time=0.015..0.015 rows=1 loops=1) 796 | -> Hash (cost=15.12..15.12 rows=2 width=169) (actual time=0.011..0.011 rows=0 loops=1) 797 | Buckets: 1024 Batches: 1 Memory Usage: 8kB 798 | -> Seq Scan on shop_feature (cost=0.00..15.12 rows=2 width=169) (actual time=0.011..0.011 rows=0 loops=1) 799 | Filter: ((name)::text = 'Supplier'::text) 800 | Rows Removed by Filter: 35 801 | -> Index Scan using shop_product_pkey on shop_product u0 (cost=0.14..0.20 rows=1 width=4) 802 | Index Cond: (id = shop_product_features.product_id) 803 | Filter: (price > '500'::numeric) 804 | Planning time: 0.313 ms 805 | Execution time: 0.093 ms 806 | (14 rows) 807 | ``` 808 | 809 | ### [](#optimising-indexes)Indexes 810 | 811 | Indexes are usually the first thing people think about when trying to 812 | optimise a query. Finding the ideal set of indexes to add to a particular 813 | table is an art form, and always evolving depending on the number and nature 814 | of various queries hitting that table. 815 | 816 | When you add an index, always check to see if it's being used. Simply having 817 | one does not mean it'll be used. `EXPLAIN ANALYZE` a couple of real queries 818 | before and after. 819 | 820 | Good candidates for indexes (all conditions below should be met): 821 | 822 | - A table with rows > 100 or so. A table with fewer rows will probably just 823 | scan each row. 824 | - A column that has lots of different values (cardinality). No use having an 825 | index if it matches 50% of data. 826 | - If 50% of your data is NULL, but the rest is quite varied, then a 827 | `filtered index` might be a good fit. 828 | - If many different queries use the same column in a WHERE clause 829 | - Consider multiple column index if multiple columns exist in a WHERE clause 830 | - Order is important in multiple column indexes. Use the most common column first. 831 | - If there are lots of different WHERE clauses in a single query, an index is 832 | sometimes unlikely to be used. 833 | 834 | ### [](#values-querysets)Values Query Set 835 | 836 | Model objects are very expensive to construct. For large querysets, it can 837 | often add up to a significant percentage of overall time spent in python. Fields 838 | need to be set, signals need to fire, reverse relations need to be set up. There's 839 | a lot of work that goes into building up the model hierarchy. 840 | 841 | If you just need the data from the database, and don't need to access model 842 | properties at all, then use a [values()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#values) 843 | or a [values_list()](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#values-list) 844 | method, which returns the data as simple dictionaries or tuples instead. 845 | 846 | ```python 847 | In [25]: %time model_objects = list(Product.objects.all()) 848 | 849 | CPU times: user 985 µs, sys: 1.92 ms, total: 2.9 ms 850 | Wall time: 3.09 ms 851 | 852 | In [26]: %time model_objects = list(Product.objects.values()) 853 | 854 | CPU times: user 978 µs, sys: 465 µs, total: 1.44 ms 855 | Wall time: 1.65 ms 856 | 857 | In [27]: %time model_objects = list(Product.objects.values('name', 'price')) 858 | 859 | CPU times: user 704 µs, sys: 597 µs, total: 1.3 ms 860 | Wall time: 1.39 ms 861 | ``` 862 | 863 | ### [](#queryset-calculations)Calculations 864 | 865 | Some people like to speak about *Fat Models*, meaning that a lot of logic is 866 | encapsulated within methods and properties of the model itself. This can be 867 | a good thing in some cases, unless the method depends on data in a different 868 | table, or a different model depends on the calculation in your method. 869 | 870 | Ideally, we can execute a single query (plus any necessary prefetches) to get 871 | all of the information we need for a particular goal. Sometimes though, we don't 872 | get to choose how the query is constructed, or how it's used once executed. 873 | 874 | If you need to use a model method for a particular calculation, then you have 875 | to have that python object available. You're no longer just depending on the 876 | database. 877 | 878 | Here's an example: 879 | 880 | ```python 881 | def customer_price(self): 882 | return Price.objects.get( 883 | product=self, 884 | start__lte=timezone.now(), 885 | end__isnull=True 886 | ).price 887 | ``` 888 | 889 | Now, a bunch of views and other models rely on this `customer_price` method, 890 | so they need to construct a full model object, but it'll also execute a query 891 | every time the price needs to be shown. This can be hugely expensive when 892 | iterating over a large list of products. 893 | 894 | Instead, design your models so that all important questions can be answered 895 | with a `values` queryset. 896 | 897 | [Query expressions](https://docs.djangoproject.com/en/1.11/ref/models/expressions/) 898 | can be extremely helpful by performing calculations across a set of objects, 899 | rather than performing them in python for each object. See my presentation/talk 900 | on [customising sql](https://github.com/jarshwah/customsql_talk/) for more ideas. 901 | 902 | ### [](#advanced-prefetching)Advanced Prefetch Related 903 | 904 | #### Prefetch Objects 905 | 906 | [Prefetch](https://docs.djangoproject.com/en/1.11/ref/models/querysets/#django.db.models.Prefetch) 907 | is a class that gives users greater control over the query executed to fill the 908 | cache. 909 | 910 | In our examples earlier, we seen what happened when you add filtering onto a 911 | ManyToMany field -- it'll skip the cache. But if you only really care about 912 | a subset of the related values, you can filter it in the `Prefetch` object. 913 | 914 | ```python 915 | In [29]: products = ( 916 | ...: Product 917 | ...: .objects 918 | ...: .select_related('category') 919 | ...: .prefetch_related( 920 | ...: Prefetch('features', queryset=Feature.objects.filter(name='Supplier')) 921 | ...: ) 922 | ...: ) 923 | 924 | In [30]: for p in products: 925 | ...: for f in p.features.all(): 926 | ...: print(f'{p.name}: {f.value}') 927 | ...: 928 | 929 | Samsung 65 inch: Samsung 930 | Samsung 75 inch: Samsung 931 | Juicer: Breville 932 | ``` 933 | 934 | That's only two queries. 935 | 936 | #### Overwriting Inefficient Methods 937 | 938 | Sometimes it's just not possible to avoid adding complex behaviour into our 939 | models. But we can design our methods to be as optimal as possible locally, 940 | while allowing a savvy caller that's using `Prefetch` to take it even further. 941 | 942 | This is a pattern I haven't seen in the wild before, and I was surprised it 943 | worked. 944 | 945 | The concept is to cache the results of the many to many field on first access, 946 | and to perform all filtering from within python. This keeps any access of the 947 | many to many field down to a single query maximum per instance. 948 | 949 | ```python 950 | 951 | from django.utils.functional import cached_property 952 | 953 | class Product(models.Model): 954 | ... 955 | features = models.ManyToManyField(Feature) 956 | 957 | @cached_property 958 | def all_features(self): 959 | return list(self.features.all()) 960 | 961 | @property 962 | def visible_features_python(self): 963 | return [feature for feature in self.all_features if feature.visible] 964 | 965 | @property 966 | def invisible_features_python(self): 967 | return [feature for feature in self.all_features if not feature.visible] 968 | 969 | ``` 970 | 971 | If `visible_features_python` is accessed, all features will be queried, stored 972 | on the instance, and then only the visible features are returned. If 973 | `invisible_features_python` is accessed after that, we skip the query execution, 974 | and immediately filter the cached values. We've reduced the number of queries 975 | from 2 to 1 for each Product. 500 products will result in 501 queries. 976 | 977 | The great trick here, is that we can use a `Prefetch` objects to store our 978 | cache directly on the `all_features` property, so that any access of visible 979 | or invisible features results in 0 extra queries per product! 980 | 981 | ```python 982 | In [33]: products = ( 983 | ...: Product 984 | ...: .objects 985 | ...: .select_related('category') 986 | ...: .prefetch_related( 987 | ...: Prefetch( 988 | ...: 'features', 989 | ...: queryset=Feature.objects.all(), 990 | ...: to_attr='all_features') 991 | ...: ) 992 | ...: ) 993 | 994 | In [34]: for p in products: 995 | ...: for f in p.visible_features_python: 996 | ...: print(f'{p.name}: {f.value}') 997 | ...: 998 | ``` 999 | 1000 | There are a total of 2 queries executed for this query. 1 for the products, 1001 | and 1 for the features prefetch cache. 1002 | 1003 | This pattern allows models to retain their methods and properties, while allowing 1004 | callers to inject ideal caches into a large number of objects for very little 1005 | cost. 1006 | 1007 | ## [](#summary)Summary 1008 | 1009 | We started by going through how the Django ORM maps database concepts to 1010 | python concepts, mainly through relations. How relations work is important 1011 | when trying to optimise query counts with `select_related` and 1012 | `prefetch_related.` 1013 | 1014 | Then we went over some techniques for improving performance of our queries. The 1015 | concepts were: 1016 | 1017 | 1. Reduction in number of queries - avoiding 1 + N queries. 1018 | 2. Utilising data caches to avoid going to the database. 1019 | 3. Making the database do less work: 1020 | - Avoid sorting 1021 | - Filter the data as much as possible 1022 | - Create indexes to efficiently filter 1023 | 4. Prefer performing calculations at the queryset level, rather than the 1024 | model instance level. 1025 | 5. Design your models so that callers can take advantage by providing prefetch 1026 | caches. 1027 | 1028 | What tips and tricks do you use for query performance tuning? 1029 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | db: 5 | image: postgres:latest 6 | environment: 7 | - POSTGRES_USER=shop 8 | - POSTGRES_PASSWORD=shop 9 | ports: 10 | - "25432:5432" 11 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | .git,.yml,__pycache__, 4 | max-line-length = 119 5 | -------------------------------------------------------------------------------- /shop/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "shop.settings") 7 | try: 8 | from django.core.management import execute_from_command_line 9 | except ImportError: 10 | # The above import may fail for some other reason. Ensure that the 11 | # issue is really that Django is missing to avoid masking other 12 | # exceptions on Python 2. 13 | try: 14 | import django 15 | except ImportError: 16 | raise ImportError( 17 | "Couldn't import Django. Are you sure it's installed and " 18 | "available on your PYTHONPATH environment variable? Did you " 19 | "forget to activate a virtual environment?" 20 | ) 21 | raise 22 | execute_from_command_line(sys.argv) 23 | -------------------------------------------------------------------------------- /shop/shop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jarshwah/optimising-django-queries/5c984c419236fe73f868f2517a9f9ea9e54c6c05/shop/shop/__init__.py -------------------------------------------------------------------------------- /shop/shop/data.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from faker import Faker 4 | from django.utils import timezone 5 | 6 | 7 | def create_initial_data(apps, schema_editor): 8 | Category = apps.get_model('shop', 'Category') 9 | Feature = apps.get_model('shop', 'Feature') 10 | Product = apps.get_model('shop', 'Product') 11 | Sale = apps.get_model('shop', 'Sale') 12 | 13 | fake = Faker() 14 | 15 | categories = [ 16 | Category(name='Mens'), 17 | Category(name='Womens'), 18 | Category(name='Kids') 19 | ] 20 | categories = Category.objects.bulk_create(categories) 21 | 22 | features = [ 23 | Feature(name='Size', value='XS'), 24 | Feature(name='Size', value='S'), 25 | Feature(name='Size', value='M'), 26 | Feature(name='Size', value='L'), 27 | Feature(name='Size', value='XL'), 28 | ] 29 | for _ in range(30): 30 | features.append( 31 | Feature( 32 | name='Colour', 33 | value=fake.color_name(), 34 | visible=fake.boolean(chance_of_getting_true=70) 35 | ) 36 | ) 37 | features = Feature.objects.bulk_create(features) 38 | 39 | products = [] 40 | for _ in range(30): 41 | products.append( 42 | Product( 43 | name=fake.company(), 44 | category=random.choice(categories), 45 | price=fake.pydecimal(left_digits=3, right_digits=2, positive=True), 46 | ) 47 | ) 48 | products = Product.objects.bulk_create(products) 49 | 50 | for product in products: 51 | product.features.add(*set(random.choices(features, k=10))) 52 | 53 | sales = [] 54 | for _ in range(1000): 55 | sales.append( 56 | Sale( 57 | product=random.choice(products), 58 | sale_date=fake.date_time_this_year(before_now=True, after_now=False, tzinfo=timezone.utc) 59 | ) 60 | ) 61 | Sale.objects.bulk_create(sales) 62 | -------------------------------------------------------------------------------- /shop/shop/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11.1 on 2017-05-29 12:09 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | from django.utils import timezone 7 | import django.db.models.deletion 8 | 9 | from shop.data import create_initial_data 10 | 11 | 12 | class Migration(migrations.Migration): 13 | 14 | initial = True 15 | 16 | dependencies = [ 17 | ] 18 | 19 | operations = [ 20 | migrations.CreateModel( 21 | name='Category', 22 | fields=[ 23 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 24 | ('name', models.CharField(max_length=32)), 25 | ], 26 | ), 27 | migrations.CreateModel( 28 | name='Feature', 29 | fields=[ 30 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 31 | ('name', models.CharField(max_length=32)), 32 | ('value', models.CharField(max_length=32)), 33 | ('visible', models.BooleanField(default=True)), 34 | ], 35 | options={ 36 | 'ordering': ['name'], 37 | }, 38 | ), 39 | migrations.CreateModel( 40 | name='Product', 41 | fields=[ 42 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 43 | ('name', models.CharField(max_length=32)), 44 | ('price', models.DecimalField(decimal_places=2, max_digits=6)), 45 | ('category', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='shop.Category')), 46 | ('features', models.ManyToManyField(to='shop.Feature')), 47 | ], 48 | ), 49 | migrations.CreateModel( 50 | name='Sale', 51 | fields=[ 52 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 53 | ('sale_date', models.DateTimeField(default=timezone.now)), 54 | ('product', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='shop.Product')), 55 | ], 56 | ), 57 | migrations.RunPython(create_initial_data, migrations.RunPython.noop) 58 | ] 59 | -------------------------------------------------------------------------------- /shop/shop/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jarshwah/optimising-django-queries/5c984c419236fe73f868f2517a9f9ea9e54c6c05/shop/shop/migrations/__init__.py -------------------------------------------------------------------------------- /shop/shop/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | from django.utils.functional import cached_property as buffered_property 3 | from django.utils import timezone 4 | 5 | 6 | class Category(models.Model): 7 | name = models.CharField(max_length=32) 8 | 9 | def __str__(self): 10 | return self.name 11 | 12 | 13 | class Feature(models.Model): 14 | name = models.CharField(max_length=32) 15 | value = models.CharField(max_length=32) 16 | visible = models.BooleanField(default=True) 17 | 18 | class Meta: 19 | ordering = ['name'] 20 | 21 | def __str__(self): 22 | return f'{self.name} = {self.value}' 23 | 24 | 25 | class Product(models.Model): 26 | name = models.CharField(max_length=32) 27 | category = models.ForeignKey(Category) 28 | features = models.ManyToManyField(Feature) 29 | price = models.DecimalField(max_digits=6, decimal_places=2) 30 | 31 | def __str__(self): 32 | return self.name 33 | 34 | @buffered_property 35 | def all_features(self): 36 | return list(self.features.all()) 37 | 38 | @property 39 | def visible_features_python(self): 40 | return [feature for feature in self.all_features if feature.visible] 41 | 42 | @property 43 | def invisible_features_python(self): 44 | return [feature for feature in self.all_features if not feature.visible] 45 | 46 | @property 47 | def visible_features_database(self): 48 | return self.features.filter(visible=True) 49 | 50 | @property 51 | def invisible_features_database(self): 52 | return self.features.filter(visible=False) 53 | 54 | 55 | class Sale(models.Model): 56 | product = models.ForeignKey(Product) 57 | sale_date = models.DateTimeField(default=timezone.now) 58 | -------------------------------------------------------------------------------- /shop/shop/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for shop project Django 1.11.1. 3 | """ 4 | 5 | import os 6 | 7 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 8 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 9 | 10 | SECRET_KEY = '87hai)pey)ay+z=^@5-oi3fv4gjo99ns2la9tttaqod@ijv!x1' 11 | DEBUG = True 12 | INSTALLED_APPS = [ 13 | 'django.contrib.admin', 14 | 'django.contrib.auth', 15 | 'django.contrib.contenttypes', 16 | 'django_extensions', 17 | 'shop', 18 | ] 19 | ROOT_URLCONF = 'shop.urls' 20 | 21 | DATABASES = { 22 | 'default': { 23 | 'ENGINE': 'django.db.backends.postgresql', 24 | 'NAME': 'postgres', 25 | 'USER': 'shop', 26 | 'PASSWORD': 'shop', 27 | 'HOST': '192.168.99.100', # docker-machine IP 28 | 'PORT': '25432' 29 | } 30 | } 31 | 32 | TIME_ZONE = 'UTC' 33 | USE_TZ = True 34 | 35 | LOGGING = { 36 | 'version': 1, 37 | 'filters': { 38 | 'require_debug_true': { 39 | '()': 'django.utils.log.RequireDebugTrue', 40 | } 41 | }, 42 | 'handlers': { 43 | 'console': { 44 | 'level': 'DEBUG', 45 | 'filters': ['require_debug_true'], 46 | 'class': 'logging.StreamHandler', 47 | } 48 | }, 49 | 'loggers': { 50 | 'django.db.backends': { 51 | 'level': 'DEBUG', 52 | 'handlers': ['console'], 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /shop/shop/urls.py: -------------------------------------------------------------------------------- 1 | """shop URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.11/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import url 17 | from django.contrib import admin 18 | 19 | urlpatterns = [ 20 | url(r'^admin/', admin.site.urls), 21 | ] 22 | -------------------------------------------------------------------------------- /shop/shop/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for shop project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "shop.settings") 15 | 16 | application = get_wsgi_application() 17 | --------------------------------------------------------------------------------