├── .gitignore ├── Dockerfile ├── README.md ├── db.sqlite3 ├── docker-compose.yml ├── download ├── __init__.py ├── admin.py ├── apps.py ├── migrations │ └── __init__.py ├── models.py ├── tests.py └── views.py ├── manage.py ├── nginx ├── Dockerfile ├── conf.d │ └── default.conf └── nginx.conf ├── requirements.txt └── streaming ├── __init__.py ├── settings.py ├── urls.py └── wsgi.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | .idea 92 | 93 | tags 94 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3-onbuild 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # How does Django's `StreamingHttpResponse` work, exactly? 2 | 3 | This repository exists to explain just what goes on when you use Django's 4 | `StreamingHttpResponse`. 5 | 6 | I will discuss what happens in your Django application, what happens at the 7 | Python Web Server Gateway Interface (WSGI) layer, and look at some examples. 8 | 9 | ## How to use this repository 10 | 11 | Just read this document (README.md). 12 | 13 | If you want to experiment with running `curl` requests against a streaming vs. 14 | non-streaming Django view, follow the next section, "Running the 15 | `streaming_django` project," to install the included example Django project. 
16 | 17 | ### Running the `streaming_django` project 18 | 19 | First, [install docker](https://www.docker.com/), including `docker-compose`, 20 | and then get a machine started. 21 | 22 | When you have a Docker machine running, do the following: 23 | 24 | $ git clone git@github.com:abrookins/streaming_django.git 25 | $ cd streaming_django 26 | $ docker-compose build 27 | $ docker-compose up 28 | 29 | Now you're ready to make a request: 30 | 31 | $ curl -vv --raw "http://192.168.99.100/download_csv_streaming" 32 | 33 | Or: 34 | 35 | $ curl -vv --raw "http://192.168.99.100/download_csv" 36 | 37 | **Pro tip**: The `--raw` flag is important if you want to see that a response 38 | is actually streaming. Without it, you won't see much difference between a 39 | streaming and non-streaming response. 40 | 41 | ## So, what even is a `StreamingHttpResponse`? 42 | 43 | Most Django responses use `HttpResponse`. At a high level, this means that the 44 | body of the response is built in memory and sent to the HTTP client in a single 45 | piece. 46 | 47 | Here's a short example of using `HttpResponse`: 48 | 49 | ```python 50 | def my_view(request): 51 | message = 'Hello, there!' 52 | response = HttpResponse(message) 53 | response['Content-Length'] = len(message) 54 | 55 | return response 56 | ``` 57 | 58 | A `StreamingHttpResponse`, on the other hand, is a response whose body is sent 59 | to the client in multiple pieces, or "chunks." 60 | 61 | Here's a short example of using `StreamingHttpResponse`: 62 | 63 | ```python 64 | def hello(): 65 | yield 'Hello,' 66 | yield 'there!' 67 | 68 | def my_view(request): 69 | # NOTE: No Content-Length header! 70 | return StreamingHttpResponse(hello()) 71 | ``` 72 | 73 | You can read more about how to use these two classes in [Django's 74 | documentation](https://docs.djangoproject.com/en/1.9/ref/request-response/). 75 | The interesting part is what happens next -- *after* you return the response. 
76 | 77 | ## When would you use a `StreamingHttpResponse`? 78 | 79 | But before we talk about what happens *after* you return the response, let us 80 | digress for a moment: why would you even use a `StreamingHttpResponse`? 81 | 82 | One of the best use cases for streaming responses is to send large files, e.g. 83 | a large CSV file. 84 | 85 | With an `HttpResponse`, you would typically load the entire file into memory 86 | (produced dynamically or not) and then send it to the client. For a large file, 87 | this costs memory on the server and delays the "time to first byte" (TTFB) 88 | seen by the client. 89 | 90 | With a `StreamingHttpResponse`, you can load parts of the file into memory, or 91 | produce parts of the file dynamically, and begin sending these parts to the 92 | client immediately. **Crucially,** there is no need to load the entire file 93 | into memory. 94 | 95 | ## A quick note about WSGI 96 | 97 | Now we're approaching the part of our journey that lies just beyond most Django 98 | developers' everyday experience of working with Django's response classes. 99 | 100 | Yes, we're about to discuss the [Python Web Server Gateway Interface (WSGI) 101 | specification](https://www.python.org/dev/peps/pep-3333/). 102 | 103 | So, a quick note if you aren't familiar with WSGI. WSGI is a specification that 104 | proposes rules that web frameworks and web servers should follow in order that 105 | you, the framework user, can swap out one WSGI server (like uWSGI) for another 106 | (Gunicorn) and expect your Python web application to continue to function. 107 | 108 | ## Django and WSGI 109 | 110 | And now, back to our journey into deeper knowledge! 111 | 112 | So, what happens after your Django view returns a `StreamingHttpResponse`? In 113 | most Python web applications, the response is passed off to a WSGI server like 114 | uWSGI or Gunicorn (AKA, Green Unicorn). 
115 | 116 | As with `HttpResponse`, Django ensures that `StreamingHttpResponse` conforms to 117 | the WSGI spec, which states this: 118 | 119 | > When called by the server, the application object must return an iterable 120 | > yielding zero or more bytestrings. This can be accomplished in a variety of 121 | > ways, such as by returning a list of bytestrings, or by the application being a 122 | > generator function that yields bytestrings, or by the application being a class 123 | > whose instances are iterable. 124 | 125 | Here's how `StreamingHttpResponse` satisfies these requirements ([full 126 | source](https://docs.djangoproject.com/en/1.9/_modules/django/http/response/#StreamingHttpResponse)): 127 | 128 | ```python 129 | @property 130 | def streaming_content(self): 131 | return map(self.make_bytes, self._iterator) 132 | # ... 133 | 134 | def __iter__(self): 135 | return self.streaming_content 136 | ``` 137 | 138 | You give the class a generator and it coerces the values that it produces into 139 | bytestrings. 140 | 141 | Compare that with the approach in `HttpResponse` ([full source](https://docs.djangoproject.com/en/1.9/_modules/django/http/response/#HttpResponse)): 142 | 143 | ```python 144 | @content.setter 145 | def content(self, value): 146 | # ... 147 | self._container = [value] 148 | 149 | def __iter__(self): 150 | return iter(self._container) 151 | ``` 152 | 153 | Ah ha! An iterator with a single item. Very interesting. Now, let's take a look 154 | at what a WSGI server does with these two different responses. 155 | 156 | ## The WSGI server 157 | 158 | Gunicorn's synchronous worker offers a good example of what happens after 159 | Django returns a response object. 
The code is [relatively 160 | short](https://github.com/benoitc/gunicorn/blob/39f62ac66beaf83ceccefbfabd5e3af7735d2aff/gunicorn/workers/sync.py#L176-L183) 161 | -- here's the important part (for our purposes): 162 | 163 | ```python 164 | respiter = self.wsgi(environ, resp.start_response) 165 | try: 166 | if isinstance(respiter, environ['wsgi.file_wrapper']): 167 | resp.write_file(respiter) 168 | else: 169 | for item in respiter: 170 | resp.write(item) 171 | resp.close() 172 | ``` 173 | 174 | Whether your response is streaming or not, Gunicorn iterates over it and writes 175 | each string the response yields. If that's the case, then what makes your 176 | streaming response actually "stream"? 177 | 178 | First, some conditions must be true: 179 | 180 | * The client must be speaking HTTP/1.1 or newer 181 | * The request method wasn't a HEAD 182 | * The response does not include a Content-Length header 183 | * The response status wasn't 204 or 304 184 | 185 | If these conditions are true, then Gunicorn will add a `Transfer-Encoding: 186 | chunked` header to the response, signaling to the client that the response will 187 | stream in chunks. 188 | 189 | In fact, Gunicorn will respond with `Transfer-Encoding: chunked` even if you 190 | used an `HttpResponse`, if those conditions are true! 191 | 192 | To really stream a response, that is, to send it to the client in pieces, the 193 | conditions must be true, *and* your response needs to be an iterable with 194 | multiple items. 195 | 196 | ### What does the client get? 197 | 198 | If the streaming response worked, the client should get an HTTP 1.1 response 199 | with the `Transfer-Encoding: chunked` header, and instead of a single piece of 200 | content with a `Content-Length`, the client should see each bytestring that 201 | your generator/iterator yielded, sent with the length of that chunk. 
202 | 203 | Here is an example that uses the code in this repository: 204 | 205 | ``` 206 | (streaming_django) ❯ curl -vv --raw "http://192.168.99.100/download_csv_streaming" 207 | * Trying 192.168.99.100... 208 | * Connected to 192.168.99.100 (192.168.99.100) port 80 (#0) 209 | > GET /download_csv_streaming HTTP/1.1 210 | > Host: 192.168.99.100 211 | > User-Agent: curl/7.43.0 212 | > Accept: */* 213 | > 214 | < HTTP/1.1 200 OK 215 | < Server: nginx/1.11.1 216 | < Date: Fri, 29 Jul 2016 14:27:58 GMT 217 | < Content-Type: text/csv 218 | < Transfer-Encoding: chunked 219 | < Connection: keep-alive 220 | < X-Frame-Options: SAMEORIGIN 221 | < Content-Disposition: attachment; filename=big.csv 222 | < 223 | f 224 | One,Two,Three 225 | 226 | f 227 | Hello,world,1 228 | 229 | ... 230 | 231 | 10 232 | Hello,world,99 233 | 234 | 0 235 | 236 | * Connection #0 to host 192.168.99.100 left intact 237 | ``` 238 | 239 | So there you have it. We journeyed from considering when to use 240 | `StreamingHttpResponse` over `HttpResponse`, to an example of using the class 241 | in your Django project, then into the dungeons of WSGI and WSGI servers, and 242 | finally to the client's experience. And we managed to stream a response -- go 243 | us! 244 | -------------------------------------------------------------------------------- /db.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abrookins/streaming_django/0a3b2ff19089cb9712e72cd8ac4c33e00b5e8b28/db.sqlite3 -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | web: 2 | restart: always 3 | build: . 
4 | expose: 5 | - "8000" 6 | command: /usr/local/bin/gunicorn streaming.wsgi:application -w 2 -b :8000 7 | 8 | nginx: 9 | restart: always 10 | build: ./nginx/ 11 | ports: 12 | - "80:80" 13 | links: 14 | - web:web 15 | volumes: 16 | - ./nginx/conf.d:/etc/nginx/conf.d 17 | -------------------------------------------------------------------------------- /download/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abrookins/streaming_django/0a3b2ff19089cb9712e72cd8ac4c33e00b5e8b28/download/__init__.py -------------------------------------------------------------------------------- /download/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /download/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class DownloadConfig(AppConfig): 5 | name = 'download' 6 | -------------------------------------------------------------------------------- /download/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abrookins/streaming_django/0a3b2ff19089cb9712e72cd8ac4c33e00b5e8b28/download/migrations/__init__.py -------------------------------------------------------------------------------- /download/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | # Create your models here. 4 | -------------------------------------------------------------------------------- /download/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 
4 | -------------------------------------------------------------------------------- /download/views.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from io import StringIO 3 | 4 | from django.http import StreamingHttpResponse, HttpResponse 5 | 6 | 7 | def big_csv(num_rows): 8 | for row in range(num_rows): 9 | output = StringIO() 10 | writer = csv.writer(output) 11 | 12 | if row == 0: 13 | writer.writerow(['One', 'Two', 'Three']) 14 | else: 15 | writer.writerow(['Hello', 'world', row]) 16 | 17 | output.seek(0) 18 | yield output.read() 19 | 20 | 21 | def download_csv(request): 22 | """Return a CSV file. 23 | 24 | This view responds with the entire content of the CSV file in a single piece. 25 | """ 26 | csv_file = ''.join(big_csv(100)) 27 | response = HttpResponse(csv_file, content_type='text/csv') 28 | response['Content-Disposition'] = 'attachment; filename=big.csv' 29 | response['Content-Length'] = len(csv_file) 30 | 31 | return response 32 | 33 | 34 | def download_csv_streaming(request): 35 | """Return a CSV file. 36 | 37 | This view responds with a generator that yields each row of the response as 38 | it's created. 
39 | """ 40 | response = StreamingHttpResponse(big_csv(100), content_type='text/csv') 41 | response['Content-Disposition'] = 'attachment; filename=big.csv' 42 | 43 | return response 44 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "streaming.settings") 7 | 8 | from django.core.management import execute_from_command_line 9 | 10 | execute_from_command_line(sys.argv) 11 | -------------------------------------------------------------------------------- /nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx 2 | -------------------------------------------------------------------------------- /nginx/conf.d/default.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80; 3 | server_name localhost; 4 | charset utf-8; 5 | 6 | location / { 7 | proxy_pass http://web:8000; 8 | proxy_set_header Host $host; 9 | proxy_set_header X-Real-IP $remote_addr; 10 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 11 | # Allow chunked responses. 
12 | proxy_buffering off; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /nginx/nginx.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abrookins/streaming_django/0a3b2ff19089cb9712e72cd8ac4c33e00b5e8b28/nginx/nginx.conf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Django==1.9.8 2 | gunicorn==19.6.0 -------------------------------------------------------------------------------- /streaming/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abrookins/streaming_django/0a3b2ff19089cb9712e72cd8ac4c33e00b5e8b28/streaming/__init__.py -------------------------------------------------------------------------------- /streaming/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for streaming project. 3 | 4 | Generated by 'django-admin startproject' using Django 1.8.3. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.8/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/1.8/ref/settings/ 11 | """ 12 | 13 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 14 | import os 15 | 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | 19 | # Quick-start development settings - unsuitable for production 20 | # See https://docs.djangoproject.com/en/1.8/howto/deployment/checklist/ 21 | 22 | # SECURITY WARNING: keep the secret key used in production secret! 23 | SECRET_KEY = 'sn^7o*=uk6ua4n)=l!!*^w-b@=#p_n=yleza1i+6vxi!3&g6lu' 24 | 25 | # SECURITY WARNING: don't run with debug turned on in production! 
26 | DEBUG = True 27 | 28 | ALLOWED_HOSTS = [] 29 | 30 | 31 | # Application definition 32 | 33 | INSTALLED_APPS = ( 34 | 'django.contrib.admin', 35 | 'django.contrib.auth', 36 | 'django.contrib.contenttypes', 37 | 'django.contrib.sessions', 38 | 'django.contrib.messages', 39 | 'django.contrib.staticfiles', 40 | ) 41 | 42 | MIDDLEWARE_CLASSES = ( 43 | 'django.contrib.sessions.middleware.SessionMiddleware', 44 | 'django.middleware.common.CommonMiddleware', 45 | 'django.middleware.csrf.CsrfViewMiddleware', 46 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 47 | 'django.contrib.auth.middleware.SessionAuthenticationMiddleware', 48 | 'django.contrib.messages.middleware.MessageMiddleware', 49 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 50 | 'django.middleware.security.SecurityMiddleware', 51 | ) 52 | 53 | ROOT_URLCONF = 'streaming.urls' 54 | 55 | TEMPLATES = [ 56 | { 57 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 58 | 'DIRS': [], 59 | 'APP_DIRS': True, 60 | 'OPTIONS': { 61 | 'context_processors': [ 62 | 'django.template.context_processors.debug', 63 | 'django.template.context_processors.request', 64 | 'django.contrib.auth.context_processors.auth', 65 | 'django.contrib.messages.context_processors.messages', 66 | ], 67 | }, 68 | }, 69 | ] 70 | 71 | WSGI_APPLICATION = 'streaming.wsgi.application' 72 | 73 | 74 | # Database 75 | # https://docs.djangoproject.com/en/1.8/ref/settings/#databases 76 | 77 | DATABASES = { 78 | 'default': { 79 | 'ENGINE': 'django.db.backends.sqlite3', 80 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 81 | } 82 | } 83 | 84 | 85 | # Internationalization 86 | # https://docs.djangoproject.com/en/1.8/topics/i18n/ 87 | 88 | LANGUAGE_CODE = 'en-us' 89 | 90 | TIME_ZONE = 'UTC' 91 | 92 | USE_I18N = True 93 | 94 | USE_L10N = True 95 | 96 | USE_TZ = True 97 | 98 | 99 | # Static files (CSS, JavaScript, Images) 100 | # https://docs.djangoproject.com/en/1.8/howto/static-files/ 101 | 102 | STATIC_URL = '/static/' 103 
| -------------------------------------------------------------------------------- /streaming/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import include, url 2 | from django.contrib import admin 3 | 4 | from download import views 5 | 6 | urlpatterns = [ 7 | url(r'^admin/', include(admin.site.urls)), 8 | url('^download_csv_streaming$', views.download_csv_streaming, name='download_csv_streaming'), 9 | url('^download_csv$', views.download_csv, name='download_csv') 10 | ] 11 | -------------------------------------------------------------------------------- /streaming/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for streaming project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "streaming.settings") 15 | 16 | application = get_wsgi_application() 17 | --------------------------------------------------------------------------------