├── MANIFEST.in ├── django_hashedfilenamestorage ├── models.py ├── __init__.py ├── test_settings.py ├── storage.py └── tests.py ├── .gitignore ├── pytest.ini ├── .travis.yml ├── tox.ini ├── CHANGELOG.rst ├── setup.py ├── LICENSE └── README.rst /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | -------------------------------------------------------------------------------- /django_hashedfilenamestorage/models.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /django_hashedfilenamestorage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | 4 | MANIFEST 5 | 6 | .cache/ 7 | .tox/ 8 | dist/ 9 | django_hashedfilenamestorage.egg-info/* 10 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | django_find_project = false 3 | DJANGO_SETTINGS_MODULE = django_hashedfilenamestorage.test_settings 4 | python_files = tests.py 5 | python_paths = django_hashedfilenamestorage -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "2.7" 5 | - "3.5" 6 | - "3.6" 7 | - "3.7" 8 | 9 | install: 10 | - pip install tox-travis 11 | 12 | script: 13 | - tox 14 | 15 | dist: xenial -------------------------------------------------------------------------------- /django_hashedfilenamestorage/test_settings.py: -------------------------------------------------------------------------------- 1 | SECRET_KEY = "test-secret" 2 | 3 | DATABASES = { 4 | 'default': { 5 | 'ENGINE': 'django.db.backends.sqlite3', 6 | 'NAME': 'test', 7 | } 8 | } 9 | 10 | INSTALLED_APPS = [ 11 | 'django_hashedfilenamestorage', 12 | ] 13 | 14 | DEFAULT_FILE_STORAGE = 'django_hashedfilenamestorage.storage.HashedFilenameFileSystemStorage' 15 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | setenv= 3 | PYTHONPATH = {toxinidir}:{env:PYTHONPATH:} 4 | 5 | envlist = 6 | py37-django{2.1,2.0,1.11,1.10,1.9,1.8} 7 | py36-django{2.1,2.0,1.11,1.10,1.9,1.8} 8 | py35-django{2.1,2.0,1.11,1.10,1.9,1.8} 9 | py27-django{1.11,1.10,1.9,1.8} 10 | 11 | [testenv] 12 | commands=pytest django_hashedfilenamestorage/tests.py 13 | deps = 14 | pytest 15 | pytest-django 16 | pytest-pythonpath 17 | django2.1: Django>=2.1,<2.2 18 | django2.0: Django>=2.0.10,<2.1 19 | django1.11: Django>=1.11,<2.0 20 | django1.10: Django>=1.10,<1.11 21 | django1.9: Django>=1.9,<1.10 22 | django1.8: Django>=1.8,<1.9 23 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Release Notes 3 | ============= 4 | 5 | 2.4.1 6 | ----- 7 | * Bump Django dependency requirement again 8 | 9 | 2.4 10 | ----- 11 | * Bump Django dependency requirement to avoid vulnerable Django versions 12 | 13 | 2.3 14 | ----- 15 | 16 | * Support Django 2.1+ 17 | * Remove duplicate hash calculation 18 | * Always lower filename extensions 19 | 20 | 21 | 2.2 22 | ----- 23 | * get_available_filename() now returns the default filename instead of raising 24 | an exception, for compatibility with other packages (overridden by the hashed 25 | filename on save) 26 | 27 | 2.1 28 | ----- 29 | * Fix bug for bytes content 30 | * Update setup.py and tox to support Django 2.0 for python version >= 3.4 31 | 32 | 2.0.1 33 | ----- 34 | * Specify django <2.0 in setup.py 35 | 36 | 37 | 2.0 38 | ----- 39 | 40 | * Add support for Python 3.5 and 3.6 41 | * Drop support for Python 2.6 42 | * Drop support for Django <1.8 43 | * Use pytest and tox for testing 44 | * Set up CI with travis 45 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | 4 | from setuptools import setup 5 | 6 | 7 | if sys.version_info < (3, 4): 8 | install_requires = ['Django>=1.8,<2.0'] 9 | else: 10 | install_requires = ['Django>=1.8,!=2.0,!=2.0.1,!=2.0.2,!=2.0.3,!=2.0.4,!=2.0.5,!=2.0.6,!=2.0.7,!=2.0.8,!=2.0.9'] 11 | 12 | 13 | setup( 14 | name='django-hashedfilenamestorage', 15 | version='2.4.1', 16 | description=('A Django storage backend that names files by hash value.'), 17 | long_description=open('README.rst', 'r').read(), 18 | author='Ecometrica', 19 | author_email='info@ecometrica.com', 20 | url='http://github.com/ecometrica/django-hashedfilenamestorage/', 21 | packages=['django_hashedfilenamestorage'], 22 | install_requires=install_requires, 23 | classifiers=[ 24 | 'Development Status :: 5 - Production/Stable', 25 | 'Environment :: Web Environment', 26 | 'Framework :: Django', 27 | 'Intended Audience :: Developers', 28 | 'License :: OSI Approved :: MIT License', 29 | 'Operating System :: OS Independent', 30 | 'Programming Language :: Python', 31 | 'Programming Language :: Python :: 2.7', 32 | 'Programming Language :: Python :: 3.5', 33 | 'Programming Language :: Python :: 3.6', 34 | 'Programming Language :: Python :: 3.7', 35 | 'Topic :: Software Development :: Libraries', 36 | ], 37 | zip_safe=True, 38 | tests_require=['pytest', 'tox'] 39 | ) 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Ecometrica 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://travis-ci.org/ecometrica/django-hashedfilenamestorage.svg?branch=master 2 | :target: https://travis-ci.org/ecometrica/django-hashedfilenamestorage 3 | 4 | ``django-hashedfilenamestorage`` 5 | ================================ 6 | 7 | A Django storage backend that names files by hash value. 8 | 9 | By default, ``django.core.files.storage.FileSystemStorage`` deals with 10 | conflicting filenames by appending an underscore and a random 7 11 | character alphanumeric string to the file. For 12 | instance, if you try to create ``hello.txt`` when it already exists, 13 | it will rename it as e.g. ``hello_a12mkj3.txt``. 14 | 15 | ``django-hashedfilenamestorage`` creates hashed filenames, so if you 16 | try to create ``hello.txt`` with the content ``Hello world!``, it will 17 | save it as ``d3486ae9136e7856bc42212385ea797094475802.txt``. Directory 18 | names and extensions are preserved, only the root filename is 19 | changed. This reduces the number of duplicates stored in the 20 | underlying backend, and implies that these files can be served from a 21 | static cache that never expires. 22 | 23 | Installing 24 | ---------- 25 | 26 | The easiest way to install ``django-hashedfilenamestorage`` is to use 27 | **pip**:: 28 | 29 | pip install django-hashedfilenamestorage 30 | 31 | 32 | Quick Start 33 | ----------- 34 | 35 | In your Django ``settings`` file: 36 | 37 | * Set ``DEFAULT_FILE_STORAGE`` to 38 | ``'django_hashedfilenamestorage.storage.HashedFilenameFileSystemStorage'`` 39 | 40 | This gives you hashed filenames, backed on Django's 41 | ``FileSystemStorage`` storage class. 42 | 43 | 44 | ``HashedFilenameMetaStorage`` 45 | ----------------------------- 46 | 47 | You can define a new underlying storage class by using 48 | ``HashedFilenameMetaStorage`` to wrap it:: 49 | 50 | from django.core.files.storage import get_storage_class 51 | 52 | from django_hashedfilenamestorage.storage import HashedFilenameMetaStorage 53 | 54 | HashedFilenameMyStorage = HashedFilenameMetaStorage( 55 | storage_class=get_storage_class('myapp.storage.MyStorage'), 56 | ) 57 | 58 | 59 | Hashing algorithm 60 | ----------------- 61 | 62 | HashedFilenameMetaStorage is meant to generate duplicate filenames for 63 | files with identical contents. To do this, it reads the contents of 64 | the file and generates a SHA-1 hash of them. 65 | 66 | Filenames have their extensions preserved, so it is possible to have 67 | duplicate contents on the filesystem, but it is important to help 68 | serve files with their proper content types. 69 | -------------------------------------------------------------------------------- /django_hashedfilenamestorage/storage.py: -------------------------------------------------------------------------------- 1 | from errno import EEXIST 2 | import hashlib 3 | import os 4 | 5 | from django.core.files import File 6 | from django.core.files.storage import FileSystemStorage 7 | from django.utils.encoding import force_text 8 | from django.core.exceptions import ImproperlyConfigured 9 | 10 | 11 | def HashedFilenameMetaStorage(storage_class): 12 | class HashedFilenameStorage(storage_class): 13 | def __init__(self, *args, **kwargs): 14 | # Try to tell storage_class not to uniquify filenames. 15 | # This class will be the one that uniquifies. 16 | try: 17 | new_kwargs = dict(kwargs, uniquify_names=False) 18 | super(HashedFilenameStorage, self).__init__(*args, 19 | **new_kwargs) 20 | except (TypeError, ImproperlyConfigured): 21 | super(HashedFilenameStorage, self).__init__(*args, **kwargs) 22 | 23 | def _get_content_name(self, name, content, chunk_size=None): 24 | dir_name, file_name = os.path.split(name) 25 | file_ext = os.path.splitext(file_name)[1].lower() 26 | file_root = self._compute_hash(content=content, 27 | chunk_size=chunk_size) 28 | # file_ext includes the dot. 29 | return os.path.join(dir_name, file_root + file_ext) 30 | 31 | def _compute_hash(self, content, chunk_size=None): 32 | if chunk_size is None: 33 | chunk_size = getattr(content, 'DEFAULT_CHUNK_SIZE', 34 | File.DEFAULT_CHUNK_SIZE) 35 | 36 | hasher = hashlib.sha1() 37 | 38 | cursor = content.tell() 39 | content.seek(0) 40 | try: 41 | while True: 42 | data = content.read(chunk_size) 43 | if not data: 44 | break 45 | if not isinstance(data, bytes): 46 | data = data.encode('utf-8') 47 | hasher.update(data) 48 | return hasher.hexdigest() 49 | finally: 50 | content.seek(cursor) 51 | 52 | def save(self, name, content, max_length=None): 53 | # Get the proper name for the file, as it will actually be saved. 54 | if name is None: 55 | name = content.name 56 | 57 | name = self._save(name, content) 58 | 59 | # Store filenames with forward slashes, even on Windows 60 | return force_text(name.replace('\\', '/')) 61 | 62 | def _save(self, name, content, *args, **kwargs): 63 | name = self._get_content_name(name=name, content=content) 64 | if self.exists(name): 65 | # File already exists, so we can safely do nothing 66 | # because their contents match. 67 | return name 68 | 69 | try: 70 | return super(HashedFilenameStorage, self)._save( 71 | name, content, *args, **kwargs 72 | ) 73 | except OSError as e: 74 | if e.errno == EEXIST: 75 | # We have a safe storage layer and file exists. 76 | pass 77 | else: 78 | raise 79 | return name 80 | 81 | HashedFilenameStorage.__name__ = 'HashedFilename' + storage_class.__name__ 82 | return HashedFilenameStorage 83 | 84 | 85 | HashedFilenameFileSystemStorage = HashedFilenameMetaStorage( 86 | storage_class=FileSystemStorage, 87 | ) 88 | -------------------------------------------------------------------------------- /django_hashedfilenamestorage/tests.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | 3 | import os 4 | import shutil 5 | import warnings 6 | 7 | try: 8 | from warnings import catch_warnings 9 | except ImportError: 10 | def catch_warnings(): 11 | original_filters = warnings.filters 12 | try: 13 | yield 14 | finally: 15 | warnings.filters = original_filters 16 | 17 | from django.conf import settings 18 | from django.core.files.base import ContentFile 19 | from django.test import TestCase 20 | from django.utils.functional import LazyObject 21 | 22 | from django_hashedfilenamestorage.storage import ( 23 | HashedFilenameMetaStorage, HashedFilenameFileSystemStorage, 24 | ) 25 | 26 | 27 | class StubStorage(object): 28 | def __init__(self): 29 | pass 30 | 31 | 32 | class StubSafeStorage(StubStorage): 33 | def __init__(self, uniquify_names=False, *args, **kwargs): 34 | # Support uniquify_names as an argument 35 | super(StubSafeStorage, self).__init__(*args, **kwargs) 36 | 37 | 38 | def stub_random_string(*args, **kwargs): 39 | stub_random_string.count += 1 40 | return str(stub_random_string.count) 41 | 42 | 43 | class HashedFilenameTestCase(TestCase): 44 | CONTENT = 'Hello world!' 45 | BCONTENT = b'Hello world!' 46 | SHA1SUM = 'd3486ae9136e7856bc42212385ea797094475802' 47 | 48 | def test_init(self): 49 | # SafeStorage supports uniquify_names 50 | HashedFilenameMetaStorage(storage_class=StubSafeStorage)() 51 | # Normal Storage classes do not 52 | HashedFilenameMetaStorage(storage_class=StubStorage)() 53 | 54 | def test_get_available_name(self): 55 | with media_root(): 56 | storage = HashedFilenameFileSystemStorage() 57 | self.assertEqual(storage.get_available_name('foo.txt'), 'foo.txt') 58 | 59 | def test_get_content_name(self): 60 | with media_root(): 61 | storage = HashedFilenameFileSystemStorage() 62 | self.assertEqual( 63 | storage._get_content_name(name='', 64 | content=ContentFile(self.CONTENT)), 65 | '%s' % self.SHA1SUM 66 | ) 67 | self.assertEqual( 68 | storage._get_content_name(name='', 69 | content=ContentFile(self.CONTENT), 70 | chunk_size=1), 71 | '%s' % self.SHA1SUM 72 | ) 73 | self.assertEqual( 74 | storage._get_content_name(name='foo', 75 | content=ContentFile(self.CONTENT)), 76 | '%s' % self.SHA1SUM 77 | ) 78 | self.assertEqual( 79 | storage._get_content_name(name='foo.txt', 80 | content=ContentFile(self.CONTENT)), 81 | '%s.txt' % self.SHA1SUM 82 | ) 83 | self.assertEqual( 84 | storage._get_content_name(name='foo/bar', 85 | content=ContentFile(self.CONTENT)), 86 | 'foo/%s' % self.SHA1SUM 87 | ) 88 | self.assertEqual( 89 | storage._get_content_name(name='foo/bar.txt', 90 | content=ContentFile(self.CONTENT)), 91 | 'foo/%s.txt' % self.SHA1SUM 92 | ) 93 | 94 | def test_get_bytes_content_name(self): 95 | with media_root(): 96 | storage = HashedFilenameFileSystemStorage() 97 | self.assertEqual( 98 | storage._get_content_name(name='', 99 | content=ContentFile(self.BCONTENT)), 100 | '%s' % self.SHA1SUM 101 | ) 102 | self.assertEqual( 103 | storage._get_content_name(name='', 104 | content=ContentFile(self.BCONTENT), 105 | chunk_size=1), 106 | '%s' % self.SHA1SUM 107 | ) 108 | self.assertEqual( 109 | storage._get_content_name(name='foo', 110 | content=ContentFile(self.BCONTENT)), 111 | '%s' % self.SHA1SUM 112 | ) 113 | self.assertEqual( 114 | storage._get_content_name(name='foo.txt', 115 | content=ContentFile(self.BCONTENT)), 116 | '%s.txt' % self.SHA1SUM 117 | ) 118 | self.assertEqual( 119 | storage._get_content_name(name='foo/bar', 120 | content=ContentFile(self.BCONTENT)), 121 | 'foo/%s' % self.SHA1SUM 122 | ) 123 | self.assertEqual( 124 | storage._get_content_name(name='foo/bar.txt', 125 | content=ContentFile(self.BCONTENT)), 126 | 'foo/%s.txt' % self.SHA1SUM 127 | ) 128 | 129 | def test_compute_hash(self): 130 | with media_root(): 131 | storage = HashedFilenameFileSystemStorage() 132 | self.assertEqual( 133 | storage._compute_hash(content=ContentFile(self.CONTENT)), 134 | self.SHA1SUM 135 | ) 136 | self.assertEqual( 137 | storage._compute_hash(content=ContentFile(self.CONTENT), 138 | chunk_size=1), 139 | self.SHA1SUM 140 | ) 141 | self.assertEqual( 142 | storage._compute_hash(content=ContentFile(self.BCONTENT)), 143 | self.SHA1SUM 144 | ) 145 | self.assertEqual( 146 | storage._compute_hash(content=ContentFile(self.BCONTENT), 147 | chunk_size=1), 148 | self.SHA1SUM 149 | ) 150 | 151 | def test_get_available_name_overridden_on_save(self): 152 | with media_root(): 153 | storage = HashedFilenameFileSystemStorage() 154 | # get_available_name returns the filename given, but is overridden 155 | # on save 156 | self.assertEqual( 157 | storage.get_available_name('foo/bar.txt'), 'foo/bar.txt' 158 | ) 159 | name1 = storage.save('foo/bar.txt', ContentFile(self.CONTENT)) 160 | self.assertEqual(name1, 'foo/%s.txt' % self.SHA1SUM) 161 | self.assertEqual(storage.open(name1, 'rt').read(), self.CONTENT) 162 | 163 | self.assertTrue( 164 | os.path.exists( 165 | os.path.join(settings.MEDIA_ROOT, 'foo/%s.txt' % self.SHA1SUM) 166 | ) 167 | ) 168 | self.assertFalse( 169 | os.path.exists(os.path.join(settings.MEDIA_ROOT, 'foo/bar.txt')) 170 | ) 171 | 172 | def test_save(self): 173 | with media_root(): 174 | storage = HashedFilenameFileSystemStorage() 175 | name1 = storage.save('foo/bar.txt', ContentFile(self.CONTENT)) 176 | 177 | self.assertEqual(name1, 'foo/%s.txt' % self.SHA1SUM) 178 | self.assertEqual(storage.open(name1, 'rt').read(), self.CONTENT) 179 | 180 | storage.delete(name1) 181 | name2 = storage.save('foo/bar.txt', ContentFile(self.CONTENT)) 182 | self.assertEqual(name2, name1) 183 | self.assertEqual(storage.open(name2, 'rt').read(), self.CONTENT) 184 | 185 | name3 = storage.save('foo/another.txt', ContentFile(self.CONTENT)) 186 | self.assertEqual(name3, name1) 187 | self.assertEqual(storage.open(name3, 'rt').read(), self.CONTENT) 188 | 189 | def test_save_bytes_content(self): 190 | with media_root(): 191 | storage = HashedFilenameFileSystemStorage() 192 | name1 = storage.save('foo/bar.txt', ContentFile(self.BCONTENT)) 193 | self.assertEqual(name1, 'foo/%s.txt' % self.SHA1SUM) 194 | self.assertEqual(storage.open(name1, 'rb').read(), self.BCONTENT) 195 | 196 | storage.delete(name1) 197 | name2 = storage.save('foo/bar.txt', ContentFile(self.BCONTENT)) 198 | self.assertEqual(name2, name1) 199 | self.assertEqual(storage.open(name2, 'rb').read(), self.BCONTENT) 200 | 201 | name3 = storage.save('foo/another.txt', ContentFile(self.BCONTENT)) 202 | self.assertEqual(name3, name1) 203 | self.assertEqual(storage.open(name3, 'rb').read(), self.BCONTENT) 204 | 205 | 206 | @contextmanager 207 | def patch(namespace, **values): 208 | """Patches `namespace`.`name` with `value` for (name, value) in values""" 209 | originals = {} 210 | if isinstance(namespace, LazyObject): 211 | if namespace._wrapped is None: 212 | namespace._setup() 213 | namespace = namespace._wrapped 214 | for (name, value) in values.items(): 215 | try: 216 | originals[name] = getattr(namespace, name) 217 | except AttributeError: 218 | originals[name] = NotImplemented 219 | if value is NotImplemented: 220 | if originals[name] is not NotImplemented: 221 | delattr(namespace, name) 222 | else: 223 | setattr(namespace, name, value) 224 | try: 225 | yield 226 | finally: 227 | for (name, original_value) in originals.items(): 228 | if original_value is NotImplemented: 229 | if values[name] is not NotImplemented: 230 | delattr(namespace, name) 231 | else: 232 | setattr(namespace, name, original_value) 233 | 234 | 235 | @contextmanager 236 | def media_root(dirname='test_media/'): 237 | if os.path.exists(dirname): 238 | raise Exception('Cannot run tests safely, %r already exists!' % 239 | dirname) 240 | try: 241 | with patch(settings, MEDIA_ROOT=dirname): 242 | yield 243 | finally: 244 | shutil.rmtree(dirname, ignore_errors=True) 245 | --------------------------------------------------------------------------------