├── .gitignore ├── LICENSE ├── README.md ├── Screenshot.png ├── django_404_middleware ├── __init__.py ├── admin.py ├── apps.py ├── match.py ├── migrations │ ├── 0001_initial.py │ ├── 0002_ignorable404referer.py │ ├── 0003_failedurl.py │ └── __init__.py ├── models.py └── tests.py ├── requirements.txt ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope 
project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Michael Herrmann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Django 404 Middleware 2 | Django's 3 | [BrokenLinkEmailsMiddleware](https://docs.djangoproject.com/en/2.1/howto/error-reporting/#errors) 4 | can email you about broken links on your site: 5 | 6 | ``` 7 | Broken link on mysite.com 8 | ========================= 9 | Referrer: https://www.google.com 10 | Requested URL: /nonexistent 11 | User agent: Mozilla/5.0 (...) 12 | IP address: 1.2.3.4 13 | ``` 14 | 15 | The problem is, these emails often contain false positives. To tell Django to 16 | ignore particular broken links, you need to change the setting 17 | `IGNORABLE_404_URLS`. This quickly becomes tedious. 18 | 19 | This library is a drop-in replacement for `BrokenLinkEmailsMiddleware`. It sends 20 | the same emails, but is configured via the database (and Django's Admin 21 | interface) instead of a setting. This makes it much easier to mark URLs as 22 | ignorable. 23 | 24 | When a broken URL is encountered, this library adds a link to the bottom of the 25 | 404 email: 26 | 27 | ``` 28 | Broken link on mysite.com 29 | ========================= 30 | ... 31 | 32 | To ignore this link, visit mysite.com/admin/... 33 | ``` 34 | 35 | Clicking on the link opens Django's Admin interface with a pre-filled form for 36 | ignoring the 404: 37 | 38 | ![Add](Screenshot.png?raw=true "Optional Title") 39 | 40 | Just click _Save_ to never be notified of this particular false positive again. 41 | 42 | ## Installation 43 | 44 | Install this library via: 45 | 46 | pip install django-404-middleware 47 | 48 | Add it to the `INSTALLED_APPS` in your Django settings file: 49 | 50 | ``` 51 | INSTALLED_APPS = [ 52 | ..., 53 | 'django_404_middleware' 54 | ] 55 | ``` 56 | 57 | Also add it to your `MIDDLEWARE` setting. 
class BrokenLinkEmailsDbMiddleware(BrokenLinkEmailsMiddleware):
    """
    Drop-in replacement for Django's ``BrokenLinkEmailsMiddleware`` whose
    ignore rules live in the database instead of in settings.

    Every 404 response (outside of DEBUG mode) is counted per requested path
    in ``FailedUrl``. Managers are only emailed once a path is not covered by
    any of the ignore rules implemented in ``is_ignorable_request``.
    """

    def process_response(self, request, response):
        """
        Send broken link emails for relevant 404 NOT FOUND responses.

        The response is always returned unchanged; recording the failure and
        sending the email are side effects.
        """
        if response.status_code == 404 and not settings.DEBUG:
            # Imported here, next to the lazily imported models, so that
            # importing this package does not touch the ORM.
            from django.db.models import F

            domain = request.get_host()
            path = request.get_full_path()
            referer = force_text(
                request.META.get('HTTP_REFERER', ''), errors='replace'
            )
            FailedUrl = _import_models()[0]
            failed_url = FailedUrl.objects.get_or_create(path=path)[0]
            # Let the database perform the increment. A Python-side
            # "read, add one, write back" loses counts when several requests
            # for the same path are handled concurrently.
            failed_url.num_occurrences = F('num_occurrences') + 1
            failed_url.save(update_fields=['num_occurrences'])
            if not self.is_ignorable_request(request, path, domain, referer):
                _notify_managers_of_broken_link(request, path, domain, referer)
        return response

    def is_ignorable_request(self, request, uri, domain, referer):
        """
        Return True if no email should be sent for this 404.

        The checks run in this order:

        1. Django's built-in rules (``super().is_ignorable_request``).
        2. The path has failed at most ``settings.NUM_ALLOWED_404s_PER_PATH``
           times so far (default: 1).
        3. The referer matches any ``Ignorable404Referer`` row.
        4. The URI matches any ``Ignorable404Url`` row.
        """
        if super().is_ignorable_request(request, uri, domain, referer):
            return True
        FailedUrl, Ignorable404Url, Ignorable404Referer = _import_models()
        # Use filter().first() rather than get(): if no row exists for this
        # path (for instance because it was deleted in the meantime), treat it
        # as "never failed" instead of raising DoesNotExist and turning the
        # 404 into a server error.
        num_occurrences = (
            FailedUrl.objects
            .filter(path=request.get_full_path())
            .values_list('num_occurrences', flat=True)
            .first()
        ) or 0
        allowed = getattr(settings, 'NUM_ALLOWED_404s_PER_PATH', 1)
        if num_occurrences <= allowed:
            return True
        if any(i.matches(referer) for i in Ignorable404Referer.objects.all()):
            return True
        return any(i.matches(uri) for i in Ignorable404Url.objects.all())
def _get_admin_add_url(request, model, **defaults):
    """
    Return the absolute URL of the Django Admin "add" form for ``model``.

    ``defaults`` are URL-encoded into the query string; the query string is
    what pre-fills the form fields in the emailed links.
    """
    # Take the URL name components from the model's metadata. The module's
    # __name__ only equals the app label when the package is imported under
    # its top-level name and the label has not been customised.
    opts = model._meta
    view_name = 'admin:%s_%s_add' % (opts.app_label, opts.model_name)
    return request.build_absolute_uri(
        reverse(view_name) + '?' + urlencode(defaults)
    )
def match(pattern, path, exact=True, is_re=False, case_sensitive=False):
    """
    Search ``path`` for ``pattern`` and return the ``re`` match object, or
    None if there is no match.

    pattern        -- literal text, or a regular expression if ``is_re``.
    path           -- the string to test (a URL path or a referer).
    exact          -- if true, the pattern must cover the whole of ``path``,
                      apart from one optional trailing slash. If false, it
                      may match anywhere inside ``path``.
    is_re          -- if false, ``pattern`` is escaped and matched literally.
    case_sensitive -- if false, matching ignores case.

    The main motivation for keeping this function in a separate Python module,
    and not eg. as a method of Ignorable404Url, is so we can test it without any
    dependencies on Django (and in particular without having to initialize
    Django's settings just for a simple unit test).
    """
    regex = pattern if is_re else re.escape(pattern)
    if exact:
        # Group the pattern before anchoring it. Without the group, a regex
        # with top-level alternation such as 'a|b' would become '^a|b/?$',
        # i.e. "starts with a" OR "ends with b", which is not an exact match.
        regex = '^(?:%s)/?$' % regex
    flags = 0 if case_sensitive else re.I
    return re.search(regex, path, flags)
class Migration(migrations.Migration):
    # Schema migration 0002: creates the table behind the Ignorable404Referer
    # model.

    # Must be applied after this app's initial migration.
    dependencies = [
        ('django_404_middleware', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='Ignorable404Referer',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                # The text (or regular expression) a referer is compared to.
                ('pattern', models.TextField(help_text='Referers matching this pattern are ignored.')),
                # The three flags below carry the same names as the keyword
                # arguments of django_404_middleware.match.match().
                ('exact', models.BooleanField(default=True, verbose_name='The full referer must match')),
                ('is_re', models.BooleanField(default=False, verbose_name='Is regular expression')),
                ('case_sensitive', models.BooleanField(default=False, verbose_name='Is case sensitive')),
            ],
            options={
                'verbose_name': 'Ignorable 404 Referer',
            },
        ),
    ]
class Ignorable404Url(Model):
    """
    A rule describing requested URLs whose 404s should not be reported.

    The stored pattern and its three flags are handed to ``match()``
    unchanged; see that function for how they are interpreted.
    """

    class Meta:
        verbose_name = 'Ignorable 404 URL'

    pattern = TextField(help_text='URLs matching this pattern are ignored.')
    exact = BooleanField(
        default=True, blank=False, verbose_name='The full URL must match'
    )
    is_re = BooleanField(
        default=False, blank=False, verbose_name='Is regular expression'
    )
    case_sensitive = BooleanField(
        default=False, blank=False, verbose_name='Is case sensitive'
    )

    def matches(self, path):
        """Return True if ``path`` is covered by this rule, else False."""
        found = match(
            self.pattern,
            path,
            self.exact,
            self.is_re,
            self.case_sensitive,
        )
        return bool(found)

    def __str__(self):
        # Rules are listed in the Admin by their pattern text.
        return self.pattern
class MatchTest(TestCase):
    """Unit tests for ``match`` covering each of its keyword options."""

    def test_exact(self):
        # With default options the pattern has to cover the whole path.
        same = match('/nonexistent', '/nonexistent')
        different = match('/nonexistent', '/other-pattern')
        self.assertTrue(same)
        self.assertFalse(different)

    def test_regex(self):
        # is_re=True interprets the pattern as a regular expression.
        digits = '/[0-9]+'
        self.assertTrue(match(digits, '/1234', is_re=True))
        self.assertFalse(match(digits, '/abc', is_re=True))

    def test_inexact(self):
        # exact=False allows the pattern to occur anywhere in the path.
        path = '/some/substr/in/path'
        self.assertTrue(match('substr', path, exact=False))
        self.assertFalse(match('substr', path))

    def test_case_sensitive(self):
        # Matching ignores case unless case_sensitive=True is passed.
        insensitive = match('/foo', '/Foo')
        sensitive = match('/foo', '/Foo', case_sensitive=True)
        self.assertTrue(insensitive)
        self.assertFalse(sensitive)

    def test_inexact_regex(self):
        # Regular expressions obey the exact flag too.
        path = '/hi/3/there'
        self.assertFalse(match('[0-9]', path, is_re=True))
        self.assertTrue(match('[0-9]', path, is_re=True, exact=False))
# Short summary, reused as the first paragraph of the long description.
description = "An alternative to Django's BrokenLinkEmailsMiddleware"
url = 'https://github.com/mherrmann/django-404-middleware'
setup(
    name='django-404-middleware',
    version='0.0.5',
    description=description,
    long_description=description + '\n\nHome page: ' + url,
    author='Michael Herrmann',
    author_email='michael+removethisifyouarehuman@herrmann.io',
    url=url,
    packages=find_packages(),
    # The package imports Django at module level, so declare it as a runtime
    # dependency (same constraint as requirements.txt). Without this,
    # "pip install django-404-middleware" yields an unimportable package in
    # an environment that lacks Django.
    install_requires=['Django>=1.9.5'],
    # The code uses urllib.parse and zero-argument super(), both of which
    # exist only on Python 3.
    python_requires='>=3',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',

        'License :: OSI Approved :: MIT License',

        'Operating System :: OS Independent',

        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',

        'Framework :: Django',

        'Topic :: Software Development :: Libraries',
        'Topic :: Software Development :: Libraries :: Python Modules'
    ],
    license='MIT',
    keywords='Django 404 Middleware Email',
    platforms=['MacOS', 'Windows', 'Debian', 'Fedora', 'CentOS', 'Arch'],
    test_suite='django_404_middleware.tests'
)