├── .editorconfig ├── .gitignore ├── .travis.yml ├── MANIFEST.in ├── README.md ├── example_dj18 ├── manage.py └── project │ ├── __init__.py │ ├── myapp │ ├── __init__.py │ ├── admin.py │ ├── models.py │ ├── templates │ │ ├── index.html │ │ └── not_valid.html │ ├── tests.py │ ├── urls.py │ └── views.py │ ├── settings.py │ ├── urls.py │ └── wsgi.py ├── example_dj21 ├── db.sqlite3 ├── manage.py └── project │ ├── __init__.py │ ├── myapp │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── migrations │ │ └── __init__.py │ ├── models.py │ ├── templates │ │ ├── index.html │ │ └── not_valid.html │ ├── tests.py │ ├── urls.py │ └── views.py │ ├── settings.py │ ├── urls.py │ └── wsgi.py ├── htmlvalidator ├── __init__.py ├── client.py ├── core.py ├── exceptions.py ├── middleware.py └── tests │ ├── __init__.py │ ├── models.py │ ├── settings.py │ ├── templates │ └── view.html │ ├── test_client.py │ ├── urls.py │ └── views.py ├── requirements.txt ├── runtests.py ├── setup.cfg ├── setup.py ├── tox.ini └── vnu └── README.md /.editorconfig: -------------------------------------------------------------------------------- 1 | [*.py] 2 | indent_style = space 3 | indent_size = 4 4 | 5 | [*.js] 6 | indent_style = space 7 | indent_size = 4 8 | 9 | [*.css] 10 | indent_style = space 11 | indent_size = 4 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist/ 2 | /build/ 3 | /django_html_validator.egg-info/ 4 | example/db.sqlite3 5 | .tox/ 6 | __pycache__ 7 | *.pyc 8 | .vscode/ 9 | vnu/vnu.jar 10 | example_dj18/db.sqlite3 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: python 4 | 5 | python: 6 | - "2.7" 7 | - "3.5" 8 | - "3.6" 9 | 10 | install: 11 | - pip install codecov tox-travis 12 | 13 | 
branches: 14 | only: 15 | - master 16 | 17 | script: 18 | - tox 19 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | django-html-validator 2 | ===================== 3 | 4 | [![Build Status](https://travis-ci.org/peterbe/django-html-validator.svg?branch=master)](https://travis-ci.org/peterbe/django-html-validator) 5 | 6 | A tool to do validation of your HTML generated from your Django app. 7 | Python 3 compatible. 8 | 9 | License: [MPL 2](http://www.mozilla.org/MPL/2.0/) 10 | 11 | [Original blog post](https://www.peterbe.com/plog/django-html-validator) 12 | 13 | [Blog post update about Django 2 support](https://www.peterbe.com/plog/django-html-validator-now-supports-django-2.x) 14 | 15 | Warning! 16 | -------- 17 | 18 | If you don't download a local `vnu.jar` file (see below), it will use 19 | [validator.nu](https://validator.nu/) and **send your HTML there**. 20 | 21 | If you use `htmlvalidator` to validate tests it's unlikely your HTML contains 22 | anything sensitive or personally identifiable but if you use the middleware 23 | option there's a potential risk. 24 | 25 | Install 26 | ------- 27 | 28 | First things first, very simple: 29 | 30 | pip install django-html-validator 31 | 32 | Note, it won't do anything until you choose how you want to use it and you also 33 | need to explicitly enable it with a setting. 
34 | 35 | Basically, you have a choice of how you want to use this: 36 | 37 | * As a middleware 38 | * In your unit tests (technically they're integration tests in Django) 39 | 40 | If you choose to set it up as a middleware and enable it accordingly it will 41 | run for every rendered template in the tests too. Not just when you run the 42 | server. 43 | 44 | Settings 45 | -------- 46 | 47 | Independent of how you use `htmlvalidator` you need to switch it on. 48 | It's not on by default. The setting to do that is: 49 | 50 | ```python 51 | HTMLVALIDATOR_ENABLED = True 52 | ``` 53 | 54 | What this does, is that it prints all validation errors to `stdout`. 55 | But it doesn't stop the execution from running. Even if there are errors. 56 | 57 | To make it so that the execution stops as soon as there is any validation 58 | error switch this on in your settings: 59 | 60 | ```python 61 | HTMLVALIDATOR_FAILFAST = True 62 | ``` 63 | 64 | Now, if there's any validation error going through the client you'll 65 | get a `htmlvalidator.exceptions.ValidationError` exception raised. 66 | 67 | Equally, if you're running it as a middleware and have this setting on it 68 | will raise the exception in the request. 69 | 70 | When validation errors and warnings are encountered, `htmlvalidator` will 71 | dump the HTML to a file and the errors in a file with the same name except 72 | with the extension `.txt` instead. It will dump this into, by default, the 73 | system's tmp directory, in a sub-directory called `htmlvalidator`. 74 | E.g. `/tmp/htmlvalidator/`. If you want to override that, change: 75 | 76 | ```python 77 | HTMLVALIDATOR_DUMPDIR = '~/validationerrors/' # default is /tmp/htmlvalidator 78 | ``` 79 | Whatever you set, the directory doesn't need to exist but its parent does. 80 | 81 | By default when `htmlvalidator` encounters validation errors it stores 82 | the relevant HTML file in the `HTMLVALIDATOR_DUMPDIR` together with a file 83 | with the extension `.txt` in the same directory. 
Alternatively you can just let 84 | it dump the validation errors and warnings straight onto stdout with: 85 | 86 | ```python 87 | HTMLVALIDATOR_OUTPUT = 'stdout' # default is 'file' 88 | ``` 89 | 90 | Setting the vnu.jar path 91 | ------------------------ 92 | 93 | By default, all validation is done by sending your HTML with HTTP POST to 94 | [html5.validator.nu](https://html5.validator.nu/). 95 | 96 | Not only does this put a lot of stress on their server. Especially if you have 97 | a lot of tests. It's also slow because it depends on network latency. A much 98 | better way is to download the `vnu.jar` file from their 99 | [latest release](https://github.com/validator/validator/releases) on 100 | [GitHub page](https://github.com/validator/). 101 | 102 | You set it up simply like this: 103 | 104 | ```python 105 | HTMLVALIDATOR_VNU_JAR = '~/downloads/vnu.jar' 106 | ``` 107 | 108 | This also **requires java to be installed** because that's how `.jar` files are 109 | executed on the command line. 110 | 111 | Be aware that calling this `vnu.jar` file is quite slow. Over 2 seconds is 112 | not unusual. A faster alternative is to use the `vnu.jar` to run a local web 113 | instance of the validator, and pointing validation to use that by *NOT* setting 114 | `HTMLVALIDATOR_VNU_JAR` and doing this instead: 115 | 116 | ```python 117 | HTMLVALIDATOR_VNU_URL = 'http://localhost:8888/' 118 | ``` 119 | 120 | The local web instance of the validator can be started typically by: 121 | 122 | ``` 123 | java -cp vnu.jar nu.validator.servlet.Main 8888 124 | ``` 125 | 126 | Validating during running the server 127 | ------------------------------------ 128 | 129 | A way to do HTML validation is to do it during running the 130 | server. E.g. with `./manage.py runserver`. 131 | 132 | To do that you need to enable the middleware. 
In your settings module, 133 | append `htmlvalidator.middleware.HTMLValidator` 134 | to `MIDDLEWARE_CLASSES` for example like this: 135 | 136 | ```python 137 | if HTMLVALIDATOR_ENABLED: 138 | MIDDLEWARE_CLASSES += ("htmlvalidator.middleware.HTMLValidator",) 139 | ``` 140 | 141 | You can also add it directly and unconditionally to `MIDDLEWARE_CLASSES` 142 | and it won't do anything (except be loaded) unless enabled, see 143 | the note above about `HTMLVALIDATOR_ENABLED` for more info. 144 | 145 | Also, if you enable `HTMLVALIDATOR_FAILFAST`, when running the 146 | `htmlvalidator` middleware it will raise an exception as soon as it 147 | sees some invalid HTML. 148 | 149 | 150 | Validating HTML in tests 151 | ------------------------ 152 | 153 | Suppose you have a class that does tests. By default it already has a 154 | `self.client` which you use to make requests. All you need to do is to 155 | replace it with the `htmlvalidator.client.ValidatingClient` 156 | class. For example: 157 | 158 | ```python 159 | 160 | from django.test import TestCase 161 | from htmlvalidator.client import ValidatingClient 162 | 163 | 164 | class MyAppTests(TestCase): 165 | 166 | def setUp(self): 167 | super(MyAppTests, self).setUp() 168 | self.client = ValidatingClient() 169 | 170 | def test_homepage(self): 171 | response = self.client.get('/') 172 | self.assertEqual(response.status_code, 200) 173 | ``` 174 | -------------------------------------------------------------------------------- /example_dj18/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.normpath('..')) 6 | 7 | if __name__ == "__main__": 8 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "project.settings") 9 | 10 | from django.core.management import execute_from_command_line 11 | 12 | execute_from_command_line(sys.argv) 13 | 
-------------------------------------------------------------------------------- /example_dj18/project/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterbe/django-html-validator/93fc86d881ba132b4f46f2d1e73a6c417c45ba27/example_dj18/project/__init__.py -------------------------------------------------------------------------------- /example_dj18/project/myapp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterbe/django-html-validator/93fc86d881ba132b4f46f2d1e73a6c417c45ba27/example_dj18/project/myapp/__init__.py -------------------------------------------------------------------------------- /example_dj18/project/myapp/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /example_dj18/project/myapp/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | # Create your models here. 4 | -------------------------------------------------------------------------------- /example_dj18/project/myapp/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Some title 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 22 | 23 | 24 |

Hello world! This is HTML5 Boilerplate. 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /example_dj18/project/myapp/templates/not_valid.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 23 | 24 | 25 |

Hello world! This HTML response is supposedly not valid.

26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 41 | 42 | -------------------------------------------------------------------------------- /example_dj18/project/myapp/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | from htmlvalidator.client import ValidatingClient 3 | 4 | 5 | class Tests(TestCase): 6 | 7 | def setUp(self): 8 | super(Tests, self).setUp() 9 | self.client = ValidatingClient() 10 | 11 | def test_render_home_page(self): 12 | response = self.client.get('/') 13 | self.assertEqual(response.status_code, 200) 14 | 15 | def test_render_not_valid(self): 16 | response = self.client.get('/not/') 17 | self.assertEqual(response.status_code, 200) 18 | -------------------------------------------------------------------------------- /example_dj18/project/myapp/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url 2 | 3 | from . import views 4 | 5 | urlpatterns = [ 6 | url(r'^$', views.home_page, name='home_page'), 7 | url(r'^not/$', views.not_valid, name='not_valid'), 8 | ] 9 | -------------------------------------------------------------------------------- /example_dj18/project/myapp/views.py: -------------------------------------------------------------------------------- 1 | from django.shortcuts import render 2 | 3 | 4 | def home_page(request): 5 | return render(request, 'index.html') 6 | 7 | 8 | def not_valid(request): 9 | return render(request, 'not_valid.html') 10 | -------------------------------------------------------------------------------- /example_dj18/project/settings.py: -------------------------------------------------------------------------------- 1 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 
2 | import os 3 | BASE_DIR = os.path.dirname(os.path.dirname(__file__)) 4 | 5 | 6 | # Quick-start development settings - unsuitable for production 7 | # See https://docs.djangoproject.com/en/1.6/howto/deployment/checklist/ 8 | 9 | # SECURITY WARNING: keep the secret key used in production secret! 10 | SECRET_KEY = ')p&!&4gj#8a01yqerefi6r58r=^%rl&_k+6nbpi!di5zy6*4u3' 11 | 12 | # SECURITY WARNING: don't run with debug turned on in production! 13 | DEBUG = True 14 | 15 | TEMPLATE_DEBUG = True 16 | 17 | DEBUG_PROPAGATE_EXCEPTIONS = True 18 | 19 | ALLOWED_HOSTS = [] 20 | 21 | 22 | # Application definition 23 | 24 | INSTALLED_APPS = ( 25 | 'django.contrib.admin', 26 | 'django.contrib.auth', 27 | 'django.contrib.contenttypes', 28 | 'django.contrib.sessions', 29 | 'django.contrib.messages', 30 | 'django.contrib.staticfiles', 31 | 'project.myapp', 32 | ) 33 | 34 | MIDDLEWARE_CLASSES = ( 35 | 'django.contrib.sessions.middleware.SessionMiddleware', 36 | 'django.middleware.common.CommonMiddleware', 37 | 'django.middleware.csrf.CsrfViewMiddleware', 38 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 39 | 'django.contrib.messages.middleware.MessageMiddleware', 40 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 41 | 'htmlvalidator.middleware.HTMLValidator', 42 | ) 43 | 44 | ROOT_URLCONF = 'project.urls' 45 | 46 | TEMPLATES = [ 47 | { 48 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 49 | 'APP_DIRS': True, 50 | 'OPTIONS': { 51 | 'context_processors': [ 52 | 'django.contrib.auth.context_processors.auth', 53 | ], 54 | }, 55 | }, 56 | ] 57 | 58 | WSGI_APPLICATION = 'project.wsgi.application' 59 | 60 | 61 | # Database 62 | # https://docs.djangoproject.com/en/1.6/ref/settings/#databases 63 | 64 | DATABASES = { 65 | 'default': { 66 | 'ENGINE': 'django.db.backends.sqlite3', 67 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 68 | } 69 | } 70 | 71 | # Internationalization 72 | # https://docs.djangoproject.com/en/1.6/topics/i18n/ 73 | 74 | 
LANGUAGE_CODE = 'en-us' 75 | 76 | TIME_ZONE = 'UTC' 77 | 78 | USE_I18N = True 79 | 80 | USE_L10N = True 81 | 82 | USE_TZ = True 83 | 84 | 85 | # Static files (CSS, JavaScript, Images) 86 | # https://docs.djangoproject.com/en/1.6/howto/static-files/ 87 | 88 | STATIC_URL = '/static/' 89 | 90 | 91 | HTMLVALIDATOR_ENABLED = os.environ.get( 92 | 'HTMLVALIDATOR_ENABLED', 93 | 'True' 94 | ).lower() in ('1', 'true') 95 | 96 | HTMLVALIDATOR_FAILFAST = os.environ.get( 97 | 'HTMLVALIDATOR_FAILFAST', 98 | 'False' 99 | ).lower() in ('1', 'true') 100 | 101 | HTMLVALIDATOR_VNU_JAR = os.environ.get( 102 | 'HTMLVALIDATOR_VNU_JAR', 103 | '../vnu/vnu.jar' 104 | ) 105 | 106 | HTMLVALIDATOR_OUTPUT = os.environ.get( 107 | 'HTMLVALIDATOR_OUTPUT', 108 | 'stdout' 109 | ) 110 | HTMLVALIDATOR_DUMPDIR = os.environ.get( 111 | 'HTMLVALIDATOR_OUTPUT', 112 | '/tmp/htmlvalidator' 113 | ) 114 | -------------------------------------------------------------------------------- /example_dj18/project/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import include, url 2 | 3 | 4 | urlpatterns = [ 5 | url(r'', include('project.myapp.urls')), 6 | ] 7 | -------------------------------------------------------------------------------- /example_dj18/project/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for example project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.6/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "example.settings") 12 | 13 | from django.core.wsgi import get_wsgi_application 14 | application = get_wsgi_application() 15 | -------------------------------------------------------------------------------- /example_dj21/db.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterbe/django-html-validator/93fc86d881ba132b4f46f2d1e73a6c417c45ba27/example_dj21/db.sqlite3 -------------------------------------------------------------------------------- /example_dj21/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "project.settings") 7 | try: 8 | from django.core.management import execute_from_command_line 9 | except ImportError: 10 | # The above import may fail for some other reason. Ensure that the 11 | # issue is really that Django is missing to avoid masking other 12 | # exceptions on Python 2. 13 | try: 14 | import django 15 | except ImportError: 16 | raise ImportError( 17 | "Couldn't import Django. Are you sure it's installed and " 18 | "available on your PYTHONPATH environment variable? Did you " 19 | "forget to activate a virtual environment?" 
20 | ) 21 | raise 22 | execute_from_command_line(sys.argv) 23 | -------------------------------------------------------------------------------- /example_dj21/project/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterbe/django-html-validator/93fc86d881ba132b4f46f2d1e73a6c417c45ba27/example_dj21/project/__init__.py -------------------------------------------------------------------------------- /example_dj21/project/myapp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterbe/django-html-validator/93fc86d881ba132b4f46f2d1e73a6c417c45ba27/example_dj21/project/myapp/__init__.py -------------------------------------------------------------------------------- /example_dj21/project/myapp/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /example_dj21/project/myapp/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class MyappConfig(AppConfig): 5 | name = 'myapp' 6 | -------------------------------------------------------------------------------- /example_dj21/project/myapp/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterbe/django-html-validator/93fc86d881ba132b4f46f2d1e73a6c417c45ba27/example_dj21/project/myapp/migrations/__init__.py -------------------------------------------------------------------------------- /example_dj21/project/myapp/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | # Create your models here. 
4 | -------------------------------------------------------------------------------- /example_dj21/project/myapp/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Some title 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 22 | 23 | 24 |

Hello world! This is HTML5 Boilerplate. 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /example_dj21/project/myapp/templates/not_valid.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 23 | 24 | 25 |

Hello world! This HTML response is supposedly not valid.

26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 41 | 42 | -------------------------------------------------------------------------------- /example_dj21/project/myapp/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | from htmlvalidator.client import ValidatingClient 3 | 4 | 5 | class Tests(TestCase): 6 | 7 | def setUp(self): 8 | super(Tests, self).setUp() 9 | self.client = ValidatingClient() 10 | 11 | def test_render_home_page(self): 12 | response = self.client.get('/') 13 | self.assertEqual(response.status_code, 200) 14 | 15 | def test_render_not_valid(self): 16 | response = self.client.get('/not/') 17 | self.assertEqual(response.status_code, 200) 18 | -------------------------------------------------------------------------------- /example_dj21/project/myapp/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url 2 | 3 | from . import views 4 | 5 | urlpatterns = [ 6 | url(r'^$', views.home_page, name='home_page'), 7 | url(r'^not/$', views.not_valid, name='not_valid'), 8 | ] 9 | -------------------------------------------------------------------------------- /example_dj21/project/myapp/views.py: -------------------------------------------------------------------------------- 1 | from django.shortcuts import render 2 | 3 | 4 | def home_page(request): 5 | return render(request, 'index.html') 6 | 7 | 8 | def not_valid(request): 9 | return render(request, 'not_valid.html') 10 | -------------------------------------------------------------------------------- /example_dj21/project/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for example project. 3 | 4 | Generated by 'django-admin startproject' using Django 1.11.11. 
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.11/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/1.11/ref/settings/ 11 | """ 12 | 13 | import os 14 | 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | 19 | # Quick-start development settings - unsuitable for production 20 | # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ 21 | 22 | # SECURITY WARNING: keep the secret key used in production secret! 23 | SECRET_KEY = 'ig9fy(+0-p@co+_!yez2fav!4ghfjh18r9$@b%=@@6+d_5@n%z' 24 | 25 | # SECURITY WARNING: don't run with debug turned on in production! 26 | DEBUG = True 27 | 28 | ALLOWED_HOSTS = [] 29 | 30 | 31 | # Application definition 32 | 33 | INSTALLED_APPS = [ 34 | 'django.contrib.admin', 35 | 'django.contrib.auth', 36 | 'django.contrib.contenttypes', 37 | 'django.contrib.sessions', 38 | 'django.contrib.messages', 39 | 'django.contrib.staticfiles', 40 | 'project.myapp', 41 | ] 42 | 43 | MIDDLEWARE = [ 44 | 'django.middleware.security.SecurityMiddleware', 45 | 'django.contrib.sessions.middleware.SessionMiddleware', 46 | 'django.middleware.common.CommonMiddleware', 47 | 'django.middleware.csrf.CsrfViewMiddleware', 48 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 49 | 'django.contrib.messages.middleware.MessageMiddleware', 50 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 51 | 'htmlvalidator.middleware.HTMLValidator', 52 | ] 53 | 54 | ROOT_URLCONF = 'project.urls' 55 | 56 | TEMPLATES = [ 57 | { 58 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 59 | 'DIRS': [], 60 | 'APP_DIRS': True, 61 | 'OPTIONS': { 62 | 'context_processors': [ 63 | 'django.template.context_processors.debug', 64 | 'django.template.context_processors.request', 65 | 'django.contrib.auth.context_processors.auth', 66 | 
'django.contrib.messages.context_processors.messages', 67 | ], 68 | }, 69 | }, 70 | ] 71 | 72 | WSGI_APPLICATION = 'project.wsgi.application' 73 | 74 | 75 | # Database 76 | # https://docs.djangoproject.com/en/1.11/ref/settings/#databases 77 | 78 | DATABASES = { 79 | 'default': { 80 | 'ENGINE': 'django.db.backends.sqlite3', 81 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 82 | } 83 | } 84 | 85 | 86 | # Password validation 87 | # https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators 88 | 89 | AUTH_PASSWORD_VALIDATORS = [ 90 | { 91 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 92 | }, 93 | { 94 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 95 | }, 96 | { 97 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 98 | }, 99 | { 100 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 101 | }, 102 | ] 103 | 104 | 105 | # Internationalization 106 | # https://docs.djangoproject.com/en/1.11/topics/i18n/ 107 | 108 | LANGUAGE_CODE = 'en-us' 109 | 110 | TIME_ZONE = 'UTC' 111 | 112 | USE_I18N = True 113 | 114 | USE_L10N = True 115 | 116 | USE_TZ = True 117 | 118 | 119 | # Static files (CSS, JavaScript, Images) 120 | # https://docs.djangoproject.com/en/1.11/howto/static-files/ 121 | 122 | STATIC_URL = '/static/' 123 | 124 | 125 | 126 | HTMLVALIDATOR_ENABLED = os.environ.get( 127 | 'HTMLVALIDATOR_ENABLED', 128 | 'True' 129 | ).lower() in ('1', 'true') 130 | 131 | HTMLVALIDATOR_FAILFAST = os.environ.get( 132 | 'HTMLVALIDATOR_FAILFAST', 133 | 'False' 134 | ).lower() in ('1', 'true') 135 | 136 | HTMLVALIDATOR_VNU_JAR = os.environ.get( 137 | 'HTMLVALIDATOR_VNU_JAR', 138 | '../vnu/vnu.jar' 139 | ) 140 | 141 | HTMLVALIDATOR_OUTPUT = os.environ.get( 142 | 'HTMLVALIDATOR_OUTPUT', 143 | 'stdout' 144 | ) 145 | HTMLVALIDATOR_DUMPDIR = os.environ.get( 146 | 'HTMLVALIDATOR_OUTPUT', 147 | '/tmp/htmlvalidator' 148 | ) 149 | 
-------------------------------------------------------------------------------- /example_dj21/project/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url, include 2 | 3 | urlpatterns = [ 4 | url(r'', include('project.myapp.urls')), 5 | ] 6 | -------------------------------------------------------------------------------- /example_dj21/project/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for example project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "example.settings") 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /htmlvalidator/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.5.1' 2 | -------------------------------------------------------------------------------- /htmlvalidator/client.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | from django.conf import settings 4 | from django.test.client import Client 5 | 6 | from .core import validate_html 7 | 8 | 9 | class ValidatingClient(Client): 10 | 11 | def get(self, *args, **kwargs): 12 | response = super(ValidatingClient, self).get(*args, **kwargs) 13 | enabled = getattr( 14 | settings, 15 | 'HTMLVALIDATOR_ENABLED', 16 | False 17 | ) 18 | if not enabled: 19 | return response 20 | 21 | # perhaps you already, for some reason have the middleware installed 22 | try: 23 | middlewares = settings.MIDDLEWARE 24 | except AttributeError: 25 | # Django <2 26 | middlewares = 
settings.MIDDLEWARE_CLASSES 27 | if ( 28 | 'htmlvalidator.middleware.HTMLValidator' 29 | in middlewares 30 | ): 31 | # no point doing it here too 32 | return response 33 | 34 | caller = inspect.stack()[1] 35 | caller_line = caller[2] 36 | caller_name = caller[3] 37 | 38 | if ( 39 | response.status_code == 200 and ( 40 | response['Content-Type'].startswith('text/html') or 41 | response['Content-Type'].startswith('application/xhtml+xml') 42 | ) 43 | ): 44 | if not response.content: 45 | raise ValueError('No response.content', args[0]) 46 | 47 | validate_html( 48 | response.content, 49 | response['Content-Type'], 50 | '%s-%s.html' % (caller_name, caller_line), 51 | (args, kwargs) 52 | ) 53 | return response 54 | -------------------------------------------------------------------------------- /htmlvalidator/core.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import cgi 3 | import codecs 4 | import os 5 | import tempfile 6 | import gzip 7 | import re 8 | import subprocess 9 | from io import BytesIO 10 | 11 | import requests 12 | 13 | from django.conf import settings 14 | from django.core.exceptions import ImproperlyConfigured 15 | 16 | from .exceptions import ValidatorOperationalError, ValidationError 17 | 18 | 19 | def validate_html(html, content_type, filename, args_kwargs): 20 | temp_dir = getattr( 21 | settings, 22 | 'HTMLVALIDATOR_DUMPDIR', 23 | None 24 | ) 25 | if temp_dir is None: 26 | temp_dir = os.path.join( 27 | tempfile.gettempdir(), 'htmlvalidator' 28 | ) 29 | else: 30 | temp_dir = os.path.expanduser(temp_dir) 31 | temp_dir = os.path.abspath(temp_dir) 32 | if content_type.startswith("application/xhtml+xml"): 33 | # *.xhtml extension triggers correct parser in CLI jar mode 34 | filename = re.sub(r'\.html$', '.xhtml', filename) 35 | 36 | if not os.path.isdir(temp_dir): 37 | os.mkdir(temp_dir) 38 | temp_file = os.path.join( 39 | temp_dir, 40 | filename 41 | ) 42 | if 
# (continuation of htmlvalidator/core.py)
#
# Tail of the preceding definition. Its beginning lies before this chunk, so
# it is reproduced verbatim here and has not been reviewed:
#
#     ... _validate(temp_file, html, content_type, args_kwargs):
#         os.remove(temp_file)


def _validate(html_file, html, content_type, args_kwargs):
    """Validate one HTML document and report any problems found.

    The document is dumped to ``html_file`` and then checked in one of two
    ways:

    * if ``settings.HTMLVALIDATOR_VNU_JAR`` is set, by running that jar with
      ``java -jar`` on the dumped file;
    * otherwise by POSTing the gzipped document to
      ``settings.HTMLVALIDATOR_VNU_URL`` (default
      ``https://html5.validator.nu/``).

    When the validator output is non-empty and does not state that the
    document is valid, "VALIDATION TROUBLE" is printed and the output is
    either printed (``settings.HTMLVALIDATOR_OUTPUT == 'stdout'``) or written
    to a ``.txt`` file next to the HTML dump (the default, ``'file'``).

    Args:
        html_file: path the HTML is dumped to.
        html: the document as bytes (it is decoded in jar mode and written
            in binary mode otherwise).
        content_type: value of the response's Content-Type header.
        args_kwargs: ``(args, kwargs)`` pair describing the request; only
            used for the "Arguments to GET" section of the report file.

    Raises:
        ImproperlyConfigured: ``HTMLVALIDATOR_VNU_JAR`` is not a file.
        ValidatorOperationalError: the jar exited with a status other than
            0 or 1, or the HTTP validator did not answer with status 200.
        ValidationError: the document did not validate and
            ``settings.HTMLVALIDATOR_FAILFAST`` is truthy.

    NOTE(review): no value is ever returned (implicitly ``None``), while the
    visible tail of the calling code uses this call as the condition guarding
    ``os.remove(temp_file)`` -- confirm the intended return contract.
    """
    args, kwargs = args_kwargs

    if getattr(settings, 'HTMLVALIDATOR_VNU_JAR', None):
        # jar mode expects files in utf-8, recode
        # NOTE(review): the ``cgi`` module was removed in Python 3.13; the
        # import lives outside this chunk, so it is only flagged here.
        content_type, params = cgi.parse_header(content_type)
        encoding = params.get('charset', 'utf-8')
        with codecs.open(html_file, 'w', 'utf-8') as f:
            f.write(html.decode(encoding))

        vnu_jar_path = settings.HTMLVALIDATOR_VNU_JAR
        vnu_jar_path = os.path.expanduser(vnu_jar_path)
        vnu_jar_path = os.path.abspath(vnu_jar_path)
        if not os.path.isfile(vnu_jar_path):
            raise ImproperlyConfigured(
                '%s is not a file' % vnu_jar_path
            )
        status, _, err = _run_command(
            'java', '-jar', vnu_jar_path, html_file
        )
        if status not in (0, 1):
            # 0 if it worked and no validation errors/warnings
            # 1 if it worked but there was validation errors/warnings
            raise ValidatorOperationalError(err)

        err = err.decode('utf-8')
        output = err  # cryptic, I know
        # Drop the '"file:<path>":' prefix so the report only shows positions.
        output = re.sub(
            '"file:%s":' % re.escape(html_file),
            '',
            output
        )

    else:
        with open(html_file, 'wb') as f:
            f.write(html)
        with BytesIO() as buf:
            with gzip.GzipFile(fileobj=buf, mode='wb') as gzipper:
                gzipper.write(html)
            # Read the buffer only after the gzip stream has been closed,
            # otherwise the trailing bytes are missing.
            gzippeddata = buf.getvalue()

        vnu_url = getattr(
            settings,
            'HTMLVALIDATOR_VNU_URL',
            'https://html5.validator.nu/'
        )

        req = requests.post(
            vnu_url,
            params={
                'out': 'gnu',
            },
            headers={
                'Content-Type': content_type,
                'Accept-Encoding': 'gzip',
                'Content-Encoding': 'gzip',
                'Content-Length': str(len(gzippeddata)),
            },
            data=gzippeddata
        )

        if req.status_code != 200:
            raise ValidatorOperationalError(req)

        output = req.text

    raise_exceptions = getattr(
        settings,
        'HTMLVALIDATOR_FAILFAST',
        False
    )

    how_to_output = getattr(
        settings,
        'HTMLVALIDATOR_OUTPUT',
        'file'
    )
    if output and not re.search(r'The document (is valid|validates)', output):
        print("VALIDATION TROUBLE")
        if how_to_output == 'stdout':
            print(output)
            print()
        else:
            print("To debug, see:")
            print("\t", html_file)
            txt_file = re.sub(r'\.x?html$', '.txt', html_file)
            if txt_file == html_file:
                # No .html/.xhtml suffix to swap. An ``assert`` used to guard
                # this, but asserts are stripped under ``python -O`` and the
                # report would then overwrite the HTML dump itself.
                txt_file = html_file + '.txt'
            print("\t", txt_file)
            with codecs.open(txt_file, 'w', 'utf-8') as f:
                f.write('Arguments to GET:\n')
                for arg in args:
                    f.write('\t%s\n' % arg)
                for k, w in kwargs.items():
                    f.write('\t%s=%s\n' % (k, w))
                f.write('\n')
                f.write(output)

        if raise_exceptions:
            raise ValidationError(output)


def _run_command(*command):
    """Run ``command`` and return ``(returncode, stdout, stderr)``.

    Both output streams are captured through pipes and returned with leading
    and trailing whitespace stripped.
    """
    proc = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    out, err = proc.communicate()
    return proc.returncode, out.strip(), err.strip()


# --------------------------------------------------------------------------------
# /htmlvalidator/exceptions.py:
# --------------------------------------------------------------------------------
class ValidatorOperationalError(Exception):
    """The validator itself could not be run or queried successfully."""
    pass


class ValidationError(AssertionError):
    """The validated document has errors (an AssertionError subclass)."""
    pass


# --------------------------------------------------------------------------------
# /htmlvalidator/middleware.py:
# --------------------------------------------------------------------------------
from django.conf import settings
from django.contrib.sites.requests import RequestSite
try:
    from django.utils.deprecation import MiddlewareMixin
except ImportError:
    MiddlewareMixin = object


from .core import validate_html


class HTMLValidator(MiddlewareMixin):
    """Middleware that hands successful HTML responses to ``validate_html``.

    It is a no-op unless ``settings.HTMLVALIDATOR_ENABLED`` is truthy.
    """

    def process_response(self, request, response):
        """Validate ``response`` when it is a 200 HTML/XHTML page.

        The response is always returned unchanged. The dump file name is
        derived from the request's site domain and path.
        """
        if not getattr(settings, 'HTMLVALIDATOR_ENABLED', False):
            return response

        # Streaming responses have no ``content`` attribute to validate, so
        # they are skipped instead of raising.
        if (
            response.status_code != 200 or
            getattr(response, 'streaming', False)
        ):
            return response

        # ``get`` rather than indexing: a response without a Content-Type
        # header must not blow up with KeyError.
        content_type = response.get('Content-Type', '')
        if not content_type.startswith(
            ('text/html', 'application/xhtml+xml')
        ):
            return response

        path = request.path[1:]
        if path.endswith('/'):
            path = path[:-1]

        filename = path.replace('/', '_')
        if not filename:
            # e.g. an error on `/`
            filename = 'index.html'
        if not filename.endswith('.html'):
            filename += '.html'
        filename = '%s-%s' % (RequestSite(request).domain, filename)
        filename = filename.replace(':', '-')  # Windows
        validate_html(
            response.content,
            content_type,
            filename,
            ([], request.GET)
        )
        return response


# --------------------------------------------------------------------------------
# /htmlvalidator/tests/__init__.py:
# --------------------------------------------------------------------------------
from django.test import TestCase as DjangoTestCase


class TestCase(DjangoTestCase):
    """Base test case for the package's own tests."""

    def shortDescription(self):
        # Stop nose using the test docstring and instead the test method
        # name.
        pass


# --------------------------------------------------------------------------------
# /htmlvalidator/tests/models.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/peterbe/django-html-validator/93fc86d881ba132b4f46f2d1e73a6c417c45ba27/htmlvalidator/tests/models.py


# --------------------------------------------------------------------------------
# /htmlvalidator/tests/settings.py:
# --------------------------------------------------------------------------------
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# (continuation of /htmlvalidator/tests/settings.py)
SECRET_KEY = 'asdf'

DATABASES = {
    'default': {
        'NAME': 'test.db',
        'ENGINE': 'django.db.backends.sqlite3',
    }
}

INSTALLED_APPS = (
    'htmlvalidator',
    'htmlvalidator.tests',

    'django.contrib.auth',
    'django.contrib.contenttypes',
    # 'django.contrib.staticfiles',
)

# To make the django app check shut up, we include middleware even if we
# don't really need it.
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
    # 'htmlvalidator.middleware.HTMLValidator',
]

ROOT_URLCONF = 'htmlvalidator.tests.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'APP_DIRS': True,
    },
]

# STATIC_URL = 'static/'


# --------------------------------------------------------------------------------
# /htmlvalidator/tests/templates/view.html:
# --------------------------------------------------------------------------------
# NOTE(review): the markup of this 11-line template did not survive the dump;
# only the two pieces of text below remain (on template lines 5 and 8). The
# tags are deliberately not reconstructed here.
#
#    5 | Hi!
#    8 | Hi!


# --------------------------------------------------------------------------------
# /htmlvalidator/tests/test_client.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import codecs
import os
import shutil
import tempfile
from glob import glob

import requests
from mock import Mock, patch

from htmlvalidator import client
from htmlvalidator.tests import TestCase

# Text and byte string types on both Python 2 and 3. Spelled out locally
# because ``django.utils.six`` no longer exists in Django 3.0 and later.
STRING_TYPES = (type(u''), type(b''))


class ClientTestCase(TestCase):
    """Exercise ``client.ValidatingClient`` against a mocked validator."""

    def setUp(self):
        super(ClientTestCase, self).setUp()
        # Scratch directory used as HTMLVALIDATOR_DUMPDIR by the test.
        self.tmpdir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.tmpdir)
        super(ClientTestCase, self).tearDown()

    def _response(self, **kwargs):
        """Return a Mock shaped like ``requests.Response``."""
        return Mock(spec=requests.Response, **kwargs)

    @patch('htmlvalidator.core.requests.post')
    def test_get(self, post):
        """With validation enabled, a GET dumps the HTML and a .txt report."""

        content = (
            u'Error: End tag “h2” seen, but there were open elements.\n'
            u'From line 8, column 12; to line 8, column 16\n'
            u'There were errors. (Tried in the text/html mode.)'
        )
        post.return_value = self._response(
            text=content,
            status_code=200
        )

        c = client.ValidatingClient()
        response = c.get('/view/')
        self.assertEqual(response.status_code, 200)
        self.assertIn('Hi!', response.content.decode('utf-8'))

        with self.settings(
            HTMLVALIDATOR_ENABLED=True, HTMLVALIDATOR_DUMPDIR=self.tmpdir
        ):
            response = c.get('/view/')
            self.assertEqual(response.status_code, 200)
            self.assertIn('Hi!', response.content.decode('utf-8'))
            # Exactly one HTML dump and one report are expected; the
            # single-element unpacking fails otherwise.
            html_file, = glob(os.path.join(self.tmpdir, '*.html'))
            with codecs.open(html_file, encoding='utf-8') as f:
                self.assertEqual(f.read(), response.content.decode('utf-8'))
            txt_file, = glob(os.path.join(self.tmpdir, '*.txt'))
            with codecs.open(txt_file, encoding='utf-8') as f:
                self.assertIn(content, f.read())

        # Make sure the "headers" argument to htmlvalidator.core.requests.post
        # was a mapping of str to str.
        post.assert_called()
        headers = post.call_args[1]['headers']
        for header in headers:
            self.assertIsInstance(header, STRING_TYPES)
            self.assertIsInstance(headers[header], STRING_TYPES)


# --------------------------------------------------------------------------------
# /htmlvalidator/tests/urls.py:
# --------------------------------------------------------------------------------
try:
    # Django 2.0+; ``django.conf.urls.url`` is gone in Django 4.0.
    from django.urls import re_path as url
except ImportError:
    from django.conf.urls import url

from htmlvalidator.tests import views


urlpatterns = [
    url(r'^view/', views.html_view, name='html_view')
]


# --------------------------------------------------------------------------------
# /htmlvalidator/tests/views.py:
# --------------------------------------------------------------------------------
from django.shortcuts import render


def html_view(request):
    """Render the ``view.html`` test template."""
    return render(request, 'view.html')


# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# requests


# --------------------------------------------------------------------------------
# /runtests.py:
# --------------------------------------------------------------------------------
import os
import sys

import django
from django.test.utils import get_runner
from django.conf import settings


def runtests():
    """Run the ``htmlvalidator.tests`` suite and exit with 1 on failure."""
    # The tests directory goes first on sys.path so that its settings.py is
    # importable as the top-level module named in DJANGO_SETTINGS_MODULE.
    test_dir = os.path.join(os.path.dirname(__file__), 'htmlvalidator/tests')
    sys.path.insert(0, test_dir)

    os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
    os.environ['REUSE_DB'] = '0'
    django.setup()

    TestRunner = get_runner(settings)
    test_runner = TestRunner(interactive=False, failfast=False)
    failures = test_runner.run_tests(['htmlvalidator.tests'])

    sys.exit(bool(failures))


if __name__ == '__main__':
    runtests()


# --------------------------------------------------------------------------------
# /setup.cfg:
# --------------------------------------------------------------------------------
# [wheel]
# universal=1


# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

import re
import codecs
import os

# Prevent spurious errors during `python setup.py test`, a la
# http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html:
try:
    import multiprocessing
except ImportError:
    pass


from setuptools import setup


def read(*parts):
    """Return the UTF-8 text of the file at ``parts``, relative to here."""
    with codecs.open(
        os.path.join(os.path.dirname(__file__), *parts),
        encoding='utf-8'
    ) as f:
        return f.read()


def find_version(*file_paths):
    """Return the ``__version__`` string assigned in the given file.

    Raises:
        RuntimeError: no ``__version__ = '...'`` line was found.
    """
    version_file = read(*file_paths)
    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                              version_file, re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError('Unable to find version string.')


def find_install_requires():
    """Return the requirements listed in ``requirements.txt``.

    Blank lines and comment lines are skipped. Each line is stripped before
    the ``#`` test so that an indented comment is not mistaken for a
    requirement.
    """
    stripped = (
        line.strip() for line in read('requirements.txt').splitlines()
    )
    return [line for line in stripped if line and not line.startswith('#')]


README = read('README.md')

setup(
    name='django-html-validator',
    version=find_version('htmlvalidator', '__init__.py'),
    url='https://github.com/peterbe/django-html-validator',
    author='Peter Bengtsson',
    author_email='mail@peterbe.com',
    description="Yo! Check your HTML!",
    long_description=README,
    long_description_content_type='text/markdown',
    packages=['htmlvalidator'],
    license='MPL v2.0',
    include_package_data=True,
    install_requires=find_install_requires(),
    test_suite="runtests.runtests",
    zip_safe=False,
    classifiers=[
        'Framework :: Django',
        'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)',
        'Operating System :: OS Independent',
        'Framework :: Django :: 1.8',
        'Framework :: Django :: 1.9',
        'Framework :: Django :: 1.10',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)


# --------------------------------------------------------------------------------
# /tox.ini:
# --------------------------------------------------------------------------------
# [tox]
# envlist =
#     py{27,35}-django18
#     py{27,35,36}-django111
#     py{35,36}-django200
#
# [travis]
# python =
#     2.7: py27
#     3.5: py35
#     3.6: py36
#     #3.6: py36, coverage, lint
#
# [testenv]
# commands = {envpython} setup.py test
# setenv =
#     PYTHONPATH={toxinidir}
# deps =
#     -r{toxinidir}/requirements.txt
#     mock
#     django18: Django>=1.8,<1.9
#     django111: Django>=1.11,<2.0.0
#     django200: Django>=2.0.0


# --------------------------------------------------------------------------------
# /vnu/README.md:
# --------------------------------------------------------------------------------
# To get your own `vnu.jar`, go to
# https://github.com/validator/validator/releases
# then download the latest .zip and unzip it here.
#
# You might need to... `mv dist/vnu.jar .`
# --------------------------------------------------------------------------------