├── .gitignore ├── setup.cfg ├── .coveragerc ├── .travis.yml ├── test ├── data │ ├── valid.html │ └── invalid.html └── test_html_linter.py ├── setup.py ├── README.rst ├── scripts └── html_lint.py ├── pylintrc ├── LICENSE └── html_linter.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.coverage 3 | *.egg-info 4 | __pycache__ 5 | build/ 6 | dist/ 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | detailed-errors=1 3 | with-coverage=1 4 | cover-package=html_linter 5 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | */python?.?/* 4 | */lib-python/?.?/*.py 5 | */lib_pypy/_*.py 6 | */site-packages/ordereddict.py 7 | */site-packages/nose/* 8 | */unittest2/* 9 | 10 | exclude_lines = 11 | # Have to re-enable the standard pragma 12 | pragma: no cover 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.2" 5 | - "3.3" 6 | - "3.4" 7 | # command to install dependencies 8 | install: 9 | - "pip install ." 10 | - "pip install coveralls" 11 | # command to run tests 12 | script: python -R setup.py nosetests 13 | after_success: 14 | coveralls 15 | -------------------------------------------------------------------------------- /test/data/valid.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | A tab 14 | 15 | 16 |
17 | 18 | 19 | A 20 | 21 | 22 | Foo.com 23 | 24 | 25 |
26 | 27 | 28 | foo 29 | 30 | 31 | Foo 32 | 33 | 34 | foo 35 | 36 | 37 |
38 |
39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | Void 0 Link 50 | 51 | 52 | A 53 | 54 | 55 | A 56 | 57 | 58 |
59 |

The Euro sign is € or €. In a link they render like this Euro Doc 60 |

61 | -------------------------------------------------------------------------------- /test/data/invalid.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | A tab 17 | 18 | 19 |
20 | 21 | 22 | A 23 | 24 | 25 | Foo.com 26 | 27 | 28 |
29 | 30 | 31 | foo 32 | 33 | 34 | Foo 35 | 36 | 37 | foo 38 | 39 | 40 |
41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | Void 0 Link 52 | 53 | 54 | 55 | A 56 | 57 | 58 | A 59 | 60 | 61 |
62 |

The Euro sign is € or €. In a link they render like this Euro Doc

63 |
64 | 65 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Deezer (http://www.deezer.com) 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from setuptools import setup, find_packages 16 | 17 | 18 | setup( 19 | name='html-linter', 20 | version='0.1.5', 21 | description='Lints an HTML5 file using Google\'s style guide', 22 | long_description=open('README.rst').read(), 23 | author='Sebastian Kreft - Deezer', 24 | author_email='skreft@deezer.com', 25 | url='http://github.com/deezer/html-linter', 26 | py_modules=['html_linter'], 27 | install_requires=['template-remover', 'docopt==0.6.1'], 28 | tests_require=['nose>=1.3'], 29 | scripts=['scripts/html_lint.py'], 30 | classifiers=[ 31 | 'Development Status :: 3 - Alpha', 32 | 'Environment :: Console', 33 | 'Intended Audience :: Developers', 34 | 'License :: OSI Approved :: Apache Software License', 35 | 'Operating System :: Unix', 36 | 'Programming Language :: Python :: 2', 37 | 'Programming Language :: Python :: 2.7', 38 | 'Programming Language :: Python :: 3', 39 | 'Programming Language :: Python :: 3.2', 40 | 'Programming Language :: Python :: 3.3', 41 | 'Programming Language :: Python :: 3.4', 42 | 'Topic :: Software Development', 43 | ], 44 | ) 45 | -------------------------------------------------------------------------------- 
/README.rst: -------------------------------------------------------------------------------- 1 | HTML Linter 2 | =========== 3 | 4 | .. image:: https://badge.fury.io/py/html-linter.png 5 | :target: http://badge.fury.io/py/html-linter 6 | 7 | .. image:: https://travis-ci.org/deezer/html-linter.png?branch=master 8 | :target: https://travis-ci.org/deezer/html-linter 9 | 10 | .. image:: https://coveralls.io/repos/deezer/html-linter/badge.png?branch=master 11 | :target: https://coveralls.io/r/deezer/html-linter?branch=master 12 | 13 | 14 | HTML Linter is an HTML5 linter that follows the style guide defined by Google. 15 | 16 | Motivation 17 | ---------- 18 | 19 | Handling HTML5 files generated by lots of different people is a difficult 20 | task, because the standard is evolving quite fast and also because browsers are 21 | quite open to accept any malformed/invalid/incomplete input. 22 | 23 | That's why we decided to have an automated tool to check our coding standard. 24 | 25 | We start with the 26 | `Google standard `_ 27 | and we enhance it with some extra rules defined by the project 28 | `html-minifier `_. You can read his 29 | detailed `article `_. 30 | 31 | The list of extra rules we added is: 32 | 33 | * Boolean attributes should not have an explicit value. 34 | * Do not use the name attribute in a tags. 35 | * Do not use the language attribute in script tags. 36 | * Do not use the charset attribute in script tags when there is no source. 37 | * Javascript:void(0) links are evil. 38 | * onclick='javascript: ...' is not required. This will also raise a Concerns Separation error. 39 | * meta http-equiv: use only standard properties + X-UA-compatible 40 | * No extra whitespaces between attributes or before the opening or closing tag. 41 | 42 | What is missing? 43 | ---------------- 44 | 45 | Check if the file has BOM. 46 | 47 | The ability to validate the HTML using the tool 48 | `HTML5 tidy `_ and to integrate some 49 | schema.org or microdata validator. 
 50 | 51 | However, we do not have any short-term plans to handle the latter, due to a 52 | couple of reasons: 53 | 54 | * Tidy can be easily integrated using the tool `git-lint `_ 55 | * Any other validator can also be integrated with `git-lint `_ 56 | * Adding any extra validator would pull many more dependencies and change the scope. 57 | 58 | Limitations 59 | ----------- 60 | 61 | html_linter uses the project 62 | `template-remover `_ to remove the 63 | PHP and Jinja markup from the files and this project has some limitations. 64 | 65 | 66 | One example that won't work is the following:: 67 | 68 | " ?> 69 | 70 | The reason it does not work is because when the method sees the first '?>' 71 | (the one inside the string), it thinks it's a closing tag. 72 | 73 | 74 | Example use 75 | ----------- 76 | 77 | Below is an example of how html_lint.py is used:: 78 | 79 | $ html_lint.py filename.html 80 | 81 | 82 | Installation 83 | ------------ 84 | 85 | You can install, upgrade or uninstall html-linter with these commands:: 86 | 87 | $ pip install html-linter 88 | $ pip install --upgrade html-linter 89 | $ pip uninstall html-linter 90 | 91 | Python Versions 92 | --------------- 93 | 94 | Python 2.7 is officially supported, 3.2, 3.3 and 3.4 should also work. 95 | 96 | Development 97 | ----------- 98 | 99 | Help for this project is more than welcome, so feel free to create an issue or 100 | to send a pull request via http://github.com/deezer/html-linter. 101 | 102 | Tests are run using nose, either with:: 103 | 104 | $ python -R setup.py nosetests 105 | $ nosetests 106 | 107 | Use the tool `git-lint `_ before any commit, so 108 | errors and style problems are caught early. 109 | 110 | TODOS and Possible Features 111 | --------------------------- 112 | 113 | * Make the output less verbose. 114 | * Integrate with HTML5 tidy. 115 | * Integrate with a schema.org/microdata validator. 
116 | 117 | 118 | Changelog 119 | ========= 120 | 121 | v0.1 (2014-05-07) 122 | ------------------- 123 | 124 | * Initial commit. 125 | -------------------------------------------------------------------------------- /scripts/html_lint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2014 Deezer (http://www.deezer.com) 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """html_lint.py 17 | 18 | This HTML5 linter follows the style guide open sourced by Google 19 | https://google-styleguide.googlecode.com/svn/trunk/htmlcssguide.xml. 20 | 21 | It also extends the guide with some rules defined by the project 22 | https://github.com/kangax/html-minifier. 23 | 24 | This software is released under the Apache License. Copyright Deezer 2014. 25 | 26 | Usage: 27 | html5_lint.py [--disable=DISABLE] FILENAME 28 | html5_lint.py (-h | --help) 29 | html5_lint.py --version 30 | 31 | Options: 32 | -h --help Show this screen. 33 | --version Show version. 34 | --disable=checks A comma separated list of checks to disable. Valid names are: 35 | doctype, entities, trailing_whitespace, tabs, charset, 36 | void_element, optional_tag, type_attribute, 37 | concerns_separation, protocol, names, 38 | capitalization, quotation, indentation, formatting, 39 | boolean_attribute, invalid_attribute, void_zero, 40 | invalid_handler, http_equiv, extra_whitespace. 
41 | 42 | """ 43 | 44 | from __future__ import absolute_import 45 | from __future__ import division 46 | from __future__ import print_function 47 | from __future__ import unicode_literals 48 | 49 | import codecs 50 | import io 51 | import sys 52 | 53 | import docopt 54 | 55 | import html_linter 56 | import template_remover 57 | 58 | _DISABLE_MAP = { 59 | 'doctype': html_linter.DocumentTypeMessage, 60 | 'entities': html_linter.EntityReferenceMessage, 61 | 'trailing_whitespace': html_linter.TrailingWhitespaceMessage, 62 | 'tabs': html_linter.TabMessage, 63 | 'charset': html_linter.CharsetMessage, 64 | 'void_element': html_linter.VoidElementMessage, 65 | 'optional_tag': html_linter.OptionalTagMessage, 66 | 'type_attribute': html_linter.TypeAttributeMessage, 67 | 'concerns_separation': html_linter.ConcernsSeparationMessage, 68 | 'protocol': html_linter.ProtocolMessage, 69 | 'names': html_linter.NameMessage, 70 | 'capitalization': html_linter.CapitalizationMessage, 71 | 'quotation': html_linter.QuotationMessage, 72 | 'indentation': html_linter.IndentationMessage, 73 | 'formatting': html_linter.FormattingMessage, 74 | 'boolean_attribute': html_linter.BooleanAttributeMessage, 75 | 'invalid_attribute': html_linter.InvalidAttributeMessage, 76 | 'void_zero': html_linter.VoidZeroMessage, 77 | 'invalid_handler': html_linter.InvalidHandlerMessage, 78 | 'http_equiv': html_linter.HTTPEquivMessage, 79 | 'extra_whitespace': html_linter.ExtraWhitespaceMessage, 80 | } 81 | 82 | 83 | __VERSION__ = '0.1' 84 | 85 | 86 | def main(): 87 | """Entry point for the HTML5 Linter.""" 88 | 89 | # Wrap sys stdout for python 2, so print can understand unicode. 
90 | if sys.version_info[0] < 3: 91 | sys.stdout = codecs.getwriter("utf-8")(sys.stdout) 92 | 93 | options = docopt.docopt(__doc__, 94 | help=True, 95 | version='html5_lint v%s' % __VERSION__) 96 | 97 | disable_str = options['--disable'] or '' 98 | disable = disable_str.split(',') 99 | 100 | invalid_disable = set(disable) - set(_DISABLE_MAP.keys()) - set(('',)) 101 | if invalid_disable: 102 | sys.stderr.write( 103 | 'Invalid --disable arguments: %s\n\n' % ', '.join(invalid_disable)) 104 | sys.stderr.write(__doc__) 105 | return 1 106 | 107 | exclude = [_DISABLE_MAP[d] for d in disable if d in _DISABLE_MAP] 108 | clean_html = template_remover.clean(io.open(options['FILENAME']).read()) 109 | print(html_linter.lint(clean_html, exclude=exclude)) 110 | 111 | return 0 112 | 113 | if __name__ == '__main__': 114 | sys.exit(main()) 115 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # Specify a configuration file. 4 | #rcfile= 5 | 6 | # Python code to execute, usually for sys.path manipulation such as 7 | # pygtk.require(). 8 | #init-hook='import sys; import os; sys.path.append(os.path.abspath("."))' 9 | 10 | # Profiled execution. 11 | profile=no 12 | 13 | # Add files or directories to the blacklist. They should be base names, not 14 | # paths. 15 | ignore=CVS,.git 16 | 17 | # Pickle collected data for later comparisons. 18 | persistent=no 19 | 20 | # List of plugins (as comma separated values of python modules names) to load, 21 | # usually to register additional checkers. 22 | load-plugins= 23 | 24 | 25 | [MESSAGES CONTROL] 26 | 27 | # Enable the message, report, category or checker with the given id(s). You can 28 | # either give multiple identifier separated by comma (,) or put this option 29 | # multiple time. See also the "--disable" option for examples. 
30 | #enable= 31 | 32 | # Disable the message, report, category or checker with the given id(s). You 33 | # can either give multiple identifiers separated by comma (,) or put this 34 | # option multiple times (only on the command line, not in the configuration 35 | # file where it should appear only once).You can also use "--disable=all" to 36 | # disable everything first and then reenable specific checks. For example, if 37 | # you want to run only the similarities checker, you can use "--disable=all 38 | # --enable=similarities". If you want to run only the classes checker, but have 39 | # no Warning level messages displayed, use"--disable=all --enable=classes 40 | # --disable=W" 41 | #disable= 42 | 43 | 44 | [REPORTS] 45 | 46 | # Set the output format. Available formats are text, parseable, colorized, msvs 47 | # (visual studio) and html. You can also give a reporter class, eg 48 | # mypackage.mymodule.MyReporterClass. 49 | output-format=text 50 | 51 | # Put messages in a separate file for each module / package specified on the 52 | # command line instead of printing them on stdout. Reports (if any) will be 53 | # written in a file name "pylint_global.[txt|html]". 54 | files-output=no 55 | 56 | # Tells whether to display a full report or only the messages 57 | reports=no 58 | 59 | # Python expression which should return a note less than 10 (10 is the highest 60 | # note). You have access to the variables errors warning, statement which 61 | # respectively contain the number of errors / warnings messages and the total 62 | # number of statements analyzed. This is used by the global evaluation report 63 | # (RP0004). 64 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 65 | 66 | # Add a comment according to your evaluation note. This is used by the global 67 | # evaluation report (RP0004). 68 | comment=no 69 | 70 | # Template used to display messages. 
This is a python new-style format string 71 | # used to format the message information. See doc for all details 72 | #msg-template= 73 | 74 | 75 | [MISCELLANEOUS] 76 | 77 | # List of note tags to take in consideration, separated by a comma. 78 | notes=FIXME,XXX,TODO 79 | 80 | 81 | [VARIABLES] 82 | 83 | # Tells whether we should check for unused import in __init__ files. 84 | init-import=no 85 | 86 | # A regular expression matching the beginning of the name of dummy variables 87 | # (i.e. not used). 88 | dummy-variables-rgx=_$|dummy|unused_ 89 | 90 | # List of additional names supposed to be defined in builtins. Remember that 91 | # you should avoid to define new builtins when possible. 92 | additional-builtins= 93 | 94 | 95 | [FORMAT] 96 | 97 | # Maximum number of characters on a single line. 98 | max-line-length=80 99 | 100 | # Regexp for a line that is allowed to be longer than the limit. 101 | ignore-long-lines=^\s*(# )??$|'https?://\S+'$ 102 | 103 | # Maximum number of lines in a module 104 | max-module-lines=2000 105 | 106 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 107 | # tab). 
108 | indent-string=' ' 109 | 110 | 111 | [BASIC] 112 | 113 | # Required attributes for module, separated by a comma 114 | required-attributes= 115 | 116 | # List of builtins function names that should not be used, separated by a comma 117 | bad-functions=filter,apply,input 118 | 119 | # Regular expression which should only match correct module names 120 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 121 | 122 | # Regular expression which should only match correct module level names 123 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__)|logger)$ 124 | 125 | # Regular expression which should only match correct class names 126 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 127 | 128 | # Regular expression which should only match correct function names 129 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 130 | 131 | # Regular expression which should only match correct method names 132 | method-rgx=[a-z_][a-z0-9_]{2,30}|test_[a-z0-9_]{2,40}$ 133 | 134 | # Regular expression which should only match correct instance attribute names 135 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 136 | 137 | # Regular expression which should only match correct argument names 138 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 139 | 140 | # Regular expression which should only match correct variable names 141 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 142 | 143 | # Regular expression which should only match correct attribute names in class 144 | # bodies 145 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 146 | 147 | # Regular expression which should only match correct list comprehension / 148 | # generator expression variable names 149 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 150 | 151 | # Good variable names which should always be accepted, separated by a comma 152 | good-names=f,i,j,k,ex,Run,_ 153 | 154 | # Bad variable names which should always be refused, separated by a comma 155 | bad-names=foo,bar,baz,toto,tutu,tata 156 | 157 | # Regular expression which should only match function or class names that do 158 | # not 
require a docstring. 159 | no-docstring-rgx=__.*__|_.*|test_.*|Test.* 160 | 161 | # Minimum line length for functions/classes that require docstrings, shorter 162 | # ones are exempt. 163 | docstring-min-length=-1 164 | 165 | 166 | [TYPECHECK] 167 | 168 | # Tells whether missing members accessed in mixin class should be ignored. A 169 | # mixin class is detected if its name ends with "mixin" (case insensitive). 170 | ignore-mixin-members=yes 171 | 172 | # List of classes names for which member attributes should not be checked 173 | # (useful for classes with attributes dynamically set). 174 | ignored-classes=SQLObject 175 | 176 | # When zope mode is activated, add a predefined set of Zope acquired attributes 177 | # to generated-members. 178 | zope=no 179 | 180 | # List of members which are set dynamically and missed by pylint inference 181 | # system, and so shouldn't trigger E0201 when accessed. Python regular 182 | # expressions are accepted. 183 | generated-members=REQUEST,acl_users,aq_parent 184 | 185 | 186 | [SIMILARITIES] 187 | 188 | # Minimum lines number of a similarity. 189 | min-similarity-lines=4 190 | 191 | # Ignore comments when computing similarities. 192 | ignore-comments=yes 193 | 194 | # Ignore docstrings when computing similarities. 195 | ignore-docstrings=yes 196 | 197 | # Ignore imports when computing similarities. 198 | ignore-imports=no 199 | 200 | 201 | [IMPORTS] 202 | 203 | # Deprecated modules which should not be used, separated by a comma 204 | deprecated-modules=regsub,TERMIOS,Bastion,rexec 205 | 206 | # Create a graph of every (i.e. 
internal and external) dependencies in the 207 | # given file (report RP0402 must not be disabled) 208 | import-graph= 209 | 210 | # Create a graph of external dependencies in the given file (report RP0402 must 211 | # not be disabled) 212 | ext-import-graph= 213 | 214 | # Create a graph of internal dependencies in the given file (report RP0402 must 215 | # not be disabled) 216 | int-import-graph= 217 | 218 | 219 | [DESIGN] 220 | 221 | # Maximum number of arguments for function / method 222 | max-args=8 223 | 224 | # Argument names that match this expression will be ignored. Default to name 225 | # with leading underscore 226 | ignored-argument-names=_.* 227 | 228 | # Maximum number of locals for function / method body 229 | max-locals=15 230 | 231 | # Maximum number of return / yield for function / method body 232 | max-returns=6 233 | 234 | # Maximum number of branch for function / method body 235 | max-branches=18 236 | 237 | # Maximum number of statements in function / method body 238 | max-statements=50 239 | 240 | # Maximum number of parents for a class (see R0901). 241 | max-parents=7 242 | 243 | # Maximum number of attributes for a class (see R0902). 244 | max-attributes=15 245 | 246 | # Minimum number of public methods for a class (see R0903). 247 | min-public-methods=1 248 | 249 | # Maximum number of public methods for a class (see R0904). 250 | max-public-methods=20 251 | 252 | 253 | [CLASSES] 254 | 255 | # List of interface methods to ignore, separated by a comma. This is used for 256 | # instance to not check methods defines in Zope's Interface base class. 257 | ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by 258 | 259 | # List of method names used to declare (i.e. assign) instance attributes. 
260 | defining-attr-methods=__init__,__new__,setUp 261 | 262 | # List of valid names for the first argument in a class method. 263 | valid-classmethod-first-arg=cls 264 | 265 | # List of valid names for the first argument in a metaclass class method. 266 | valid-metaclass-classmethod-first-arg=mcs 267 | 268 | 269 | [EXCEPTIONS] 270 | 271 | # Exceptions that will emit a warning when being caught. Defaults to 272 | # "Exception" 273 | overgeneral-exceptions=Exception 274 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /test/test_html_linter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Deezer (http://www.deezer.com) 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Tests for the html_linter module.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | from __future__ import unicode_literals 21 | 22 | import io 23 | import os 24 | import unittest 25 | 26 | import html_linter 27 | 28 | 29 | # pylint: disable=too-many-public-methods,protected-access 30 | 31 | 32 | class TestHTML5Linter(unittest.TestCase): 33 | def test_doctype(self): 34 | # Non HTML5 doctype 35 | self.assertEquals( 36 | [html_linter.DocumentTypeMessage( 37 | line=1, column=1, declaration='')], 38 | html_linter.HTML5Linter('').messages 39 | ) 40 | # Extra whitespace 41 | self.assertEquals( 42 | [html_linter.DocumentTypeMessage( 43 | line=1, column=1, declaration='')], 44 | html_linter.HTML5Linter('').messages 45 | ) 46 | # The right doctype 47 | self.assertEquals( 48 | [], 49 | html_linter.HTML5Linter('').messages 50 | ) 51 | 52 | def test_entity_references(self): 53 | self.assertEquals( 54 | [html_linter.EntityReferenceMessage( 55 | line=1, column=2, entity='á')], 56 | html_linter.HTML5Linter(' á ').messages 57 | ) 58 | 59 | self.assertEquals( 60 | [], 61 | html_linter.HTML5Linter(' < >   & ').messages 62 | ) 63 | 64 | def test_entity_references_in_attributes(self): 65 | self.assertEquals( 66 | [html_linter.EntityReferenceMessage( 67 | line=1, column=11, entity='á')], 68 | html_linter.HTML5Linter('').messages 69 | ) 70 | 71 | self.assertEquals( 72 | [], 73 | html_linter.HTML5Linter( 74 | '').messages 75 | ) 76 | 77 | def test_entity_reference_must_have_semicolon(self): 78 | self.assertEquals( 79 | [], 80 | html_linter.HTML5Linter( 81 | '').messages 82 | ) 83 | self.assertEquals( 84 | [], 85 | html_linter.HTML5Linter('').messages 86 | ) 87 | 88 | def test_char_references(self): 89 | self.assertEquals( 90 | [html_linter.EntityReferenceMessage( 91 | line=1, column=2, entity=' ')], 92 | html_linter.HTML5Linter(' ').messages 93 | ) 94 | 95 | def 
test_char_references_in_attributes(self): 96 | self.assertEquals( 97 | [html_linter.EntityReferenceMessage( 98 | line=1, column=11, entity=' ')], 99 | html_linter.HTML5Linter('').messages 100 | ) 101 | 102 | def test_char_reference_must_have_semicolon(self): 103 | self.assertEquals( 104 | [], 105 | html_linter.HTML5Linter('').messages 106 | ) 107 | 108 | def test_trailing_whitespace(self): 109 | self.assertEquals( 110 | [html_linter.TrailingWhitespaceMessage( 111 | line=1, column=4, whitespace=' ')], 112 | html_linter.HTML5Linter('foo \n').messages 113 | ) 114 | self.assertEquals( 115 | [html_linter.TrailingWhitespaceMessage( 116 | line=1, column=4, whitespace=' '), 117 | html_linter.TrailingWhitespaceMessage( 118 | line=2, column=5, whitespace=' ')], 119 | html_linter.HTML5Linter('foo \nbarz \n').messages 120 | ) 121 | self.assertEquals( 122 | [html_linter.TrailingWhitespaceMessage( 123 | line=1, column=4, whitespace='\t \t'), 124 | html_linter.TabMessage(line=1, column=4), 125 | html_linter.TabMessage(line=1, column=6)], 126 | html_linter.HTML5Linter('foo\t \t\r').messages 127 | ) 128 | # Only complaint before a newline 129 | self.assertEquals( 130 | [], 131 | html_linter.HTML5Linter('a ').messages 132 | ) 133 | 134 | def test_tabs(self): 135 | self.assertEquals( 136 | [html_linter.TabMessage(line=1, column=3)], 137 | html_linter.HTML5Linter(' \t\t').messages 138 | ) 139 | self.assertEquals( 140 | [html_linter.TabMessage(line=1, column=3), 141 | html_linter.TabMessage(line=2, column=1)], 142 | html_linter.HTML5Linter(' \ta\n\ta').messages 143 | ) 144 | 145 | def test_charset(self): 146 | self.assertEquals( 147 | [html_linter.CharsetMessage(line=1, column=16, charset='foo')], 148 | html_linter.HTML5Linter('').messages 149 | ) 150 | self.assertEquals( 151 | [html_linter.CharsetMessage(line=1, column=16, charset='UTF-8')], 152 | html_linter.HTML5Linter('').messages 153 | ) 154 | self.assertEquals( 155 | [], 156 | html_linter.HTML5Linter('').messages 157 | ) 158 | 
159 | def test_charset_not_present(self): 160 | self.assertEquals( 161 | [html_linter.CharsetMessage(line=1, column=1)], 162 | html_linter.HTML5Linter('').messages 163 | ) 164 | # We add the attribute so the optional tag check is not raised 165 | self.assertEquals( 166 | [html_linter.CharsetMessage(line=2, column=22)], 167 | html_linter.HTML5Linter('\n').messages 168 | ) 169 | 170 | def test_close_void_tags(self): 171 | self.assertEquals( 172 | [html_linter.VoidElementMessage( 173 | line=1, column=4, tag='br', trailing_chars='/'), 174 | html_linter.VoidElementMessage( 175 | line=1, column=20, tag='img', trailing_chars='/'), 176 | html_linter.VoidElementMessage( 177 | line=2, column=6, tag='img')], 178 | html_linter.HTML5Linter( 179 | '
\n').messages 180 | ) 181 | 182 | def test_close_optional_tags(self): 183 | self.assertEquals( 184 | [html_linter.OptionalTagMessage(line=1, column=7, tag='p'), 185 | html_linter.OptionalTagMessage(line=2, column=3, tag='body'), 186 | html_linter.OptionalTagMessage(line=3, column=1, tag='html')], 187 | html_linter.HTML5Linter('

foo

\n \n').messages 188 | ) 189 | 190 | def test_open_optional_tag(self): 191 | self.assertEquals( 192 | [html_linter.OptionalTagMessage( 193 | line=1, column=1, tag='html', opening=True), 194 | html_linter.OptionalTagMessage( 195 | line=1, column=10, tag='body', opening=True)], 196 | html_linter.HTML5Linter('foo').messages 197 | ) 198 | self.assertEquals( 199 | [], 200 | html_linter.HTML5Linter( 201 | 'foo').messages 202 | ) 203 | 204 | def test_link_type(self): 205 | self.assertEquals( 206 | [html_linter.TypeAttributeMessage(line=1, column=7, tag='link')], 207 | html_linter.HTML5Linter( 208 | '').messages 209 | ) 210 | self.assertEquals( 211 | [], 212 | html_linter.HTML5Linter( 213 | '\n' + 214 | '\n').messages 215 | ) 216 | 217 | def test_style_type(self): 218 | self.assertEquals( 219 | [html_linter.ConcernsSeparationMessage( 220 | line=1, column=1, tag='style'), 221 | html_linter.TypeAttributeMessage(line=1, column=8, tag='style')], 222 | html_linter.HTML5Linter('').messages 285 | ) 286 | 287 | def test_style_attribute(self): 288 | self.assertEquals( 289 | [html_linter.ConcernsSeparationMessage( 290 | line=1, column=4, tag='a', attribute='style')], 291 | html_linter.HTML5Linter('
a').messages 292 | ) 293 | 294 | def test_a_tag_with_javascript(self): 295 | self.assertEquals( 296 | [html_linter.ConcernsSeparationMessage( 297 | line=1, column=10, tag='a', attribute='href')], 298 | html_linter.HTML5Linter('').messages 299 | ) 300 | self.assertEquals( 301 | [], 302 | html_linter.HTML5Linter('').messages 303 | ) 304 | 305 | def test_a_tag_with_void_zero(self): 306 | self.assertEquals( 307 | [html_linter.VoidZeroMessage(line=1, column=10), 308 | html_linter.VoidZeroMessage(line=2, column=10), 309 | html_linter.VoidZeroMessage(line=3, column=10)], 310 | html_linter.HTML5Linter( 311 | '\n' + 312 | '\n' + 313 | '').messages 314 | ) 315 | 316 | def test_a_tag_with_name_attribute(self): 317 | self.assertEquals( 318 | [html_linter.InvalidAttributeMessage( 319 | line=1, column=4, attribute='name')], 320 | html_linter.HTML5Linter('').messages 321 | ) 322 | 323 | def test_tag_with_event_handler(self): 324 | self.assertEquals( 325 | [html_linter.ConcernsSeparationMessage( 326 | line=1, column=7, tag='body', attribute='onload')], 327 | html_linter.HTML5Linter('').messages 328 | ) 329 | 330 | def test_tag_with_event_handler_and_js_protocol(self): 331 | self.assertEquals( 332 | [html_linter.ConcernsSeparationMessage( 333 | line=1, column=7, tag='body', attribute='onload'), 334 | html_linter.InvalidHandlerMessage( 335 | line=1, column=15, attribute='onload')], 336 | html_linter.HTML5Linter( 337 | '').messages 338 | ) 339 | 340 | def test_urls_have_protocol(self): 341 | self.assertEquals( 342 | [html_linter.ProtocolMessage(line=1, column=10, protocol='http:'), 343 | html_linter.ProtocolMessage( 344 | line=2, column=11, protocol='https:')], 345 | html_linter.HTML5Linter( 346 | '\n' + 347 | '').messages 348 | ) 349 | self.assertEquals( 350 | [], 351 | html_linter.HTML5Linter( 352 | '\n').messages 353 | ) 354 | 355 | def test_names(self): 356 | self.assertEquals( 357 | [html_linter.NameMessage( 358 | line=1, column=10, attribute='id', value='a_b'), 359 | 
html_linter.NameMessage( 360 | line=2, column=13, attribute='class', value='Foo'), 361 | html_linter.NameMessage( 362 | line=3, column=13, attribute='class', value='a_b'), 363 | html_linter.NameMessage( 364 | line=3, column=22, attribute='id', value='Foo')], 365 | html_linter.HTML5Linter( 366 | '
\n' + 367 | '\n' + 368 | '
').messages 369 | ) 370 | self.assertEquals( 371 | [], 372 | html_linter.HTML5Linter( 373 | '
\n').messages 374 | ) 375 | 376 | def test_case(self): 377 | self.assertEquals( 378 | [html_linter.CapitalizationMessage(line=1, column=2, tag='A'), 379 | html_linter.CapitalizationMessage( 380 | line=1, column=4, tag='A', attribute='HREF'), 381 | html_linter.CapitalizationMessage( 382 | line=1, column=17, tag='A', closing=True), 383 | html_linter.CapitalizationMessage( 384 | line=2, column=4, tag='A', attribute='itemScope')], 385 | html_linter.HTML5Linter( 386 | 'foo\n').messages 387 | ) 388 | 389 | def test_case_with_numeric_attribute(self): 390 | # Tests https://github.com/deezer/html-linter/issues/3, because of 391 | # python bug http://bugs.python.org/issue13822. 392 | self.assertEquals( 393 | [], 394 | html_linter.HTML5Linter( 395 | 'foo').messages 396 | ) 397 | 398 | def test_quotation(self): 399 | self.assertEquals( 400 | [html_linter.QuotationMessage(line=1, column=9, quotation="'"), 401 | html_linter.QuotationMessage(line=1, column=22, quotation='')], 402 | html_linter.HTML5Linter('').messages 403 | ) 404 | 405 | def test_indentation(self): 406 | self.assertEquals( 407 | [html_linter.IndentationMessage( 408 | line=2, column=1, indent=3, max_indent=2)], 409 | html_linter.HTML5Linter('\n
').messages 423 | ) 424 | # If we indented by something greater than the maximum allowed we 425 | # normalize it to the previous maximum. 426 | self.assertEquals( 427 | [html_linter.IndentationMessage( 428 | line=2, column=1, indent=3, max_indent=2)], 429 | html_linter.HTML5Linter('\n
\n
').messages 430 | ) 431 | # This case should raise two warnings, because the first indentation is 432 | # normalized to 2 spaces and the second is 6 spaces. 433 | self.assertEquals( 434 | [html_linter.IndentationMessage( 435 | line=2, column=1, indent=3, max_indent=2), 436 | html_linter.IndentationMessage( 437 | line=3, column=1, indent=6, max_indent=4)], 438 | html_linter.HTML5Linter('\n
\n
').messages 439 | ) 440 | 441 | self.assertEquals( 442 | [], 443 | html_linter.HTML5Linter('
\n ').messages 444 | ) 445 | # Tabs are replaced by two spaces, so we are only getting the Tab error. 446 | self.assertEquals( 447 | [html_linter.TabMessage(line=2, column=1)], 448 | html_linter.HTML5Linter('
\n\t').messages 449 | ) 450 | 451 | def test_spaces_between_tags(self): 452 | self.assertEquals( 453 | [], 454 | html_linter.HTML5Linter('
').messages 455 | ) 456 | 457 | def test_formatting(self): 458 | self.assertEquals( 459 | [html_linter.FormattingMessage(line=1, column=5, tag='li'), 460 | html_linter.FormattingMessage(line=1, column=9, tag='div'), 461 | html_linter.FormattingMessage(line=1, column=14, tag='table'), 462 | html_linter.FormattingMessage(line=1, column=21, tag='tr'), 463 | html_linter.FormattingMessage(line=1, column=25, tag='td')], 464 | html_linter.HTML5Linter('