├── lint_lib ├── __init__.py ├── _vendor │ ├── __init__.py │ ├── funcparserlib │ │ ├── __init__.py │ │ ├── py.typed │ │ ├── util.pyi │ │ ├── lexer.pyi │ │ ├── LICENSE │ │ ├── util.py │ │ ├── parser.pyi │ │ └── lexer.py │ └── vendor.txt ├── _vendor-patches │ └── funcparserlib.patch ├── parser.py └── lint.py ├── .gitattributes ├── encoding ├── tests1.dat ├── chardet │ └── test_big5.txt ├── scripted │ └── tests1.dat ├── test-yahoo-jp.dat └── tests2.dat ├── lint ├── tokenizer ├── pendingSpecChanges.test ├── xmlViolation.test ├── unicodeCharsProblematic.test ├── escapeFlag.test ├── contentModelFlags.test └── README.md ├── pyproject.toml ├── tree-construction ├── scripted │ ├── adoption01.dat │ ├── webkit01.dat │ └── ark.dat ├── namespace-sensitivity.dat ├── adoption02.dat ├── main-element.dat ├── search-element.dat ├── isindex.dat ├── pending-spec-changes.dat ├── inbody01.dat ├── pending-spec-changes-plain-text-unsafe.dat ├── tests24.dat ├── tests4.dat ├── quirks01.dat ├── tests14.dat ├── tests12.dat ├── svg.dat ├── math.dat ├── tests8.dat ├── tests17.dat ├── tests5.dat ├── tests23.dat ├── comments01.dat ├── tests15.dat ├── tests25.dat ├── tests22.dat ├── menuitem-element.dat ├── README.md ├── noscript01.dat ├── ruby.dat ├── tests3.dat ├── html5test-com.dat ├── entities02.dat ├── tests21.dat ├── adoption01.dat ├── tricky01.dat ├── scriptdata01.dat ├── tables01.dat └── tests7.dat ├── .github └── workflows │ ├── lint.yml │ └── downstream.yml ├── AUTHORS.rst ├── LICENSE ├── .gitignore └── serializer ├── options.test ├── whitespace.test ├── injectmeta.test └── core.test /lint_lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lint_lib/_vendor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lint_lib/_vendor/funcparserlib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lint_lib/_vendor/funcparserlib/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lint_lib/_vendor/vendor.txt: -------------------------------------------------------------------------------- 1 | funcparserlib==1.0.1 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.dat -text diff 2 | *.test -text diff 3 | -------------------------------------------------------------------------------- /encoding/tests1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/html5lib/html5lib-tests/HEAD/encoding/tests1.dat -------------------------------------------------------------------------------- /lint: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | 4 | import lint_lib.lint as lint 5 | 6 | sys.exit(lint.main()) 7 | -------------------------------------------------------------------------------- /encoding/chardet/test_big5.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/html5lib/html5lib-tests/HEAD/encoding/chardet/test_big5.txt -------------------------------------------------------------------------------- /encoding/scripted/tests1.dat: -------------------------------------------------------------------------------- 1 | #data 2 | 3 | 4 | #encoding 5 | iso-8859-2 6 | -------------------------------------------------------------------------------- /lint_lib/_vendor/funcparserlib/util.pyi: -------------------------------------------------------------------------------- 1 | from typing import TypeVar, Callable, List, Text 2 | 3 | _A = TypeVar("_A") 4 | 5 | def pretty_tree( 6 | x: _A, kids: Callable[[_A], List[_A]], show: Callable[[_A], Text] 7 | ) -> Text: ... 8 | -------------------------------------------------------------------------------- /tokenizer/pendingSpecChanges.test: -------------------------------------------------------------------------------- 1 | {"tests": [ 2 | 3 | {"description":" 6 | Yahoo! JAPAN 7 | 8 |
23 | #errors 24 | (1,3): expected-doctype-but-got-start-tag 25 | (1,35): unexpected-start-tag-implies-end-tag 26 | (1,35): adoption-agency-1.3 27 | (1,35): adoption-agency-1.3 28 | (1,35): expected-closing-tag-but-got-eof 29 | #document 30 | | 31 | | 32 | | 33 | | 34 | |
35 | | 36 | | "] 49 | } 50 | 51 | ]} -------------------------------------------------------------------------------- /lint_lib/_vendor/funcparserlib/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright © 2009/2021 Andrey Vlasovskikh 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | # software and associated documentation files (the "Software"), to deal in the Software 7 | # without restriction, including without limitation the rights to use, copy, modify, 8 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | # permit persons to whom the Software is furnished to do so, subject to the following 10 | # conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all copies 13 | # or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 16 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 17 | # PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 18 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 19 | # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 20 | # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | from __future__ import unicode_literals 23 | 24 | 25 | def pretty_tree(x, kids, show): 26 | """Return a pseudo-graphic tree representation of the object `x` similar to the 27 | `tree` command in Unix. 28 | 29 | Type: `(T, Callable[[T], List[T]], Callable[[T], str]) -> str` 30 | 31 | It applies the parameter `show` (which is a function of type `(T) -> str`) to get a 32 | textual representation of the objects to show. 33 | 34 | It applies the parameter `kids` (which is a function of type `(T) -> List[T]`) to 35 | list the children of the object to show. 36 | 37 | Examples: 38 | 39 | ```pycon 40 | >>> print(pretty_tree( 41 | ... ["foo", ["bar", "baz"], "quux"], 42 | ... lambda obj: obj if isinstance(obj, list) else [], 43 | ... lambda obj: "[]" if isinstance(obj, list) else str(obj), 44 | ... )) 45 | [] 46 | |-- foo 47 | |-- [] 48 | | |-- bar 49 | | `-- baz 50 | `-- quux 51 | 52 | ``` 53 | """ 54 | (MID, END, CONT, LAST, ROOT) = ("|-- ", "`-- ", "| ", " ", "") 55 | 56 | def rec(obj, indent, sym): 57 | line = indent + sym + show(obj) 58 | obj_kids = kids(obj) 59 | if len(obj_kids) == 0: 60 | return line 61 | else: 62 | if sym == MID: 63 | next_indent = indent + CONT 64 | elif sym == ROOT: 65 | next_indent = indent + ROOT 66 | else: 67 | next_indent = indent + LAST 68 | chars = [MID] * (len(obj_kids) - 1) + [END] 69 | lines = [rec(kid, next_indent, sym) for kid, sym in zip(obj_kids, chars)] 70 | return "\n".join([line] + lines) 71 | 72 | return rec(x, "", ROOT) 73 | -------------------------------------------------------------------------------- /lint_lib/_vendor/funcparserlib/parser.pyi: -------------------------------------------------------------------------------- 1 | from typing import ( 2 | Optional, 3 | Generic, 4 | TypeVar, 5 | Union, 6 | Callable, 7 | Tuple, 8 | Sequence, 9 | Any, 10 | List, 11 | Text, 12 | overload, 13 | ) 14 | from funcparserlib.lexer import Token 15 | 16 | _A = TypeVar("_A") 17 | _B = TypeVar("_B") 18 | _C = TypeVar("_C") 19 | _D = TypeVar("_D") 20 | 21 | class State: 22 | pos: int 23 | max: int 24 | parser: Union[Parser, _ParserCallable, None] 25 | def __init__( 26 | self, 27 | pos: int, 28 | max: int, 29 | parser: Union[Parser, _ParserCallable, None] = ..., 30 | ) -> None: ... 31 | 32 | _ParserCallable = Callable[[_A, State], Tuple[_B, State]] 33 | 34 | class Parser(Generic[_A, _B]): 35 | name: Text 36 | def __init__(self, p: Union[Parser[_A, _B], _ParserCallable]) -> None: ... 37 | def named(self, name: Text) -> Parser[_A, _B]: ... 38 | def define(self, p: Union[Parser[_A, _B], _ParserCallable]) -> None: ... 39 | def run(self, tokens: Sequence[_A], s: State) -> Tuple[_B, State]: ... 40 | def parse(self, tokens: Sequence[_A]) -> _B: ... 41 | @overload 42 | def __add__( # type: ignore[misc] 43 | self, other: _IgnoredParser[_A] 44 | ) -> Parser[_A, _B]: ... 45 | @overload 46 | def __add__(self, other: Parser[_A, _C]) -> _TupleParser[_A, Tuple[_B, _C]]: ... 47 | def __or__(self, other: Parser[_A, _C]) -> Parser[_A, Union[_B, _C]]: ... 48 | def __rshift__(self, f: Callable[[_B], _C]) -> Parser[_A, _C]: ... 49 | def bind(self, f: Callable[[_B], Parser[_A, _C]]) -> Parser[_A, _C]: ... 50 | def __neg__(self) -> _IgnoredParser[_A]: ... 51 | 52 | class _Ignored: 53 | value: Any 54 | def __init__(self, value: Any) -> None: ... 55 | 56 | class _IgnoredParser(Parser[_A, _Ignored]): 57 | @overload # type: ignore[override] 58 | def __add__(self, other: _IgnoredParser[_A]) -> _IgnoredParser[_A]: ... 59 | @overload # type: ignore[override] 60 | def __add__(self, other: Parser[_A, _C]) -> Parser[_A, _C]: ... 61 | 62 | class _TupleParser(Parser[_A, _B]): 63 | @overload # type: ignore[override] 64 | def __add__(self, other: _IgnoredParser[_A]) -> _TupleParser[_A, _B]: ... 65 | @overload 66 | def __add__(self, other: Parser[_A, Any]) -> Parser[_A, Any]: ... 67 | 68 | finished: Parser[Any, None] 69 | 70 | def many(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: ... 71 | def some(pred: Callable[[_A], bool]) -> Parser[_A, _A]: ... 72 | def a(value: _A) -> Parser[_A, _A]: ... 73 | def tok(type: Text, value: Optional[Text] = ...) -> Parser[Token, Text]: ... 74 | def pure(x: _A) -> Parser[_A, _A]: ... 75 | def maybe(p: Parser[_A, _B]) -> Parser[_A, Optional[_B]]: ... 76 | def skip(p: Parser[_A, Any]) -> _IgnoredParser[_A]: ... 77 | def oneplus(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: ... 78 | def forward_decl() -> Parser[Any, Any]: ... 79 | 80 | class NoParseError(Exception): 81 | msg: Text 82 | state: State 83 | def __init__(self, msg: Text, state: State) -> None: ... 84 | -------------------------------------------------------------------------------- /tokenizer/contentModelFlags.test: -------------------------------------------------------------------------------- 1 | {"tests": [ 2 | 3 | {"description":"PLAINTEXT content model flag", 4 | "initialStates":["PLAINTEXT state"], 5 | "lastStartTag":"plaintext", 6 | "input":"&body;", 7 | "output":[["Character", "&body;"]]}, 8 | 9 | {"description":"PLAINTEXT with seeming close tag", 10 | "initialStates":["PLAINTEXT state"], 11 | "lastStartTag":"plaintext", 12 | "input":"&body;", 13 | "output":[["Character", "&body;"]]}, 14 | 15 | {"description":"End tag closing RCDATA or RAWTEXT", 16 | "initialStates":["RCDATA state", "RAWTEXT state"], 17 | "lastStartTag":"xmp", 18 | "input":"foo", 19 | "output":[["Character", "foo"], ["EndTag", "xmp"]]}, 20 | 21 | {"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)", 22 | "initialStates":["RCDATA state", "RAWTEXT state"], 23 | "lastStartTag":"xmp", 24 | "input":"foo", 25 | "output":[["Character", "foo"], ["EndTag", "xmp"]]}, 26 | 27 | {"description":"End tag closing RCDATA or RAWTEXT (ending with space)", 28 | "initialStates":["RCDATA state", "RAWTEXT state"], 29 | "lastStartTag":"xmp", 30 | "input":"foobar", 61 | "output":[["Character", "bar"], ["EndTag", "xmp"]]}, 62 | 63 | {"description":"Partial end tags leading straight into partial end tags", 64 | "initialStates":["RCDATA state", "RAWTEXT state"], 65 | "lastStartTag":"xmp", 66 | "input":"", 67 | "output":[["Character", "bar", 73 | "output":[["Character", "bar"]]}, 74 | 75 | {"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA", 76 | "initialStates":["RCDATA state", "RAWTEXT state"], 77 | "lastStartTag":"xmp", 78 | "input":"foo", 79 | "output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]}, 80 | 81 | {"description":"RAWTEXT w/ something looking like an entity", 82 | "initialStates":["RAWTEXT state"], 83 | "lastStartTag":"xmp", 84 | "input":"&foo;", 85 | "output":[["Character", "&foo;"]]}, 86 | 87 | {"description":"RCDATA w/ an entity", 88 | "initialStates":["RCDATA state"], 89 | "lastStartTag":"textarea", 90 | "input":"<", 91 | "output":[["Character", "<"]]} 92 | 93 | ]} 94 | -------------------------------------------------------------------------------- /tree-construction/tests8.dat: -------------------------------------------------------------------------------- 1 | #data 2 |
3 |
4 | x 5 | #errors 6 | (1,5): expected-doctype-but-got-start-tag 7 | (3,7): unexpected-end-tag 8 | (3,8): expected-closing-tag-but-got-eof 9 | #document 10 | | 11 | | 12 | | 13 | |
14 | | " 15 | " 16 | |
17 | | " 18 | x" 19 | 20 | #data 21 |
x
22 | x 23 | #errors 24 | (1,5): expected-doctype-but-got-start-tag 25 | (2,7): unexpected-end-tag 26 | (2,8): expected-closing-tag-but-got-eof 27 | #document 28 | | 29 | | 30 | | 31 | |
32 | | "x" 33 | |
34 | | " 35 | x" 36 | 37 | #data 38 |
x
xx 39 | #errors 40 | (1,5): expected-doctype-but-got-start-tag 41 | (1,25): unexpected-end-tag 42 | (1,26): expected-closing-tag-but-got-eof 43 | #document 44 | | 45 | | 46 | | 47 | |
48 | | "x" 49 | |
50 | | "xx" 51 | 52 | #data 53 |
x
yz 54 | #errors 55 | (1,5): expected-doctype-but-got-start-tag 56 | (1,25): unexpected-end-tag 57 | (1,26): expected-closing-tag-but-got-eof 58 | #document 59 | | 60 | | 61 | | 62 | |
63 | | "x" 64 | |
65 | | "yz" 66 | 67 | #data 68 |
x
xx 69 | #errors 70 | (1,7): expected-doctype-but-got-start-tag 71 | (1,12): foster-parenting-start-tag 72 | (1,13): foster-parenting-character 73 | (1,18): foster-parenting-start-tag 74 | (1,24): foster-parenting-end-tag 75 | (1,25): foster-parenting-start-tag 76 | (1,32): foster-parenting-end-tag 77 | (1,32): unexpected-end-tag 78 | (1,33): foster-parenting-character 79 | (1,33): eof-in-table 80 | #document 81 | | 82 | | 83 | | 84 | |
85 | | "x" 86 | |
87 | | "xx" 88 | |
89 | 90 | #data 91 |
  • 92 | #errors 93 | (1,7): expected-doctype-but-got-start-tag 94 | (1,11): foster-parenting-start-tag 95 | (1,15): foster-parenting-start-tag 96 | #document 97 | | 98 | | 99 | | 100 | |
  • 101 | |
  • 102 | | 103 | 104 | #data 105 | x
    x 106 | #errors 107 | (1,1): expected-doctype-but-got-chars 108 | (1,9): foster-parenting-character 109 | (1,9): eof-in-table 110 | #document 111 | | 112 | | 113 | | 114 | | "xx" 115 | |
    116 | 117 | #data 118 | x
    x 119 | #errors 120 | (1,1): expected-doctype-but-got-chars 121 | (1,15): unexpected-start-tag-implies-end-tag 122 | (1,16): foster-parenting-character 123 | (1,16): eof-in-table 124 | #document 125 | | 126 | | 127 | | 128 | | "x" 129 | |
    130 | | "x" 131 | |
    132 | 133 | #data 134 | a
    y 135 | #errors 136 | (1,3): expected-doctype-but-got-start-tag 137 | (1,24): adoption-agency-1.3 138 | (1,25): expected-closing-tag-but-got-eof 139 | #document 140 | | 141 | | 142 | | 143 | | 144 | | "a" 145 | |
    146 | |
    147 | | 148 | | "y" 149 | 150 | #data 151 |

    152 | #errors 153 | (1,3): expected-doctype-but-got-start-tag 154 | (1,15): adoption-agency-1.3 155 | (1,15): adoption-agency-1.3 156 | (1,15): expected-closing-tag-but-got-eof 157 | #document 158 | | 159 | | 160 | | 161 | | 162 | |

    163 | | 164 | |

    165 | | 166 | -------------------------------------------------------------------------------- /encoding/tests2.dat: -------------------------------------------------------------------------------- 1 | #data 2 | 39 | #encoding 40 | utf-8 41 | 42 | #data 43 | 44 | 50 | #encoding 51 | windows-1252 52 | 53 | #data 54 | 55 | #encoding 56 | utf-8 57 | 58 | #data 59 | 60 | #encoding 61 | windows-1252 62 | 63 | #data 64 | 76 | #encoding 77 | utf-8 78 | 79 | #data 80 | 86 | #encoding 87 | utf-8 88 | 89 | #data 90 | 91 | #encoding 92 | utf-8 93 | 94 | #data 95 | 96 | #encoding 97 | utf-8 98 | 99 | #data 100 | 101 | 102 | #encoding 103 | utf-8 104 | 105 | #data 106 | 107 | 108 | #encoding 109 | utf-8 110 | 111 | #data 112 | ñ 113 | 114 | #encoding 115 | utf-8 116 | -------------------------------------------------------------------------------- /tree-construction/tests17.dat: -------------------------------------------------------------------------------- 1 | #data 2 |

    3 | #errors 4 | (1,30): unexpected-start-tag 5 | (1,42): premature-eof 6 | #document 7 | | 8 | | 9 | | 10 | | 11 | |
    13 | | 14 | | 15 | 16 | #data 17 |
    18 | #errors 19 | (1,27): unexpected-start-tag 20 | (1,39): premature-eof 21 | #document 22 | | 23 | | 24 | | 25 | | 26 | | 28 | | 29 | | 30 | |
    31 | 32 | #data 33 |
    34 | #errors 35 | (1,42): unexpected-table-element-start-tag-in-select-in-table 36 | (1,42): expected-closing-tag-but-got-eof 37 | #document 38 | | 39 | | 40 | | 41 | | 42 | | 43 | | 44 | | 45 | |
    46 | | 48 | 49 | #data 50 |
    51 | #errors 52 | (1,42): unexpected-table-element-start-tag-in-select-in-table 53 | (1,42): expected-closing-tag-but-got-eof 54 | #document 55 | | 56 | | 57 | | 58 | | 59 | | 60 | | 61 | | 62 | |
    63 | | 65 | 66 | #data 67 | 68 | #errors 69 | (1,43): unexpected-table-element-start-tag-in-select-in-table 70 | (1,43): eof-in-table 71 | #document 72 | | 73 | | 74 | | 75 | | 76 | |
    77 | | 80 | | 81 | 82 | #data 83 | 84 | #errors 85 | (1,27): unexpected-start-tag-in-select 86 | (1,27): eof-in-select 87 | #document 88 | | 89 | | 90 | | 91 | | 92 | | 120 | #errors 121 | (1,30): unexpected-start-tag-in-select 122 | (1,30): eof-in-select 123 | #document 124 | | 125 | | 126 | | 127 | | 128 | | 132 | #errors 133 | (1,30): unexpected-start-tag-in-select 134 | (1,30): eof-in-select 135 | #document 136 | | 137 | | 138 | | 139 | | 140 | | 144 | #errors 145 | (1,30): unexpected-start-tag-in-select 146 | (1,30): eof-in-select 147 | #document 148 | | 149 | | 150 | | 151 | | 152 | |
    78 | |
    96 | #errors 97 | (1,27): unexpected-start-tag-in-select 98 | (1,27): eof-in-select 99 | #document 100 | | 101 | | 102 | | 103 | | 104 | | 108 | #errors 109 | (1,27): unexpected-start-tag-in-select 110 | (1,27): eof-in-select 111 | #document 112 | | 113 | | 114 | | 115 | | 116 | |
    156 | #errors 157 | (1,32): unexpected-start-tag-in-select 158 | (1,32): eof-in-select 159 | #document 160 | | 161 | | 162 | | 163 | | 164 | |
    a 168 | #errors 169 | #document 170 | | 171 | | 172 | | 173 | | 174 | | 175 | | 176 | | 177 | | "a" 178 | -------------------------------------------------------------------------------- /tree-construction/tests5.dat: -------------------------------------------------------------------------------- 1 | #data 2 | x 3 | #errors 4 | (1,7): expected-doctype-but-got-start-tag 5 | #document 6 | | 7 | | 8 | | --> x 15 | #errors 16 | (1,7): expected-doctype-but-got-start-tag 17 | (1,34): unexpected-end-tag 18 | #document 19 | | 20 | | 21 | | x 29 | #errors 30 | (1,7): expected-doctype-but-got-start-tag 31 | #document 32 | | 33 | | 34 | | x 41 | #errors 42 | (1,7): expected-doctype-but-got-start-tag 43 | #document 44 | | 45 | | 46 | | x 121 | #errors 122 | (1,7): expected-doctype-but-got-start-tag 123 | #document 124 | | 125 | | 126 | |
    149 | #errors 150 | (1,23): foster-parenting-character 151 | #document 152 | | 153 | | 154 | | 155 | | 156 | | "X" 157 | | 158 | | 130 | #errors 131 | Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE. 132 | #script-off 133 | #document 134 | | 135 | | 136 | |
    136 | #errors 137 | #document 138 | | 139 | | 140 | | 141 | | 142 | | 143 | |