├── .gitignore ├── .travis.yml ├── COPYING ├── COPYING.LIB ├── LICENSES ├── README.md ├── cl-html5-parser.asd ├── constants.lisp ├── cxml ├── cl-html5-parser-cxml.asd └── cxml-dom.lisp ├── entities.lisp ├── html5-parser-class.lisp ├── html5-parser.lisp ├── inputstream.lisp ├── packages.lisp ├── simple-tree.lisp ├── tests ├── cl-html5-parser-tests.asd ├── packages.lisp ├── run-tests.lisp ├── support.lisp ├── test-inputstream.lisp ├── test-parser.lisp ├── test-tokenizer.lisp ├── test-tree-builder.lisp └── testdata │ ├── encoding │ ├── test-yahoo-jp.dat │ ├── tests1.dat │ └── tests2.dat │ ├── sanitizer │ └── tests1.dat │ ├── serializer │ ├── core.test │ ├── injectmeta.test │ ├── optionaltags.test │ ├── options.test │ └── whitespace.test │ ├── sniffer │ └── htmlOrFeed.json │ ├── tokenizer │ ├── aa-lisp-tests.test │ ├── contentModelFlags.test │ ├── domjs.test │ ├── entities.test │ ├── escapeFlag.test │ ├── namedEntities.test │ ├── numericEntities.test │ ├── pendingSpecChanges.test │ ├── test1.test │ ├── test2.test │ ├── test3.test │ ├── test4.test │ ├── unicodeChars.test │ ├── unicodeCharsProblematic.test │ └── xmlViolation.test │ └── tree-construction │ ├── adoption01.dat │ ├── adoption02.dat │ ├── button.dat │ ├── comments01.dat │ ├── doctype01.dat │ ├── domjs-unsafe.dat │ ├── entities01.dat │ ├── entities02.dat │ ├── html5test-com.dat │ ├── inbody01.dat │ ├── isindex.dat │ ├── pending-spec-changes-plain-text-unsafe.dat │ ├── pending-spec-changes.dat │ ├── plain-text-unsafe.dat │ ├── scriptdata01.dat │ ├── tables01.dat │ ├── tests1.dat │ ├── tests10.dat │ ├── tests11.dat │ ├── tests12.dat │ ├── tests14.dat │ ├── tests15.dat │ ├── tests16.dat │ ├── tests17.dat │ ├── tests18.dat │ ├── tests19.dat │ ├── tests2.dat │ ├── tests20.dat │ ├── tests21.dat │ ├── tests22.dat │ ├── tests23.dat │ ├── tests24.dat │ ├── tests25.dat │ ├── tests26.dat │ ├── tests3.dat │ ├── tests4.dat │ ├── tests5.dat │ ├── tests6.dat │ ├── tests7.dat │ ├── tests8.dat │ ├── tests9.dat │ ├── 
tests_innerHTML_1.dat │ ├── tricky01.dat │ ├── webkit01.dat │ └── webkit02.dat ├── tokenizer.lisp ├── toxml.lisp ├── tree-help.lisp └── xmls.lisp /.gitignore: -------------------------------------------------------------------------------- 1 | README.html 2 | 3 | *.fasl 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: lisp 2 | sudo: required 3 | 4 | branches: 5 | only: 6 | - master 7 | 8 | env: 9 | matrix: 10 | - LISP=sbcl 11 | 12 | install: 13 | - curl -L https://github.com/luismbo/cl-travis/raw/master/install.sh | sh 14 | 15 | script: 16 | - cl -e "(ql:quickload :cl-html5-parser) (ql:quickload :cl-html5-parser-tests) (html5-parser-tests::run-html5-parser-tests)" 17 | -------------------------------------------------------------------------------- /COPYING.LIB: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 
24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 
64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 
98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /LICENSES: -------------------------------------------------------------------------------- 1 | This software is ported from html5lib, see http://code.google.com/p/html5lib/ 2 | html5lib is distributed under the following license: 3 | 4 | Copyright (c) 2006-2011 The Authors 5 | 6 | Contributors: 7 | James Graham - jg307@cam.ac.uk 8 | Anne van Kesteren - annevankesteren@gmail.com 9 | Lachlan Hunt - lachlan.hunt@lachy.id.au 10 | Matt McDonald - kanashii@kanashii.ca 11 | Sam Ruby - rubys@intertwingly.net 12 | Ian Hickson (Google) - ian@hixie.ch 13 | Thomas Broyer - t.broyer@ltgt.net 14 | Jacques Distler - distler@golem.ph.utexas.edu 15 | Henri Sivonen - hsivonen@iki.fi 16 | Adam Barth - abarth@webkit.org 17 | Eric Seidel - eric@webkit.org 18 | The Mozilla Foundation (contributions from Henri Sivonen since 2008) 19 | David Flanagan (Mozilla) - dflanagan@mozilla.com 20 | 21 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 22 | 23 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cl-html5-parser: HTML5 parser for Common Lisp 2 | ============================================= 3 | 4 | ## Abstract 5 | 6 | cl-html5-parser is an HTML5 parser for Common Lisp with the following features: 7 | 8 | * It is a port of the Python library [html5lib](http://code.google.com/p/html5lib/). 9 | * It passes all relevant tests from html5lib. 10 | * It is not tied to a specific DOM implementation. 11 | 12 | 13 | ## Requirements 14 | 15 | * SBCL or ECL. 16 | * CL-PPCRE, FLEXI-STREAMS and STRING-CASE. 17 | 18 | Might work with CLISP, ABCL and Clozure CL, but many of the tests don't pass there. 19 | 20 | 21 | ## Usage 22 | 23 | 24 | ### Parsing 25 | 26 | Parsing functions are in the package HTML5-PARSER. 27 | 28 | ``` 29 | parse-html5 source &key encoding strictp dom 30 | => document, errors 31 | ``` 32 | 33 | Parse an HTML document from source. Source can be a string, a pathname 34 | or a stream. When parsing from a stream, encoding detection is not 35 | supported; the encoding must be supplied via the encoding keyword 36 | parameter. 37 | 38 | When strictp is true, parsing stops on first error. 39 | 40 | Returns two values. The primary value is the document node. The 41 | secondary value is a list of errors found during parsing. The format 42 | of this list is subject to change. 43 | 44 | The type of document depends on the dom parameter. 
By default it's an 45 | instance of cl-html5-parser's own DOM implementation. See the DOM 46 | paragraph below for more information. 47 | 48 | ``` 49 | parse-html5-fragment source &key container encoding strictp dom 50 | => document-fragment, errors 51 | ``` 52 | 53 | Parses a fragment of HTML. Container sets the context, defaults to 54 | "div". Returns a document-fragment node. For the other parameters see 55 | `PARSE-HTML5`. 56 | 57 | 58 | ### Example 59 | ```common-lisp 60 | (html5-parser:parse-html5-fragment "Parse <i>some</i> HTML" :dom :xmls) 61 | ==> ("Parse " ("i" NIL "some") " HTML") 62 | ``` 63 | 64 | ### The DOM 65 | 66 | Parsing HTML5 is not possible without a 67 | [DOM](http://en.wikipedia.org/wiki/Document_Object_Model). cl-html5-parser 68 | defines a minimal DOM implementation for this task. Functions for 69 | traversing documents are exported by the HTML5-PARSER package. 70 | 71 | Alternatively the parser can be instructed to convert the document 72 | into other DOM implementations using the dom parameter. The conversion 73 | is done by simply calling the generic function 74 | transform-html5-dom. Support for other DOM implementations can be 75 | added by defining new methods for this generic function. The dom 76 | parameter is either a symbol or a list where the car is a symbol and 77 | the rest is key arguments. Below are the currently supported target 78 | types. 79 | 80 | 81 | ### Namespace of elements and attributes 82 | 83 | The HTML5 syntax has no support for namespaces, however the standard 84 | defines special rules to set the expected namespace for SVG and MathML 85 | elements and the following attributes: `xlink:actuate`, 86 | `xlink:arcrole`, `xlink:href`, `xlink:role`, `xlink:show`, 87 | `xlink:title`, `xlink:type`, `xml:base`, `xml:lang`, `xml:space`, 88 | `xmlns`, `xmlns:xlink`. Please note that this only applies to SVG and 89 | MathML elements. Attributes of HTML elements will never get a 90 | namespace. 
91 | 92 | #### Examples 93 | 94 | ```html 95 | " :dom :xmls-ns) 105 | ==> 106 | (("html" . "http://www.w3.org/1999/xhtml") 107 | (("xmlU00003Alang" "en") ("xmlU000040lang" "en")) ("head" NIL) ("body" NIL)) 108 | ``` 109 | 110 | On an HTML element `xml:lang` and `xml@lang` are just attributes with 111 | unusual characters in their name. In the HTML DOM these names are kept 112 | as is, but when converting to XML they are escaped, to ensure the XML 113 | becomes valid. This escaping can be reversed with 114 | `HTML5-PARSER:XML-UNESCAPE-NAME`. 115 | 116 | ```common-lisp 117 | (html5-parser:parse-html5 "" :dom :xmls-ns) 118 | ==> 119 | (("html" . "http://www.w3.org/1999/xhtml") NIL ("head" NIL) 120 | ("body" NIL 121 | (("svg" . "http://www.w3.org/2000/svg") 122 | (("xml:lang" "en") ("xmlU000040lang" "en") ("xlink:href" "#") 123 | ("xmlns:xlink" "http://www.w3.org/1999/xlink") ("xlinkU00003Ato" "#"))))) 124 | ``` 125 | 126 | In this case `xml:lang` and `xmlns:xlink` are among the 127 | attributes with a known namespace when used on SVG and MathML 128 | elements. However `xlink:to` is not in the list, even if it's defined in 129 | the xlink standard. 130 | 131 | ### :XMLS or (:XMLS &key namespace comments) 132 | 133 | Converts a node into a simple 134 | [XMLS](http://common-lisp.net/project/xmls/)-like list structure. 135 | If node is a document fragment a list of XMLS nodes is returned. In 136 | all other cases a single XMLS node is returned. 137 | 138 | If namespace argument is true, tag names are conses of name and 139 | namespace URI. 140 | 141 | By default comments are stripped. If comments argument is true, 142 | comments are returned as (:COMMENT NIL "comment text"). This is an extension 143 | of the XMLS format. 144 | 145 | 146 | ### :CXML 147 | 148 | Convert to [Closure XML Parser](http://common-lisp.net/project/cxml/) 149 | DOM implementation. In order to use this you must load/depend on the 150 | system cl-html5-parser-cxml. 
151 | 152 | 153 | ## License 154 | 155 | This library is available under the 156 | [GNU Lesser General Public License v3.0](http://www.gnu.org/licenses/lgpl.html). 157 | -------------------------------------------------------------------------------- /cl-html5-parser.asd: -------------------------------------------------------------------------------- 1 | ;;;; HTML5 parser for Common Lisp 2 | ;;;; 3 | ;;;; Copyright (C) 2017 Thomas Bakketun 4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke 5 | ;;;; Copyright (C) 2012 Mathias Hellevang 6 | ;;;; Copyright (C) 2012 Stian Sletner 7 | ;;;; 8 | ;;;; This library is free software: you can redistribute it and/or modify 9 | ;;;; it under the terms of the GNU Lesser General Public License as published 10 | ;;;; by the Free Software Foundation, either version 3 of the License, or 11 | ;;;; (at your option) any later version. 12 | ;;;; 13 | ;;;; This library is distributed in the hope that it will be useful, 14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ;;;; GNU General Public License for more details. 17 | ;;;; 18 | ;;;; You should have received a copy of the GNU General Public License 19 | ;;;; along with this library. If not, see . 
;;; ASDF definition of the core parser system.  Because of :SERIAL T the
;;; component files are processed strictly in the order listed below.
(defsystem #:cl-html5-parser
  :name "cl-html5-parser"
  :description "A HTML5 parser for Common Lisp"
  :licence "GNU Lesser General Public License"
  :author "Thomas Bakketun "
  :depends-on (:cl-ppcre :flexi-streams :string-case)
  :serial t
  :components ((:file "packages")
               (:file "constants")
               (:file "entities")
               (:file "inputstream")
               (:file "tokenizer")
               (:file "simple-tree")
               (:file "html5-parser-class")
               (:file "tree-help")
               (:file "html5-parser")
               (:file "toxml")
               (:file "xmls")))


;;; TEST-OP on the core system: load the separate test system, then call
;;; its entry point.  The runner is looked up by name at run time because
;;; the HTML5-PARSER-TESTS package does not exist until that system has
;;; been loaded.
(defmethod perform ((o test-op) (c (eql (find-system '#:cl-html5-parser))))
  (operate 'load-op '#:cl-html5-parser-tests)
  (let ((runner (find-symbol (string :run-html5-parser-tests)
                             :html5-parser-tests)))
    (funcall runner)))
-------------------------------------------------------------------------------- /cxml/cl-html5-parser-cxml.asd: -------------------------------------------------------------------------------- 1 | ;;;; HTML5 parser for Common Lisp 2 | ;;;; 3 | ;;;; Copyright (C) 2017 Thomas Bakketun 4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke 5 | ;;;; Copyright (C) 2012 Mathias Hellevang 6 | ;;;; Copyright (C) 2012 Stian Sletner 7 | ;;;; 8 | ;;;; This library is free software: you can redistribute it and/or modify 9 | ;;;; it under the terms of the GNU Lesser General Public License as published 10 | ;;;; by the Free Software Foundation, either version 3 of the License, or 11 | ;;;; (at your option) any later version. 12 | ;;;; 13 | ;;;; This library is distributed in the hope that it will be useful, 14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ;;;; GNU General Public License for more details. 17 | ;;;; 18 | ;;;; You should have received a copy of the GNU General Public License 19 | ;;;; along with this library. If not, see . 
;;; ASDF definition of the optional CXML backend: a single file,
;;; cxml-dom.lisp, layered on top of the core parser and CXML.
;;; The :NAME string now matches the system being defined.
(defsystem #:cl-html5-parser-cxml
  :name "cl-html5-parser-cxml"
  :description "CXML integration for cl-html5-parser"
  :licence "GNU Lesser General Public License"
  :author "Thomas Bakketun "
  :depends-on (:cl-html5-parser :cxml)
  :serial t
  :components ((:file "cxml-dom")))
-------------------------------------------------------------------------------- /cxml/cxml-dom.lisp: -------------------------------------------------------------------------------- 1 | ;;;; HTML5 parser for Common Lisp 2 | ;;;; 3 | ;;;; Copyright (C) 2014 Joe Taylor 4 | ;;;; Copyright (C) 2012 Thomas Bakketun 5 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke 6 | ;;;; Copyright (C) 2012 Mathias Hellevang 7 | ;;;; Copyright (C) 2012 Stian Sletner 8 | ;;;; 9 | ;;;; This library is free software: you can redistribute it and/or modify 10 | ;;;; it under the terms of the GNU Lesser General Public License as published 11 | ;;;; by the Free Software Foundation, either version 3 of the License, or 12 | ;;;; (at your option) any later version. 13 | ;;;; 14 | ;;;; This library is distributed in the hope that it will be useful, 15 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | ;;;; GNU General Public License for more details. 18 | ;;;; 19 | ;;;; You should have received a copy of the GNU General Public License 20 | ;;;; along with this library. If not, see . 
(in-package #:html5-parser)


(defmethod transform-html5-dom ((to-type (eql :cxml)) node &key)
  "Rebuild NODE and everything below it as CXML (rune-dom) DOM objects.
Returns the new document fragment when NODE is a document fragment,
otherwise the new document."
  (let ((doctype nil)
        (dom-document nil)
        (dom-fragment nil))
    (labels ((attach (parent child)
               ;; PARENT is NIL for nodes converted at the top level; those
               ;; are appended directly to the document.
               (dom:append-child (or parent dom-document) child))
             (convert-element (node parent xlink-defined)
               (let* ((namespace-uri (node-namespace node))
                      (qname (xml-escape-name (node-name node)))
                      (element
                        (cond (dom-document
                               (dom:create-element-ns dom-document namespace-uri qname))
                              (t
                               ;; No document yet: create it with this element
                               ;; as its document element, using whatever
                               ;; doctype has been recorded so far.
                               (setf dom-document
                                     (dom:create-document 'rune-dom:implementation
                                                          namespace-uri
                                                          qname
                                                          doctype))
                               (dom:document-element dom-document)))))
                 ;; Declare the default namespace unless the parent is
                 ;; already in the same namespace.
                 (unless (and parent
                              (equal namespace-uri (dom:namespace-uri parent)))
                   (dom:set-attribute-ns element (html5-constants:find-namespace "xmlns")
                                         "xmlns" namespace-uri))
                 (element-map-attributes
                  (lambda (attr-name attr-namespace attr-value)
                    ;; Emit xmlns:xlink once, on the first element that
                    ;; carries an xlink attribute.  The flag is then passed
                    ;; down to this element's children.
                    (when (and (not xlink-defined)
                               (equal attr-namespace (html5-constants:find-namespace "xlink")))
                      (dom:set-attribute element "xmlns:xlink" (html5-constants:find-namespace "xlink"))
                      (setf xlink-defined t))
                    (if attr-namespace
                        (dom:set-attribute-ns element attr-namespace attr-name attr-value)
                        (dom:set-attribute element (xml-escape-name attr-name) attr-value)))
                  node)
                 (element-map-children (lambda (child)
                                         (convert child element xlink-defined))
                                       node)
                 (attach parent element)))
             (convert (node &optional parent xlink-defined)
               (ecase (node-type node)
                 (:document-type
                  ;; Only recorded here; it is consumed when the document
                  ;; is created for the first element.
                  (setf doctype (dom:create-document-type 'rune-dom:implementation
                                                          (xml-escape-name (node-name node))
                                                          (node-public-id node)
                                                          (node-system-id node))))
                 (:document
                  (element-map-children #'convert node))
                 (:document-fragment
                  (setf dom-document (dom:create-document 'rune-dom:implementation nil nil nil))
                  (setf dom-fragment (dom:create-document-fragment dom-document))
                  (element-map-children (lambda (child)
                                          (convert child dom-fragment xlink-defined))
                                        node))
                 (:element
                  (convert-element node parent xlink-defined))
                 (:text
                  (attach parent (dom:create-text-node dom-document (node-value node))))
                 (:comment
                  (attach parent (dom:create-comment dom-document (node-value node)))))))
      (convert node))
    (or dom-fragment dom-document)))
-------------------------------------------------------------------------------- /html5-parser-class.lisp: -------------------------------------------------------------------------------- 1 | ;;;; HTML5 parser for Common Lisp 2 | ;;;; 3 | ;;;; Copyright (C) 2012 Thomas Bakketun 4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke 5 | ;;;; Copyright (C) 2012 Mathias Hellevang 6 | ;;;; Copyright (C) 2012 Stian Sletner 7 | ;;;; 8 | ;;;; This library is free software: you can redistribute it and/or modify 9 | ;;;; it under the terms of the GNU Lesser General Public License as published 10 | ;;;; by the Free Software Foundation, either version 3 of the License, or 11 | ;;;; (at your option) any later version. 12 | ;;;; 13 | ;;;; This library is distributed in the hope that it will be useful, 14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ;;;; GNU General Public License for more details. 17 | ;;;; 18 | ;;;; You should have received a copy of the GNU General Public License 19 | ;;;; along with this library. If not, see . 
(in-package :html5-parser)

;;; Declared special but left unbound at load time.
;;; NOTE(review): presumably bound to the active HTML-PARSER instance while a
;;; parse is running -- the binding site is not in this file; confirm.
(defvar *parser*)

;;; Parser state.  Only STRICT can be given to MAKE-INSTANCE (:STRICT) and
;;; only PHASE has an accessor (PARSER-PHASE); the class defines no other
;;; initargs or accessors.  Slots without an :INITFORM start out unbound.
(defclass html-parser ()
  ((html-namespace :initform (find-namespace "html"))
   (strict :initarg :strict)
   (inner-html-mode)
   (container :initform "div")
   (tokenizer)
   (document :initform (make-document))
   (errors :initform '())
   (phase :accessor parser-phase)
   (first-start-tag)
   (compat-mode)
   (inner-html)
   (last-phase)
   (original-phase)
   (before-rcdata-phase)
   (character-tokens :initform nil)
   (frameset-ok)
   (open-elements)
   (active-formatting-elements)
   (head-pointer)
   (form-pointer)
   (insert-from-table)
   (in-body-process-space-characters-mode :initform :non-pre)))
-------------------------------------------------------------------------------- /packages.lisp: -------------------------------------------------------------------------------- 1 | ;;;; HTML5 parser for Common Lisp 2 | ;;;; 3 | ;;;; Copyright (C) 2012 Thomas Bakketun 4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke 5 | ;;;; Copyright (C) 2012 Mathias Hellevang 6 | ;;;; Copyright (C) 2012 Stian Sletner 7 | ;;;; 8 | ;;;; This library is free software: you can redistribute it and/or modify 9 | ;;;; it under the terms of the GNU Lesser General Public License as published 10 | ;;;; by the Free Software Foundation, either version 3 of the License, or 11 | ;;;; (at your option) any later version. 12 | ;;;; 13 | ;;;; This library is distributed in the hope that it will be useful, 14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ;;;; GNU General Public License for more details. 17 | ;;;; 18 | ;;;; You should have received a copy of the GNU General Public License 19 | ;;;; along with this library. If not, see . 
;;; Package holding the constant tables and small character helpers that
;;; the parser package uses.  Each symbol is exported exactly once; the
;;; set of exported symbols is the same as before, only repeated entries
;;; were dropped.
(defpackage :html5-constants
  (:use
   :common-lisp)
  (:export
   #:+namespaces+
   #:find-namespace
   #:find-prefix
   #:+scoping-elements+
   #:+formatting-elements+
   #:+special-elements+
   #:+html-integration-point-elements+
   #:+mathml-text-integration-point-elements+
   #:+eof+
   #:+token-types+
   #:+tag-token-types+
   #:+space-characters+
   #:+table-insert-mode-elements+
   #:+ascii-lowercase+
   #:+ascii-uppercase+
   #:+ascii-letters+
   #:ascii-letter-p
   #:+digits+
   #:+hex-digits+
   #:ascii-upper-2-lower
   #:+replacement-characters+
   #:+cdata-elements+
   #:+rcdata-elements+
   #:+quirks-mode-doctypes-regexp+
   #:+heading-elements+))

;;; Public API of the parser: the two parsing entry points, the DOM
;;; conversion hook, the XML name escaping helpers and the simple DOM.
(defpackage :html5-parser
  (:use
   :common-lisp
   :html5-constants
   :cl-ppcre)
  (:export
   #:parse-html5
   #:parse-html5-fragment
   #:transform-html5-dom

   #:xml-escape-name
   #:xml-unescape-name

   ;; A simple DOM
   #:make-document
   #:make-fragment
   #:make-doctype
   #:make-comment
   #:make-element
   #:make-text-node

   #:node-type
   #:node-name
   #:node-namespace
   #:node-value
   #:node-public-id
   #:node-system-id
   #:element-attribute

   #:node-append-child
   #:node-insert-before
   #:node-remove-child

   #:node-parent
   #:node-first-child
   #:node-last-child
   #:node-previous-sibling
   #:node-next-sibling
   #:element-map-attributes
   #:element-map-attributes*
   #:element-map-children))
-------------------------------------------------------------------------------- /simple-tree.lisp: -------------------------------------------------------------------------------- 1 | ;;;; HTML5 parser for Common Lisp 2 | ;;;; 3 | ;;;; Copyright (C) 2012 Thomas
Bakketun 4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke 5 | ;;;; Copyright (C) 2012 Mathias Hellevang 6 | ;;;; Copyright (C) 2012 Stian Sletner 7 | ;;;; 8 | ;;;; This library is free software: you can redistribute it and/or modify 9 | ;;;; it under the terms of the GNU Lesser General Public License as published 10 | ;;;; by the Free Software Foundation, either version 3 of the License, or 11 | ;;;; (at your option) any later version. 12 | ;;;; 13 | ;;;; This library is distributed in the hope that it will be useful, 14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ;;;; GNU General Public License for more details. 17 | ;;;; 18 | ;;;; You should have received a copy of the GNU General Public License 19 | ;;;; along with this library. If not, see . 20 |

(in-package :html5-parser)

;; A basic implementation of a DOM-core like thing

(defclass node ()
  ((type :initform :node :allocation :class :reader node-type)
   (name :initarg :name :initform nil :reader node-name)
   (namespace :initarg :namespace :initform nil :reader node-namespace)
   (parent :initform nil :reader node-parent)
   (value :initform nil :initarg :value
          :accessor node-value)
   ;; Children are kept as a plain list.  LAST-CHILD caches the last cons
   ;; of that list so NODE-APPEND-CHILD does not have to walk it.
   (child-nodes :initform nil :accessor %node-child-nodes)
   (last-child :initform nil :accessor last-child)))

;; Keep the cached tail cons in sync whenever the child list is replaced
;; through the accessor.
(defmethod (setf %node-child-nodes) :after (value (node node))
  (setf (last-child node) (last value)))

;; Each subclass overrides the class-allocated TYPE slot, so NODE-TYPE
;; returns the same keyword for every instance of that class.

(defclass document (node)
  ((type :initform :document :allocation :class)))

(defclass document-fragment (document)
  ((type :initform :document-fragment :allocation :class)))

(defclass document-type (node)
  ((type :initform :document-type :allocation :class)
   (public-id :initarg :public-id :reader node-public-id)
   (system-id :initarg :system-id :reader node-system-id)))

(defclass text-node (node)
  ((type :initform :text :allocation :class)))

(defclass element (node)
  ((type :initform :element :allocation :class)
   (attributes :initform nil :accessor %node-attributes)))

(defclass comment-node (node)
  ((type :initform :comment :allocation :class)))

;;;
;;; Creating nodes
;;;
;;; Every constructor except MAKE-DOCUMENT takes a DOCUMENT argument and
;;; ignores it.

(defun make-document ()
  "Create an empty document node."
  (make-instance 'document))

(defun make-fragment (document)
  "Create an empty document fragment.  DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'document-fragment))

(defun make-doctype (document name public-id system-id)
  "Create a document-type node from NAME, PUBLIC-ID and SYSTEM-ID.
DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'document-type :name name :public-id public-id :system-id system-id))

(defun make-comment (document data)
  "Create a comment node whose value is DATA.  DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'comment-node :value data))

(defun make-element (document name namespace)
  "Create an element named NAME in NAMESPACE, with no attributes or
children.  DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'element :name name :namespace namespace))

(defun make-text-node (document data)
  "Create a text node whose value is DATA.  DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'text-node :value data))

;;;
;;; Node methods
;;;

(defun node-first-child (node)
  "Return the first child of NODE, or NIL when it has none."
  (car (%node-child-nodes node)))

(defun node-last-child (node)
  "Return the last child of NODE, or NIL when it has none."
  (car (last-child node)))

(defun node-previous-sibling (node)
  "Return the child that comes just before NODE in its parent's child
list.  Returns NIL when NODE is the first child or has no parent."
  (let ((parent (node-parent node)))
    ;; A parentless node has no siblings.  Without this guard the child
    ;; accessor would be applied to NIL and signal an error.
    (when parent
      (loop for (this next) on (%node-child-nodes parent)
            when (eql next node) do (return this)))))

(defun node-next-sibling (node)
  "Return the child that comes just after NODE in its parent's child
list.  Returns NIL when NODE is the last child or has no parent."
  (let ((parent (node-parent node)))
    (when parent
      (loop for (this next) on (%node-child-nodes parent)
            when (eql this node) do (return next)))))

(defun node-append-child (node child)
  "Make CHILD the last child of NODE, first removing it from its current
parent if it has one.  Returns NODE's updated child list."
  (let ((old-parent (node-parent child)))
    (when old-parent
      (node-remove-child old-parent child)))
  (setf (slot-value child 'parent) node)
  (if (%node-child-nodes node)
      ;; Splice a fresh cons after the cached tail and make it the new
      ;; tail.  This deliberately bypasses the (SETF %NODE-CHILD-NODES)
      ;; :AFTER method, so the list is never re-walked.
      (setf (last-child node)
            (push child (cdr (last-child node))))
      (setf (%node-child-nodes node)
            (list child)))
  (%node-child-nodes node))


(defun node-remove-child (node child)
  "Remove CHILD from NODE's child list and clear CHILD's parent link."
  ;; Going through the accessor lets the :AFTER method refresh the
  ;; cached last cons.
  (setf (%node-child-nodes node)
        (remove child (%node-child-nodes node)))
  (setf (slot-value child 'parent) nil))

(defun node-insert-before (node child insert-before)
  "Insert CHILD into NODE's children immediately before INSERT-BEFORE.
When INSERT-BEFORE is NIL or is not a child of NODE, CHILD is appended.
CHILD is first detached from its current parent, mirroring
NODE-APPEND-CHILD, so it never ends up in two child lists.  Returns
NODE's child list."
  (if (and (eq child insert-before)
           (eq (node-parent child) node))
      ;; Inserting a node before itself leaves the tree unchanged.
      (%node-child-nodes node)
      (progn
        (when (node-parent child)
          (node-remove-child (node-parent child) child))
        (setf (slot-value child 'parent) node)
        (let* ((children (%node-child-nodes node))
               (index (position insert-before children :test #'eql)))
          ;; Build the new list without mutating the old one; the setf
          ;; triggers the :AFTER method that refreshes the cached tail.
          (setf (%node-child-nodes node)
                (if index
                    (append (subseq children 0 index)
                            (cons child (nthcdr index children)))
                    (append children (list child))))))))

(defun element-attribute (node attribute &optional namespace)
  "Return the value of ATTRIBUTE (in NAMESPACE) on NODE, or NIL."
  (cdr (assoc (cons attribute namespace)
              (%node-attributes node)
              :test #'equal)))

(defun (setf element-attribute) (new-value node attribute
                                 &optional namespace)
  "Set ATTRIBUTE (in NAMESPACE) on NODE to NEW-VALUE; both must be strings.
Returns NEW-VALUE."
  (check-type attribute string)
  (check-type new-value string)
  (let ((old-attr (assoc (cons attribute namespace)
                         (%node-attributes node)
                         :test #'equal)))
    (if old-attr
        (setf (cdr old-attr) new-value)
        (push (cons (cons attribute namespace) new-value) (%node-attributes node)))
    ;; A setf function should yield the stored value on both paths; PUSH
    ;; alone would return the whole attribute alist.
    new-value))

;;;
;;; Traversing
;;;

(defun element-map-children (function node)
  "Call FUNCTION on each child of NODE in order.  Returns NIL."
  (map nil function (%node-child-nodes node)))

(defun element-map-attributes* (function node)
  "Call FUNCTION with (name namespace value) for each attribute of NODE."
  (loop for ((name . namespace) . value) in (%node-attributes node)
        do (funcall function name namespace value)))

(defun element-map-attributes (function node)
  "Like ELEMENT-MAP-ATTRIBUTES*, but a namespaced attribute's name is
passed as \"prefix:name\", with the prefix looked up via
HTML5-CONSTANTS:FIND-PREFIX."
  (element-map-attributes*
   (lambda (name namespace value)
     (funcall function
              (if namespace
                  (format nil "~A:~A" (html5-constants:find-prefix namespace) name)
                  name)
              namespace
              value))
   node))

;;
;; Printing for the ease of debugging
;;

(defun node-count (tree)
  "Count the nodes below TREE: elements count themselves plus their
descendants, documents and fragments count only descendants, and any
other node counts as 1."
  ;; Summing in a loop avoids APPLY, which is bounded by
  ;; CALL-ARGUMENTS-LIMIT on nodes with very many children.
  (flet ((count-children ()
           (loop for child in (%node-child-nodes tree)
                 sum (node-count child))))
    (typecase tree
      (element (1+ (count-children)))
      ((or document document-fragment) (count-children))
      (t 1))))

(defmethod print-object ((node document) stream)
  (print-unreadable-object (node stream :type t :identity t)
    (format stream "nodes: ~A" (node-count node))))

(defmethod print-object ((node node) stream)
  (print-unreadable-object (node stream :type t :identity t)
    (format stream "~A" (node-name node))))

(defmethod print-object ((node text-node) stream)
  (print-unreadable-object (node stream :type t :identity t)
    (let ((text (node-value node)))
      ;; *PRINT-LENGTH* does not apply to strings, so long text has to be
      ;; truncated by hand to keep debug output short.
      (if (and (stringp text) (> (length text) 30))
          (progn
            (write (subseq text 0 30) :stream stream)
            (write-string "..." stream))
          (write text :stream stream)))))

-------------------------------------------------------------------------------- /tests/cl-html5-parser-tests.asd: --------------------------------------------------------------------------------
;;;; HTML5 parser for Common Lisp
;;;;
;;;; Copyright (C) 2017 Thomas Bakketun
;;;; Copyright (C) 2012 Asgeir Bjørlykke
;;;; Copyright (C) 2012 Mathias Hellevang
;;;; Copyright (C) 2012 Stian Sletner
;;;;
;;;; This library is free software: you can redistribute it and/or modify
;;;; it under the terms of the GNU Lesser General Public License as published
;;;; by the Free Software Foundation, either version 3 of the License, or
;;;; (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.

;; Test system.  The package definition uses CL-PPCRE, so it is listed
;; explicitly rather than relied on as a transitive dependency.
;; :SERIAL T makes each file depend on the ones before it: every test
;; file needs "packages" (and most need "support") loaded first.
(defsystem #:cl-html5-parser-tests
  :depends-on (:cl-html5-parser :cl-ppcre :stefil :json-streams :split-sequence)
  :serial t
  :components ((:file "packages")
               (:file "support")
               (:file "test-inputstream")
               (:file "test-tokenizer")
               (:file "test-tree-builder")
               (:file "test-parser")
               (:file "run-tests")))

-------------------------------------------------------------------------------- /tests/packages.lisp: --------------------------------------------------------------------------------
;;;; HTML5 parser for Common Lisp
;;;;
;;;; Copyright (C) 2012 Thomas Bakketun
;;;; Copyright (C) 2012 Asgeir Bjørlykke
;;;; Copyright (C) 2012 Mathias Hellevang
;;;; Copyright (C) 2012 Stian Sletner
;;;;
;;;; This library is free software: you can redistribute it and/or modify
;;;; it under the terms of the GNU Lesser General Public License as published
;;;; by the Free Software Foundation, either version 3 of the License, or
;;;; (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.

;; Package holding the whole test suite.
(defpackage #:html5-parser-tests
  (:use #:common-lisp
        #:html5-parser
        #:cl-ppcre
        #:stefil))

-------------------------------------------------------------------------------- /tests/run-tests.lisp: --------------------------------------------------------------------------------
;;;; HTML5 parser for Common Lisp
;;;;
;;;; Copyright (C) 2012 Thomas Bakketun
;;;; Copyright (C) 2012 Asgeir Bjørlykke
;;;; Copyright (C) 2012 Mathias Hellevang
;;;; Copyright (C) 2012 Stian Sletner
;;;;
;;;; This library is free software: you can redistribute it and/or modify
;;;; it under the terms of the GNU Lesser General Public License as published
;;;; by the Free Software Foundation, either version 3 of the License, or
;;;; (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.

(in-package :html5-parser-tests)

(defun run-html5-parser-tests ()
  "Run the input-stream, tokenizer, tree-builder and parser tests, in
that order, and return the primary result of each as four values."
  (let* ((input-stream-result (input-stream-tests))
         (tokenizer-result (test-tokenizer))
         (tree-builder-result (tree-builder-tests))
         (parser-result (test-parser)))
    (values input-stream-result
            tokenizer-result
            tree-builder-result
            parser-result)))

-------------------------------------------------------------------------------- /tests/support.lisp: --------------------------------------------------------------------------------
;;;; HTML5 parser for Common Lisp
;;;;
;;;; Copyright (C) 2017 Thomas Bakketun
;;;; Copyright (C) 2012 Asgeir Bjørlykke
;;;; Copyright (C) 2012 Mathias Hellevang
;;;; Copyright (C) 2012 Stian Sletner
;;;;
;;;; This library is free software: you can redistribute it and/or modify
;;;; it under the terms of the GNU Lesser General Public License as published
;;;; by the Free Software Foundation, either version 3 of the License, or
;;;; (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.

(in-package :html5-parser-tests)

(defun html5lib-test-files (subdirectory &key (type "dat"))
  "List the files of the given TYPE in testdata/SUBDIRECTORY of the
test system."
  (let ((pattern (make-pathname :directory (list :relative subdirectory)
                                :name :wild
                                :type type))
        (root (asdf:system-relative-pathname :cl-html5-parser-tests "testdata/")))
    (directory (merge-pathnames pattern root))))

(defun parse-test-part (in)
  "Read one \"#name\" section from IN.  Returns (KEYWORD TEXT), where TEXT
is every following line up to the next line starting with #, minus the
final newline; returns NIL at end of file."
  (let ((line (read-line in nil)))
    (unless (null line)
      (assert (char= #\# (char line 0)))
      (let* ((key (intern (string-upcase (subseq line 1)) :keyword))
             (raw (with-output-to-string (out)
                    (loop
                      (let ((next-char (peek-char nil in nil)))
                        ;; Stop at end of file or at the next section header.
                        (when (or (null next-char) (char= #\# next-char))
                          (return))
                        (write-line (read-line in) out)))))
             ;; Drop the trailing newline WRITE-LINE added, if any.
             (end (max 0 (1- (length raw)))))
        (list key (subseq raw 0 end))))))


(defun parse-one-test (in)
  "Read sections from IN up to and including the :DOCUMENT section and
return them as one plist; returns NIL when IN is exhausted."
  (let ((plist '()))
    (loop
      (let ((part (parse-test-part in)))
        (when (null part)
          (return))
        (setf plist (append plist part))
        (when (eql (car part) :document)
          (return))))
    plist))

(defun parse-test-data (filename)
  "Return the list of test plists contained in FILENAME."
  (with-open-file (in filename)
    (let ((tests '()))
      (loop
        (let ((test (parse-one-test in)))
          (if test
              (push test tests)
              (return))))
      (nreverse tests))))

-------------------------------------------------------------------------------- /tests/test-inputstream.lisp: --------------------------------------------------------------------------------
;;;; HTML5 parser for Common Lisp
;;;;
;;;; Copyright (C) 2012 Thomas Bakketun
;;;; Copyright (C) 2012 Asgeir Bjørlykke
;;;; Copyright (C) 2012 Mathias Hellevang
;;;; Copyright (C) 2012 Stian Sletner
;;;;
;;;; This library is free software: you can redistribute it and/or modify
;;;; it under the terms of the GNU Lesser General Public License as published
;;;; by the Free Software Foundation, either version 3 of the License, or
;;;; (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.

(in-package :html5-parser-tests)

(in-root-suite)
(defsuite input-stream-tests)
(in-suite input-stream-tests)

;; Reading returns the characters of the input one at a time.
(deftest test-read-char ()
  (let ((stream (html5-parser::make-html-input-stream "hello")))
    (dolist (expected '(#\h #\e))
      (is (eql expected (html5-parser::html5-stream-char stream))))))

;; Characters (and EOF) pushed back with unget are read again, last
;; pushed first.
(deftest test-unget ()
  (let ((stream (html5-parser::make-html-input-stream "hei")))
    (flet ((expect-chars (&rest expected)
             (dolist (char expected)
               (is (eql char (html5-parser::html5-stream-char stream))))))
      (expect-chars #\h #\e #\i html5-constants::+eof+)
      (html5-parser::html5-stream-unget stream html5-constants::+eof+)
      (html5-parser::html5-stream-unget stream #\i)
      (expect-chars #\i html5-constants::+eof+))))

;; chars-until stops before a character in the set, or, with the third
;; argument true, before a character outside the set.
(deftest test-chars-until ()
  (let* ((stream (html5-parser::make-html-input-stream "hello<--__-->a"))
         (prefix (html5-parser::html5-stream-chars-until stream "><")))
    (is (equal "hello" prefix))
    (is (eql #\< (html5-parser::html5-stream-char stream)))
    (let ((run (html5-parser::html5-stream-chars-until stream "<>-_" t)))
      (is (equal "--__-->" run)))
    (is (eql #\a (html5-parser::html5-stream-char stream)))))

;; chars-until returns everything when the stop character never occurs.
(deftest test-chars-until-eof ()
  (let* ((stream (html5-parser::make-html-input-stream "hello"))
         (everything (html5-parser::html5-stream-chars-until stream "?")))
    (is (equal "hello" everything))
    (is (eql html5-constants::+eof+ (html5-parser::html5-stream-char stream)))))

;; LF, a lone CR and CR LF are all read as a single Newline.
(deftest test-line-ending-fix ()
  (let* ((input (coerce (list #\a #\Newline
                              #\b #\Return
                              #\c #\Return #\Newline
                              #\d)
                        'string))
         (stream (html5-parser::make-html-input-stream input)))
    (dolist (expected (list #\a #\Newline
                            #\b #\Newline
                            #\c #\Newline
                            #\d
                            html5-constants::+eof+))
      (is (eql expected (html5-parser::html5-stream-char stream))))))

;; A CR at the very end of the input is also read as Newline.
(deftest test-line-ending-fix2 ()
  (let* ((input (coerce (list #\< #\? #\Return) 'string))
         (stream (html5-parser::make-html-input-stream input)))
    (dolist (expected (list #\< #\? #\Newline html5-constants::+eof+))
      (is (eql expected (html5-parser::html5-stream-char stream))))))


;; A UTF-8 byte-order mark selects the encoding and is not returned as
;; a character.
(deftest test-bom ()
  (let* ((octets #(#xef #xbb #xbf 39))
         (stream (html5-parser::make-html-input-stream octets)))
    (is (eql (car (html5-parser::html5-stream-encoding stream))
             :utf-8))
    (is (eql (html5-parser::html5-stream-char stream)
             #\'))))

-------------------------------------------------------------------------------- /tests/test-parser.lisp: --------------------------------------------------------------------------------
;;;; HTML5 parser for Common Lisp
;;;;
;;;; Copyright (C) 2012 Thomas Bakketun
;;;; Copyright (C) 2012 Asgeir Bjørlykke
;;;; Copyright (C) 2012 Mathias Hellevang
;;;; Copyright (C) 2012 Stian Sletner
;;;;
;;;; This library is free software: you can redistribute it and/or modify
;;;; it under the terms of the GNU Lesser General Public License as published
;;;; by the Free Software Foundation, either version 3 of the License, or
;;;; (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.

(in-package :html5-parser-tests)

;; Printing for tests

(defun print-node (node stream)
  "Write NODE to STREAM in the html5lib tree-construction dump syntax."
  (ecase (node-type node)
    (:document-type
     ;; NOTE(review): the markup in this branch and in the :COMMENT branch
     ;; was missing from the text under review; it is reconstructed from
     ;; the html5lib tree-dump format -- confirm against the upstream file.
     (format stream "<!DOCTYPE ~A" (node-name node))
     (when (or (plusp (length (node-public-id node)))
               (plusp (length (node-system-id node))))
       (format stream " \"~A\" \"~A\""
               (or (node-public-id node) "")
               (or (node-system-id node) "")))
     (format stream ">"))
    (:comment
     (format stream "<!-- ~A -->" (node-value node)))
    (:element
     ;; Elements outside the HTML namespace are printed with their prefix.
     (if (and (node-namespace node)
              (string/= (node-namespace node)
                        (html5-constants::find-namespace "html")))
         (format stream "<~A ~A>"
                 (html5-constants::find-prefix (node-namespace node))
                 (node-name node))
         (format stream "<~A>" (node-name node))))
    (:text
     (format stream "\"~A\"" (node-value node)))))

(defun print-tree (node &key (stream *standard-output*) (indent 0))
  "Write NODE and its descendants to STREAM, one node per line prefixed
with \"|\" and indented two further columns per level.  An element's
attributes follow it, sorted by name.  Returns NODE."
  (ecase (node-type node)
    ((:document :document-fragment)
     ;; The document itself is not printed, only its children.
     (element-map-children (lambda (child)
                             (print-tree child
                                         :stream stream
                                         :indent (+ indent 2)))
                           node))
    (:element
     (format stream "~&|~vT" indent)
     (print-node node stream)
     (incf indent 2)
     (let ((attributes))
       (element-map-attributes* (lambda (name namespace value)
                                  (push (cons (cons name namespace) value) attributes))
                                node)
       (when attributes
         (loop for (name . value) in (sort attributes #'string<
                                           :key (lambda (attr)
                                                  (if (consp (car attr))
                                                      (caar attr)
                                                      (car attr))))
               do
               (format stream "~&|~vT" indent)
               (if (cdr name)
                   (format stream "~A ~A" (html5-constants:find-prefix (cdr name)) (car name))
                   (format stream "~A" (car name)))
               (format stream "=\"~A\"" value)))
       (element-map-children (lambda (child)
                               (print-tree child
                                           :stream stream
                                           :indent indent))
                             node)))
    ((:text :comment :document-type)
     (format stream "~&|~vT" indent)
     (print-node node stream)))
  node)


;; Inputs (the #data text) of tests that should be skipped.
(defparameter *parser-tests-to-skip*
  ())

(defun do-parser-test (&key test-name data errors document document-fragment)
  "Parse DATA (as a fragment inside DOCUMENT-FRAGMENT when that is given)
and compare the printed tree with DOCUMENT.  Signals an error when the
trees differ and warns when the number of parse errors differs from
ERRORS.  A SKIP restart is available.  Returns the parsed document."
  (with-simple-restart (skip "Skip test ~A ~A"
                             test-name
                             data)
    (format t "~&Test ~A: ~A~%" test-name data)
    (setf document (string-right-trim '(#\Newline) document))
    (when (member data *parser-tests-to-skip* :test #'string=)
      (format t " skipped")
      (return-from do-parser-test))
    (multiple-value-bind (result-document got-errors)
        (if document-fragment
            (parse-html5-fragment data :container document-fragment)
            (parse-html5 data))
      (let ((result (with-output-to-string (out)
                      (print-tree result-document :stream out))))
        (unless (string= document result)
          (error "Input:~%~A~%Got:~%~A~%Expected:~%~A" data result document))
        (setf errors (split-sequence:split-sequence #\Newline errors
                                                    :remove-empty-subseqs t))
        ;; Only the number of errors is compared, not their text.
        (when (and errors
                   (/= (length errors) (length got-errors)))
          (warn "Errors mismatch~&Input:~%~A~%Got:~%~{~&~A~}~%Expected:~%~{~&~A~}"
                data got-errors errors)))
      result-document)))


(defun test-parser ()
  "Run DO-PARSER-TEST on every test in the tree-construction data files."
  (let ((files (html5lib-test-files "tree-construction")))
    (dolist (file files)
      (let ((test-name (pathname-name file))
            (tests (parse-test-data file)))
        (dolist (test tests)
          ;; Each test is a plist whose keys match the keyword arguments.
          (apply #'do-parser-test :test-name test-name test))))))


(in-root-suite)
(defsuite parser-tests)
(in-suite parser-tests)

(deftest test-parse-content-attr ()
  (is (eql nil (html5-parser::parse-content-attr "garble")))
  (is (eql nil (html5-parser::parse-content-attr "charset")))
  (is (string= "utf-8" (html5-parser::parse-content-attr "charset=utf-8")))
  (is (string= "utf-8" (html5-parser::parse-content-attr "charset = utf-8")))
  (is (string= "utf-8" (html5-parser::parse-content-attr " charset = utf-8 ")))
  (is (string= " utf-8 " (html5-parser::parse-content-attr " charset =' utf-8 '")))
  (is (eql nil (html5-parser::parse-content-attr " charset =\"utf-8 '")))
  (is (string= "utf-8" (html5-parser::parse-content-attr " charset =\"utf-8\"")))
  (is (string= "utf-8" (html5-parser::parse-content-attr " charset =\"utf-8\" "))))

-------------------------------------------------------------------------------- /tests/test-tree-builder.lisp: --------------------------------------------------------------------------------
;;;; HTML5 parser for Common Lisp
;;;;
;;;; Copyright (C) 2012 Thomas Bakketun
;;;; Copyright (C) 2012 Asgeir Bjørlykke
;;;; Copyright (C) 2012 Mathias Hellevang
;;;; Copyright (C) 2012 Stian Sletner
;;;;
;;;; This library is free software: you can redistribute it and/or modify
;;;; it under the terms of the GNU Lesser General Public License as published
;;;; by the Free Software Foundation, either version 3 of the License, or
;;;; (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.

(in-package :html5-parser-tests)

(in-root-suite)
(defsuite tree-builder-tests)
(in-suite tree-builder-tests)

(deftest test-make-document ()
  (let ((doc (make-document)))
    (is (eq :document (node-type doc)))))

;; An appended element is what iteration over the parent visits.
(deftest test-append-child ()
  (let* ((doc (make-document))
         (child (make-element doc "test" nil)))
    (node-append-child doc child)
    (flet ((check (kid)
             (is (eq kid child))))
      (element-map-children #'check doc))))

;; Appending a child to a second parent moves it there.
(deftest test-reappend-child ()
  (let* ((doc (make-document))
         (parent1 (make-element doc "parent1" nil))
         (parent2 (make-element doc "parent2" nil))
         (child (make-element doc "child" nil)))
    (dolist (parent (list parent1 parent2))
      (node-append-child parent child)
      (is (eq parent (node-parent child))))
    (element-map-children (lambda (kid)
                            (error "parent1 should not have children now ~S" kid))
                          parent1)))

(deftest test-navigate ()
  (let* ((doc (make-document))
         (parent (make-element doc "parent" nil))
         (children (loop for name in '("child1" "child2" "child3" "child4")
                         collect (make-element doc name nil))))
    (dolist (child children)
      (node-append-child parent child))
    (destructuring-bind (child1 child2 child3 child4) children
      (declare (ignorable child3))
      (is (eq child1 (node-first-child parent)))
      (is (eq child4 (node-last-child parent)))
      (is (eq child2 (node-next-sibling child1)))
      (is (eq nil (node-next-sibling child4)))
      (is (eq child1 (node-previous-sibling child2)))
      (is (eq nil (node-previous-sibling child1))))))

;; Removing a middle child joins its neighbours.
(deftest test-remove-child ()
  (let* ((doc (make-document))
         (parent (make-element doc "parent" nil))
         (children (loop for name in '("child1" "child2" "child3" "child4")
                         collect (make-element doc name nil))))
    (dolist (child children)
      (node-append-child parent child))

    (node-remove-child parent (second children))
    (is (eq (third children) (node-next-sibling (first children))))))

(deftest test-set-attribute ()
  (let* ((doc (make-document))
         (element (make-element doc "test" nil)))
    (setf (element-attribute element "hello") "world")
    (let ((stored (element-attribute element "hello")))
      (is (string= stored "world")))))

;; Two text nodes appended with NODE-APPEND-CHILD* end up as one
;; first-child value.
(deftest test-append-text ()
  (let* ((doc (make-document))
         (parent (make-element doc "parent" nil)))
    (dolist (text '("hello" "world"))
      (html5-parser::node-append-child* parent (make-text-node doc text)))
    (is (string= "helloworld" (node-value (node-first-child parent))))))

;; (deftest test-node-clone ()
;;   (let* ((tree (make-tree))
;;          (parent (tree-make-element tree "parent" nil))
;;          (element (tree-make-element tree "test" nil)))
;;     (node-append-child tree parent element)
;;     (setf (node-attribute tree element "hello") "world")
;;     (let ((clone (node-clone tree element)))
;;       (is (null (node-parent tree clone)))
;;       (is (string= (node-attribute tree clone "hello") "world")))))

-------------------------------------------------------------------------------- /tests/testdata/encoding/test-yahoo-jp.dat: -------------------------------------------------------------------------------- 1 | #data 2 | 3 | 4 | 5 | 6 | Yahoo! 
JAPAN 7 | 8 | "] 49 | } 50 | 51 | ]} -------------------------------------------------------------------------------- /tests/testdata/tokenizer/aa-lisp-tests.test: -------------------------------------------------------------------------------- 1 | {"tests": [ 2 | 3 | {"description":"Comment in script", 4 | "initialStates":["SCRIPT DATA state"], 5 | "lastStartTag":"plaintext", 6 | "input":"", 81 | "output":["ParseError", ["Comment", "--!\\uFFFD"]] 82 | }, 83 | { 84 | "description":"space EOF after doctype ", 85 | "input":"-->", 7 | "output":[["Character", "foo"], ["EndTag", "xmp"]]}, 8 | 9 | {"description":"Bogus comment in RCDATA or RAWTEXT", 10 | "initialStates":["RCDATA state", "RAWTEXT state"], 11 | "lastStartTag":"xmp", 12 | "input":"foobaz", 13 | "output":[["Character", "foobaz"], ["EndTag", "xmp"]]}, 14 | 15 | {"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT", 16 | "initialStates":["RCDATA state", "RAWTEXT state"], 17 | "lastStartTag":"xmp", 18 | "input":"foobaz", 19 | "output":[["Character", "foo"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]}, 20 | 21 | {"description":"Commented entities in RCDATA", 22 | "initialStates":["RCDATA state"], 23 | "lastStartTag":"xmp", 24 | "input":" & & ", 25 | "output":[["Character", " & & "], ["EndTag", "xmp"]]}, 26 | 27 | {"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT", 28 | "initialStates":["RCDATA state", "RAWTEXT state"], 29 | "lastStartTag":"xmp", 30 | "input":"foox--<>", 31 | "output":[["Character", "foox--<>"], ["EndTag", "xmp"]]} 32 | 33 | ]} 34 | -------------------------------------------------------------------------------- /tests/testdata/tokenizer/pendingSpecChanges.test: -------------------------------------------------------------------------------- 1 | {"tests": [ 2 | 3 | {"description":"", 73 | "output":[["Comment", "comment"]]}, 74 | 75 | {"description":"Comment, Central dash no space", 76 | "input":"", 77 | 
"output":["ParseError", ["Comment", "-"]]}, 78 | 79 | {"description":"Comment, two central dashes", 80 | "input":"", 81 | "output":["ParseError", ["Comment", " --comment "]]}, 82 | 83 | {"description":"Unfinished comment", 84 | "input":"", 93 | "output":["ParseError", ["Comment", ""]]}, 94 | 95 | {"description":"Short comment two", 96 | "input":"", 97 | "output":["ParseError", ["Comment", ""]]}, 98 | 99 | {"description":"Short comment three", 100 | "input":"", 101 | "output":[["Comment", ""]]}, 102 | 103 | 104 | {"description":"Ampersand EOF", 105 | "input":"&", 106 | "output":[["Character", "&"]]}, 107 | 108 | {"description":"Ampersand ampersand EOF", 109 | "input":"&&", 110 | "output":[["Character", "&&"]]}, 111 | 112 | {"description":"Ampersand space EOF", 113 | "input":"& ", 114 | "output":[["Character", "& "]]}, 115 | 116 | {"description":"Unfinished entity", 117 | "input":"&f", 118 | "output":["ParseError", ["Character", "&f"]]}, 119 | 120 | {"description":"Ampersand, number sign", 121 | "input":"&#", 122 | "output":["ParseError", ["Character", "&#"]]}, 123 | 124 | {"description":"Unfinished numeric entity", 125 | "input":"&#x", 126 | "output":["ParseError", ["Character", "&#x"]]}, 127 | 128 | {"description":"Entity with trailing semicolon (1)", 129 | "input":"I'm ¬it", 130 | "output":[["Character","I'm \u00ACit"]]}, 131 | 132 | {"description":"Entity with trailing semicolon (2)", 133 | "input":"I'm ∉", 134 | "output":[["Character","I'm \u2209"]]}, 135 | 136 | {"description":"Entity without trailing semicolon (1)", 137 | "input":"I'm ¬it", 138 | "output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]}, 139 | 140 | {"description":"Entity without trailing semicolon (2)", 141 | "input":"I'm ¬in", 142 | "output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]}, 143 | 144 | {"description":"Partial entity match at end of file", 145 | "input":"I'm &no", 146 | "output":[["Character","I'm "], "ParseError", ["Character", "&no"]]}, 147 | 
148 | {"description":"Non-ASCII character reference name", 149 | "input":"&\u00AC;", 150 | "output":["ParseError", ["Character", "&\u00AC;"]]}, 151 | 152 | {"description":"ASCII decimal entity", 153 | "input":"$", 154 | "output":[["Character","$"]]}, 155 | 156 | {"description":"ASCII hexadecimal entity", 157 | "input":"?", 158 | "output":[["Character","?"]]}, 159 | 160 | {"description":"Hexadecimal entity in attribute", 161 | "input":"", 162 | "output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}, 163 | 164 | {"description":"Entity in attribute without semicolon ending in x", 165 | "input":"", 166 | "output":["ParseError", ["StartTag", "h", {"a":"¬x"}]]}, 167 | 168 | {"description":"Entity in attribute without semicolon ending in 1", 169 | "input":"", 170 | "output":["ParseError", ["StartTag", "h", {"a":"¬1"}]]}, 171 | 172 | {"description":"Entity in attribute without semicolon ending in i", 173 | "input":"", 174 | "output":["ParseError", ["StartTag", "h", {"a":"¬i"}]]}, 175 | 176 | {"description":"Entity in attribute without semicolon", 177 | "input":"", 178 | "output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]}, 179 | 180 | {"description":"Unquoted attribute ending in ampersand", 181 | "input":"", 182 | "output":[["StartTag","s",{"o":"&","t":""}]]}, 183 | 184 | {"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters", 185 | "input":"foo", 186 | "output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]}, 187 | 188 | {"description":"plaintext element", 189 | "input":"foobar", 190 | "output":[["StartTag","plaintext",{}], ["Character","foobar"]]}, 191 | 192 | {"description":"Open angled bracket in unquoted attribute value state", 193 | "input":"<a a=f<>", 194 | "output":["ParseError", ["StartTag", "a", {"a":"f<"}]]} 195 | 196 | ]} 197 | -------------------------------------------------------------------------------- /tests/testdata/tokenizer/test2.test: 
-------------------------------------------------------------------------------- 1 | {"tests": [ 2 | 3 | {"description":"DOCTYPE without name", 4 | "input":"<!DOCTYPE>", 5 | "output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]}, 6 | 7 | {"description":"DOCTYPE without space before name", 8 | "input":"<!DOCTYPEhtml>", 9 | "output":["ParseError", ["DOCTYPE", "html", null, null, true]]}, 10 | 11 | {"description":"Incorrect DOCTYPE without a space before name", 12 | "input":"<!DOCTYPEfoo>", 13 | "output":["ParseError", ["DOCTYPE", "foo", null, null, true]]}, 14 | 15 | {"description":"DOCTYPE with publicId", 16 | "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">", 17 | "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]}, 18 | 19 | {"description":"DOCTYPE with EOF after PUBLIC", 20 | "input":"<!DOCTYPE html PUBLIC", 21 | "output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, 22 | 23 | {"description":"DOCTYPE with EOF after PUBLIC '", 24 | "input":"<!DOCTYPE html PUBLIC '", 25 | "output":["ParseError", ["DOCTYPE", "html", "", null, false]]}, 26 | 27 | {"description":"DOCTYPE with EOF after PUBLIC 'x", 28 | "input":"<!DOCTYPE html PUBLIC 'x", 29 | "output":["ParseError", ["DOCTYPE", "html", "x", null, false]]}, 30 | 31 | {"description":"DOCTYPE with systemId", 32 | "input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">", 33 | "output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]}, 34 | 35 | {"description":"DOCTYPE with publicId and systemId", 36 | "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">", 37 | "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]}, 38 | 39 | {"description":"DOCTYPE with > in double-quoted publicId", 40 | "input":"<!DOCTYPE html PUBLIC \">x", 41 | "output":["ParseError", 
["DOCTYPE", "html", "", null, false], ["Character", "x"]]}, 42 | 43 | {"description":"DOCTYPE with > in single-quoted publicId", 44 | "input":"<!DOCTYPE html PUBLIC '>x", 45 | "output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]}, 46 | 47 | {"description":"DOCTYPE with > in double-quoted systemId", 48 | "input":"<!DOCTYPE html PUBLIC \"foo\" \">x", 49 | "output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]}, 50 | 51 | {"description":"DOCTYPE with > in single-quoted systemId", 52 | "input":"<!DOCTYPE html PUBLIC 'foo' '>x", 53 | "output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]}, 54 | 55 | {"description":"Incomplete doctype", 56 | "input":"<!DOCTYPE html ", 57 | "output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, 58 | 59 | {"description":"Numeric entity representing the NUL character", 60 | "input":"&#0000;", 61 | "output":["ParseError", ["Character", "\uFFFD"]]}, 62 | 63 | {"description":"Hexadecimal entity representing the NUL character", 64 | "input":"&#x0000;", 65 | "output":["ParseError", ["Character", "\uFFFD"]]}, 66 | 67 | {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)", 68 | "input":"&#2225222;", 69 | "output":["ParseError", ["Character", "\uFFFD"]]}, 70 | 71 | {"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)", 72 | "input":"&#x1010FFFF;", 73 | "output":["ParseError", ["Character", "\uFFFD"]]}, 74 | 75 | {"description":"Hexadecimal entity pair representing a surrogate pair", 76 | "input":"&#xD869;&#xDED6;", 77 | "output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]}, 78 | 79 | {"description":"Hexadecimal entity with mixed uppercase and lowercase", 80 | "input":"&#xaBcD;", 81 | "output":[["Character", "\uABCD"]]}, 82 | 83 | {"description":"Entity without a name", 84 | "input":"&;", 85 | "output":["ParseError", ["Character", "&;"]]}, 86 | 87 | 
{"description":"Unescaped ampersand in attribute value", 88 | "input":"<h a='&'>", 89 | "output":[["StartTag", "h", { "a":"&" }]]}, 90 | 91 | {"description":"StartTag containing <", 92 | "input":"<a<b>", 93 | "output":[["StartTag", "a<b", { }]]}, 94 | 95 | {"description":"Non-void element containing trailing /", 96 | "input":"<h/>", 97 | "output":[["StartTag","h",{},true]]}, 98 | 99 | {"description":"Void element with permitted slash", 100 | "input":"<br/>", 101 | "output":[["StartTag","br",{},true]]}, 102 | 103 | {"description":"Void element with permitted slash (with attribute)", 104 | "input":"<br foo='bar'/>", 105 | "output":[["StartTag","br",{"foo":"bar"},true]]}, 106 | 107 | {"description":"StartTag containing /", 108 | "input":"<h/a='b'>", 109 | "output":["ParseError", ["StartTag", "h", { "a":"b" }]]}, 110 | 111 | {"description":"Double-quoted attribute value", 112 | "input":"<h a=\"b\">", 113 | "output":[["StartTag", "h", { "a":"b" }]]}, 114 | 115 | {"description":"Unescaped </", 116 | "input":"</", 117 | "output":["ParseError", ["Character", "</"]]}, 118 | 119 | {"description":"Illegal end tag name", 120 | "input":"</1>", 121 | "output":["ParseError", ["Comment", "1"]]}, 122 | 123 | {"description":"Simili processing instruction", 124 | "input":"<?namespace>", 125 | "output":["ParseError", ["Comment", "?namespace"]]}, 126 | 127 | {"description":"A bogus comment stops at >, even if preceeded by two dashes", 128 | "input":"<?foo-->", 129 | "output":["ParseError", ["Comment", "?foo--"]]}, 130 | 131 | {"description":"Unescaped <", 132 | "input":"foo < bar", 133 | "output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]}, 134 | 135 | {"description":"Null Byte Replacement", 136 | "input":"\u0000", 137 | "output":["ParseError", ["Character", "\u0000"]]}, 138 | 139 | {"description":"Comment with dash", 140 | "input":"<!---x", 141 | "output":["ParseError", ["Comment", "-x"]]}, 142 | 143 | {"description":"Entity + newline", 144 | "input":"\nx\n&gt;\n", 
145 | "output":[["Character","\nx\n>\n"]]}, 146 | 147 | {"description":"Start tag with no attributes but space before the greater-than sign", 148 | "input":"<h >", 149 | "output":[["StartTag", "h", {}]]}, 150 | 151 | {"description":"Empty attribute followed by uppercase attribute", 152 | "input":"<h a B=''>", 153 | "output":[["StartTag", "h", {"a":"", "b":""}]]}, 154 | 155 | {"description":"Double-quote after attribute name", 156 | "input":"<h a \">", 157 | "output":["ParseError", ["StartTag", "h", {"a":"", "\"":""}]]}, 158 | 159 | {"description":"Single-quote after attribute name", 160 | "input":"<h a '>", 161 | "output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]}, 162 | 163 | {"description":"Empty end tag with following characters", 164 | "input":"a</>bc", 165 | "output":[["Character", "a"], "ParseError", ["Character", "bc"]]}, 166 | 167 | {"description":"Empty end tag with following tag", 168 | "input":"a</><b>c", 169 | "output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]}, 170 | 171 | {"description":"Empty end tag with following comment", 172 | "input":"a</><!--b-->c", 173 | "output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]}, 174 | 175 | {"description":"Empty end tag with following end tag", 176 | "input":"a</></b>c", 177 | "output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]} 178 | 179 | ]} 180 | -------------------------------------------------------------------------------- /tests/testdata/tokenizer/unicodeCharsProblematic.test: -------------------------------------------------------------------------------- 1 | {"tests" : [ 2 | {"description": "Invalid Unicode character U+DFFF", 3 | "doubleEscaped":true, 4 | "input": "\\uDFFF", 5 | "output":["ParseError", ["Character", "\\uFFFD"]]}, 6 | 7 | {"description": "Invalid Unicode character U+D800", 8 | "doubleEscaped":true, 9 | "input": "\\uD800", 10 | "output":["ParseError", ["Character", "\\uFFFD"]]}, 11 | 12 | 
{"description": "Invalid Unicode character U+DFFF with valid preceding character", 13 | "doubleEscaped":true, 14 | "input": "a\\uDFFF", 15 | "output":["ParseError", ["Character", "a\\uFFFD"]]}, 16 | 17 | {"description": "Invalid Unicode character U+D800 with valid following character", 18 | "doubleEscaped":true, 19 | "input": "\\uD800a", 20 | "output":["ParseError", ["Character", "\\uFFFDa"]]}, 21 | 22 | {"description":"CR followed by U+0000", 23 | "input":"\r\u0000", 24 | "output":[["Character", "\n"], "ParseError", ["Character", "\u0000"]], 25 | "ignoreErrorOrder":true} 26 | ] 27 | } -------------------------------------------------------------------------------- /tests/testdata/tokenizer/xmlViolation.test: -------------------------------------------------------------------------------- 1 | {"xmlViolationTests": [ 2 | 3 | {"description":"Non-XML character", 4 | "input":"a\uFFFFb", 5 | "ignoreErrorOrder":true, 6 | "output":["ParseError",["Character","a\uFFFDb"]]}, 7 | 8 | {"description":"Non-XML space", 9 | "input":"a\u000Cb", 10 | "ignoreErrorOrder":true, 11 | "output":[["Character","a b"]]}, 12 | 13 | {"description":"Double hyphen in comment", 14 | "input":"<!-- foo -- bar -->", 15 | "output":["ParseError",["Comment"," foo - - bar "]]}, 16 | 17 | {"description":"FF between attributes", 18 | "input":"<a b=''\u000Cc=''>", 19 | "output":[["StartTag","a",{"b":"","c":""}]]} 20 | ]} 21 | 22 | 23 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/adoption01.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <a><p></a></p> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <a> 9 | | <p> 10 | | <a> 11 | 12 | #data 13 | <a>1<p>2</a>3</p> 14 | #errors 15 | #document 16 | | <html> 17 | | <head> 18 | | <body> 19 | | <a> 20 | | "1" 21 | | <p> 22 | | <a> 23 | | "2" 24 | | "3" 25 | 26 | #data 27 | <a>1<button>2</a>3</button> 28 | #errors 29 | 
#document 30 | | <html> 31 | | <head> 32 | | <body> 33 | | <a> 34 | | "1" 35 | | <button> 36 | | <a> 37 | | "2" 38 | | "3" 39 | 40 | #data 41 | <a>1<b>2</a>3</b> 42 | #errors 43 | #document 44 | | <html> 45 | | <head> 46 | | <body> 47 | | <a> 48 | | "1" 49 | | <b> 50 | | "2" 51 | | <b> 52 | | "3" 53 | 54 | #data 55 | <a>1<div>2<div>3</a>4</div>5</div> 56 | #errors 57 | #document 58 | | <html> 59 | | <head> 60 | | <body> 61 | | <a> 62 | | "1" 63 | | <div> 64 | | <a> 65 | | "2" 66 | | <div> 67 | | <a> 68 | | "3" 69 | | "4" 70 | | "5" 71 | 72 | #data 73 | <table><a>1<p>2</a>3</p> 74 | #errors 75 | #document 76 | | <html> 77 | | <head> 78 | | <body> 79 | | <a> 80 | | "1" 81 | | <p> 82 | | <a> 83 | | "2" 84 | | "3" 85 | | <table> 86 | 87 | #data 88 | <b><b><a><p></a> 89 | #errors 90 | #document 91 | | <html> 92 | | <head> 93 | | <body> 94 | | <b> 95 | | <b> 96 | | <a> 97 | | <p> 98 | | <a> 99 | 100 | #data 101 | <b><a><b><p></a> 102 | #errors 103 | #document 104 | | <html> 105 | | <head> 106 | | <body> 107 | | <b> 108 | | <a> 109 | | <b> 110 | | <b> 111 | | <p> 112 | | <a> 113 | 114 | #data 115 | <a><b><b><p></a> 116 | #errors 117 | #document 118 | | <html> 119 | | <head> 120 | | <body> 121 | | <a> 122 | | <b> 123 | | <b> 124 | | <b> 125 | | <b> 126 | | <p> 127 | | <a> 128 | 129 | #data 130 | <p>1<s id="A">2<b id="B">3</p>4</s>5</b> 131 | #errors 132 | #document 133 | | <html> 134 | | <head> 135 | | <body> 136 | | <p> 137 | | "1" 138 | | <s> 139 | | id="A" 140 | | "2" 141 | | <b> 142 | | id="B" 143 | | "3" 144 | | <s> 145 | | id="A" 146 | | <b> 147 | | id="B" 148 | | "4" 149 | | <b> 150 | | id="B" 151 | | "5" 152 | 153 | #data 154 | <table><a>1<td>2</td>3</table> 155 | #errors 156 | #document 157 | | <html> 158 | | <head> 159 | | <body> 160 | | <a> 161 | | "1" 162 | | <a> 163 | | "3" 164 | | <table> 165 | | <tbody> 166 | | <tr> 167 | | <td> 168 | | "2" 169 | 170 | #data 171 | <table>A<td>B</td>C</table> 172 | #errors 173 | #document 174 | | <html> 175 | | <head> 176 | | 
<body> 177 | | "AC" 178 | | <table> 179 | | <tbody> 180 | | <tr> 181 | | <td> 182 | | "B" 183 | 184 | #data 185 | <a><svg><tr><input></a> 186 | #errors 187 | #document 188 | | <html> 189 | | <head> 190 | | <body> 191 | | <a> 192 | | <svg svg> 193 | | <svg tr> 194 | | <svg input> 195 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/adoption02.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <b>1<i>2<p>3</b>4 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <b> 9 | | "1" 10 | | <i> 11 | | "2" 12 | | <i> 13 | | <p> 14 | | <b> 15 | | "3" 16 | | "4" 17 | 18 | #data 19 | <a><div><style></style><address><a> 20 | #errors 21 | #document 22 | | <html> 23 | | <head> 24 | | <body> 25 | | <a> 26 | | <div> 27 | | <a> 28 | | <style> 29 | | <address> 30 | | <a> 31 | | <a> 32 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/button.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <button><p></button><button></button> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <button> 9 | | <p> 10 | | <button> 11 | #data 12 | <button><b><button>hei 13 | #errors 14 | #document 15 | | <html> 16 | | <head> 17 | | <body> 18 | | <button> 19 | | <b> 20 | | <b> 21 | | <button> 22 | | "hei" 23 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/comments01.dat: -------------------------------------------------------------------------------- 1 | #data 2 | FOO<!-- BAR -->BAZ 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | "FOO" 9 | | <!-- BAR --> 10 | | "BAZ" 11 | 12 | #data 13 | FOO<!-- BAR --!>BAZ 14 | #errors 15 | #document 16 | | <html> 17 | | <head> 18 | | <body> 19 | | "FOO" 20 | | <!-- BAR --> 21 | | "BAZ" 22 | 23 | #data 24 | FOO<!-- 
BAR -- >BAZ 25 | #errors 26 | #document 27 | | <html> 28 | | <head> 29 | | <body> 30 | | "FOO" 31 | | <!-- BAR -- >BAZ --> 32 | 33 | #data 34 | FOO<!-- BAR -- <QUX> -- MUX -->BAZ 35 | #errors 36 | #document 37 | | <html> 38 | | <head> 39 | | <body> 40 | | "FOO" 41 | | <!-- BAR -- <QUX> -- MUX --> 42 | | "BAZ" 43 | 44 | #data 45 | FOO<!-- BAR -- <QUX> -- MUX --!>BAZ 46 | #errors 47 | #document 48 | | <html> 49 | | <head> 50 | | <body> 51 | | "FOO" 52 | | <!-- BAR -- <QUX> -- MUX --> 53 | | "BAZ" 54 | 55 | #data 56 | FOO<!-- BAR -- <QUX> -- MUX -- >BAZ 57 | #errors 58 | #document 59 | | <html> 60 | | <head> 61 | | <body> 62 | | "FOO" 63 | | <!-- BAR -- <QUX> -- MUX -- >BAZ --> 64 | 65 | #data 66 | FOO<!---->BAZ 67 | #errors 68 | #document 69 | | <html> 70 | | <head> 71 | | <body> 72 | | "FOO" 73 | | <!-- --> 74 | | "BAZ" 75 | 76 | #data 77 | FOO<!--->BAZ 78 | #errors 79 | #document 80 | | <html> 81 | | <head> 82 | | <body> 83 | | "FOO" 84 | | <!-- --> 85 | | "BAZ" 86 | 87 | #data 88 | FOO<!-->BAZ 89 | #errors 90 | #document 91 | | <html> 92 | | <head> 93 | | <body> 94 | | "FOO" 95 | | <!-- --> 96 | | "BAZ" 97 | 98 | #data 99 | <?xml version="1.0">Hi 100 | #errors 101 | #document 102 | | <!-- ?xml version="1.0" --> 103 | | <html> 104 | | <head> 105 | | <body> 106 | | "Hi" 107 | 108 | #data 109 | <?xml version="1.0"> 110 | #errors 111 | #document 112 | | <!-- ?xml version="1.0" --> 113 | | <html> 114 | | <head> 115 | | <body> 116 | 117 | #data 118 | <?xml version 119 | #errors 120 | #document 121 | | <!-- ?xml version --> 122 | | <html> 123 | | <head> 124 | | <body> 125 | 126 | #data 127 | FOO<!----->BAZ 128 | #errors 129 | #document 130 | | <html> 131 | | <head> 132 | | <body> 133 | | "FOO" 134 | | <!-- - --> 135 | | "BAZ" 136 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/doctype01.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <!DOCTYPE 
html>Hello 3 | #errors 4 | #document 5 | | <!DOCTYPE html> 6 | | <html> 7 | | <head> 8 | | <body> 9 | | "Hello" 10 | 11 | #data 12 | <!dOctYpE HtMl>Hello 13 | #errors 14 | #document 15 | | <!DOCTYPE html> 16 | | <html> 17 | | <head> 18 | | <body> 19 | | "Hello" 20 | 21 | #data 22 | <!DOCTYPEhtml>Hello 23 | #errors 24 | #document 25 | | <!DOCTYPE html> 26 | | <html> 27 | | <head> 28 | | <body> 29 | | "Hello" 30 | 31 | #data 32 | <!DOCTYPE>Hello 33 | #errors 34 | #document 35 | | <!DOCTYPE > 36 | | <html> 37 | | <head> 38 | | <body> 39 | | "Hello" 40 | 41 | #data 42 | <!DOCTYPE >Hello 43 | #errors 44 | #document 45 | | <!DOCTYPE > 46 | | <html> 47 | | <head> 48 | | <body> 49 | | "Hello" 50 | 51 | #data 52 | <!DOCTYPE potato>Hello 53 | #errors 54 | #document 55 | | <!DOCTYPE potato> 56 | | <html> 57 | | <head> 58 | | <body> 59 | | "Hello" 60 | 61 | #data 62 | <!DOCTYPE potato >Hello 63 | #errors 64 | #document 65 | | <!DOCTYPE potato> 66 | | <html> 67 | | <head> 68 | | <body> 69 | | "Hello" 70 | 71 | #data 72 | <!DOCTYPE potato taco>Hello 73 | #errors 74 | #document 75 | | <!DOCTYPE potato> 76 | | <html> 77 | | <head> 78 | | <body> 79 | | "Hello" 80 | 81 | #data 82 | <!DOCTYPE potato taco "ddd>Hello 83 | #errors 84 | #document 85 | | <!DOCTYPE potato> 86 | | <html> 87 | | <head> 88 | | <body> 89 | | "Hello" 90 | 91 | #data 92 | <!DOCTYPE potato sYstEM>Hello 93 | #errors 94 | #document 95 | | <!DOCTYPE potato> 96 | | <html> 97 | | <head> 98 | | <body> 99 | | "Hello" 100 | 101 | #data 102 | <!DOCTYPE potato sYstEM >Hello 103 | #errors 104 | #document 105 | | <!DOCTYPE potato> 106 | | <html> 107 | | <head> 108 | | <body> 109 | | "Hello" 110 | 111 | #data 112 | <!DOCTYPE potato sYstEM ggg>Hello 113 | #errors 114 | #document 115 | | <!DOCTYPE potato> 116 | | <html> 117 | | <head> 118 | | <body> 119 | | "Hello" 120 | 121 | #data 122 | <!DOCTYPE potato SYSTEM taco >Hello 123 | #errors 124 | #document 125 | | <!DOCTYPE potato> 126 | | <html> 127 | | <head> 128 | | <body> 129 
| | "Hello" 130 | 131 | #data 132 | <!DOCTYPE potato SYSTEM 'taco"'>Hello 133 | #errors 134 | #document 135 | | <!DOCTYPE potato "" "taco""> 136 | | <html> 137 | | <head> 138 | | <body> 139 | | "Hello" 140 | 141 | #data 142 | <!DOCTYPE potato SYSTEM "taco">Hello 143 | #errors 144 | #document 145 | | <!DOCTYPE potato "" "taco"> 146 | | <html> 147 | | <head> 148 | | <body> 149 | | "Hello" 150 | 151 | #data 152 | <!DOCTYPE potato SYSTEM "tai'co">Hello 153 | #errors 154 | #document 155 | | <!DOCTYPE potato "" "tai'co"> 156 | | <html> 157 | | <head> 158 | | <body> 159 | | "Hello" 160 | 161 | #data 162 | <!DOCTYPE potato SYSTEMtaco "ddd">Hello 163 | #errors 164 | #document 165 | | <!DOCTYPE potato> 166 | | <html> 167 | | <head> 168 | | <body> 169 | | "Hello" 170 | 171 | #data 172 | <!DOCTYPE potato grass SYSTEM taco>Hello 173 | #errors 174 | #document 175 | | <!DOCTYPE potato> 176 | | <html> 177 | | <head> 178 | | <body> 179 | | "Hello" 180 | 181 | #data 182 | <!DOCTYPE potato pUbLIc>Hello 183 | #errors 184 | #document 185 | | <!DOCTYPE potato> 186 | | <html> 187 | | <head> 188 | | <body> 189 | | "Hello" 190 | 191 | #data 192 | <!DOCTYPE potato pUbLIc >Hello 193 | #errors 194 | #document 195 | | <!DOCTYPE potato> 196 | | <html> 197 | | <head> 198 | | <body> 199 | | "Hello" 200 | 201 | #data 202 | <!DOCTYPE potato pUbLIcgoof>Hello 203 | #errors 204 | #document 205 | | <!DOCTYPE potato> 206 | | <html> 207 | | <head> 208 | | <body> 209 | | "Hello" 210 | 211 | #data 212 | <!DOCTYPE potato PUBLIC goof>Hello 213 | #errors 214 | #document 215 | | <!DOCTYPE potato> 216 | | <html> 217 | | <head> 218 | | <body> 219 | | "Hello" 220 | 221 | #data 222 | <!DOCTYPE potato PUBLIC "go'of">Hello 223 | #errors 224 | #document 225 | | <!DOCTYPE potato "go'of" ""> 226 | | <html> 227 | | <head> 228 | | <body> 229 | | "Hello" 230 | 231 | #data 232 | <!DOCTYPE potato PUBLIC 'go'of'>Hello 233 | #errors 234 | #document 235 | | <!DOCTYPE potato "go" ""> 236 | | <html> 237 | | <head> 238 | | <body> 
239 | | "Hello" 240 | 241 | #data 242 | <!DOCTYPE potato PUBLIC 'go:hh of' >Hello 243 | #errors 244 | #document 245 | | <!DOCTYPE potato "go:hh of" ""> 246 | | <html> 247 | | <head> 248 | | <body> 249 | | "Hello" 250 | 251 | #data 252 | <!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello 253 | #errors 254 | #document 255 | | <!DOCTYPE potato "W3C-//dfdf" ""> 256 | | <html> 257 | | <head> 258 | | <body> 259 | | "Hello" 260 | 261 | #data 262 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 263 | "http://www.w3.org/TR/html4/strict.dtd">Hello 264 | #errors 265 | #document 266 | | <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 267 | | <html> 268 | | <head> 269 | | <body> 270 | | "Hello" 271 | 272 | #data 273 | <!DOCTYPE ...>Hello 274 | #errors 275 | #document 276 | | <!DOCTYPE ...> 277 | | <html> 278 | | <head> 279 | | <body> 280 | | "Hello" 281 | 282 | #data 283 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 284 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 285 | #errors 286 | #document 287 | | <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 288 | | <html> 289 | | <head> 290 | | <body> 291 | 292 | #data 293 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" 294 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> 295 | #errors 296 | #document 297 | | <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> 298 | | <html> 299 | | <head> 300 | | <body> 301 | 302 | #data 303 | <!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [ 304 | <!-- internal declarations --> 305 | ]> 306 | #errors 307 | #document 308 | | <!DOCTYPE root-element> 309 | | <html> 310 | | <head> 311 | | <body> 312 | | "]>" 313 | 314 | #data 315 | <!DOCTYPE html PUBLIC 316 | "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" 317 | "http://www.wapforum.org/DTD/xhtml-mobile10.dtd"> 318 | #errors 319 | #document 
320 | | <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd"> 321 | | <html> 322 | | <head> 323 | | <body> 324 | 325 | #data 326 | <!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body> 327 | #errors 328 | #document 329 | | <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd"> 330 | | <html> 331 | | <head> 332 | | <body> 333 | | <b> 334 | | "Mine!" 335 | 336 | #data 337 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd"> 338 | #errors 339 | #document 340 | | <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 341 | | <html> 342 | | <head> 343 | | <body> 344 | 345 | #data 346 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'> 347 | #errors 348 | #document 349 | | <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 350 | | <html> 351 | | <head> 352 | | <body> 353 | 354 | #data 355 | <!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'> 356 | #errors 357 | #document 358 | | <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 359 | | <html> 360 | | <head> 361 | | <body> 362 | 363 | #data 364 | <!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'> 365 | #errors 366 | #document 367 | | <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 368 | | <html> 369 | | <head> 370 | | <body> 371 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/entities02.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <div bar="ZZ&gt;YY"></div> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <div> 9 | | bar="ZZ>YY" 10 | 11 | #data 12 | <div bar="ZZ&"></div> 13 | #errors 14 | #document 15 | | 
<html> 16 | | <head> 17 | | <body> 18 | | <div> 19 | | bar="ZZ&" 20 | 21 | #data 22 | <div bar='ZZ&'></div> 23 | #errors 24 | #document 25 | | <html> 26 | | <head> 27 | | <body> 28 | | <div> 29 | | bar="ZZ&" 30 | 31 | #data 32 | <div bar=ZZ&></div> 33 | #errors 34 | #document 35 | | <html> 36 | | <head> 37 | | <body> 38 | | <div> 39 | | bar="ZZ&" 40 | 41 | #data 42 | <div bar="ZZ&gt=YY"></div> 43 | #errors 44 | #document 45 | | <html> 46 | | <head> 47 | | <body> 48 | | <div> 49 | | bar="ZZ&gt=YY" 50 | 51 | #data 52 | <div bar="ZZ&gt0YY"></div> 53 | #errors 54 | #document 55 | | <html> 56 | | <head> 57 | | <body> 58 | | <div> 59 | | bar="ZZ&gt0YY" 60 | 61 | #data 62 | <div bar="ZZ&gt9YY"></div> 63 | #errors 64 | #document 65 | | <html> 66 | | <head> 67 | | <body> 68 | | <div> 69 | | bar="ZZ&gt9YY" 70 | 71 | #data 72 | <div bar="ZZ&gtaYY"></div> 73 | #errors 74 | #document 75 | | <html> 76 | | <head> 77 | | <body> 78 | | <div> 79 | | bar="ZZ&gtaYY" 80 | 81 | #data 82 | <div bar="ZZ&gtZYY"></div> 83 | #errors 84 | #document 85 | | <html> 86 | | <head> 87 | | <body> 88 | | <div> 89 | | bar="ZZ&gtZYY" 90 | 91 | #data 92 | <div bar="ZZ&gt YY"></div> 93 | #errors 94 | #document 95 | | <html> 96 | | <head> 97 | | <body> 98 | | <div> 99 | | bar="ZZ> YY" 100 | 101 | #data 102 | <div bar="ZZ&gt"></div> 103 | #errors 104 | #document 105 | | <html> 106 | | <head> 107 | | <body> 108 | | <div> 109 | | bar="ZZ>" 110 | 111 | #data 112 | <div bar='ZZ&gt'></div> 113 | #errors 114 | #document 115 | | <html> 116 | | <head> 117 | | <body> 118 | | <div> 119 | | bar="ZZ>" 120 | 121 | #data 122 | <div bar=ZZ&gt></div> 123 | #errors 124 | #document 125 | | <html> 126 | | <head> 127 | | <body> 128 | | <div> 129 | | bar="ZZ>" 130 | 131 | #data 132 | <div bar="ZZ&pound_id=23"></div> 133 | #errors 134 | #document 135 | | <html> 136 | | <head> 137 | | <body> 138 | | <div> 139 | | bar="ZZ£_id=23" 140 | 141 | #data 142 | <div bar="ZZ&prod_id=23"></div> 143 | #errors 144 | #document 145 | | <html> 
146 | | <head> 147 | | <body> 148 | | <div> 149 | | bar="ZZ&prod_id=23" 150 | 151 | #data 152 | <div bar="ZZ&pound;_id=23"></div> 153 | #errors 154 | #document 155 | | <html> 156 | | <head> 157 | | <body> 158 | | <div> 159 | | bar="ZZ£_id=23" 160 | 161 | #data 162 | <div bar="ZZ&prod;_id=23"></div> 163 | #errors 164 | #document 165 | | <html> 166 | | <head> 167 | | <body> 168 | | <div> 169 | | bar="ZZ∏_id=23" 170 | 171 | #data 172 | <div bar="ZZ&pound=23"></div> 173 | #errors 174 | #document 175 | | <html> 176 | | <head> 177 | | <body> 178 | | <div> 179 | | bar="ZZ&pound=23" 180 | 181 | #data 182 | <div bar="ZZ&prod=23"></div> 183 | #errors 184 | #document 185 | | <html> 186 | | <head> 187 | | <body> 188 | | <div> 189 | | bar="ZZ&prod=23" 190 | 191 | #data 192 | <div>ZZ&pound_id=23</div> 193 | #errors 194 | #document 195 | | <html> 196 | | <head> 197 | | <body> 198 | | <div> 199 | | "ZZ£_id=23" 200 | 201 | #data 202 | <div>ZZ&prod_id=23</div> 203 | #errors 204 | #document 205 | | <html> 206 | | <head> 207 | | <body> 208 | | <div> 209 | | "ZZ&prod_id=23" 210 | 211 | #data 212 | <div>ZZ&pound;_id=23</div> 213 | #errors 214 | #document 215 | | <html> 216 | | <head> 217 | | <body> 218 | | <div> 219 | | "ZZ£_id=23" 220 | 221 | #data 222 | <div>ZZ&prod;_id=23</div> 223 | #errors 224 | #document 225 | | <html> 226 | | <head> 227 | | <body> 228 | | <div> 229 | | "ZZ∏_id=23" 230 | 231 | #data 232 | <div>ZZ&pound=23</div> 233 | #errors 234 | #document 235 | | <html> 236 | | <head> 237 | | <body> 238 | | <div> 239 | | "ZZ£=23" 240 | 241 | #data 242 | <div>ZZ&prod=23</div> 243 | #errors 244 | #document 245 | | <html> 246 | | <head> 247 | | <body> 248 | | <div> 249 | | "ZZ&prod=23" 250 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/html5test-com.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <div<div> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | 
| <body> 8 | | <div<div> 9 | 10 | #data 11 | <div foo<bar=''> 12 | #errors 13 | #document 14 | | <html> 15 | | <head> 16 | | <body> 17 | | <div> 18 | | foo<bar="" 19 | 20 | #data 21 | <div foo=`bar`> 22 | #errors 23 | #document 24 | | <html> 25 | | <head> 26 | | <body> 27 | | <div> 28 | | foo="`bar`" 29 | 30 | #data 31 | <div \"foo=''> 32 | #errors 33 | #document 34 | | <html> 35 | | <head> 36 | | <body> 37 | | <div> 38 | | \"foo="" 39 | 40 | #data 41 | <a href='\nbar'></a> 42 | #errors 43 | #document 44 | | <html> 45 | | <head> 46 | | <body> 47 | | <a> 48 | | href="\nbar" 49 | 50 | #data 51 | <!DOCTYPE html> 52 | #errors 53 | #document 54 | | <!DOCTYPE html> 55 | | <html> 56 | | <head> 57 | | <body> 58 | 59 | #data 60 | &lang;&rang; 61 | #errors 62 | #document 63 | | <html> 64 | | <head> 65 | | <body> 66 | | "⟨⟩" 67 | 68 | #data 69 | &apos; 70 | #errors 71 | #document 72 | | <html> 73 | | <head> 74 | | <body> 75 | | "'" 76 | 77 | #data 78 | &ImaginaryI; 79 | #errors 80 | #document 81 | | <html> 82 | | <head> 83 | | <body> 84 | | "ⅈ" 85 | 86 | #data 87 | &Kopf; 88 | #errors 89 | #document 90 | | <html> 91 | | <head> 92 | | <body> 93 | | "𝕂" 94 | 95 | #data 96 | &notinva; 97 | #errors 98 | #document 99 | | <html> 100 | | <head> 101 | | <body> 102 | | "∉" 103 | 104 | #data 105 | <?import namespace="foo" implementation="#bar"> 106 | #errors 107 | #document 108 | | <!-- ?import namespace="foo" implementation="#bar" --> 109 | | <html> 110 | | <head> 111 | | <body> 112 | 113 | #data 114 | <!--foo--bar--> 115 | #errors 116 | #document 117 | | <!-- foo--bar --> 118 | | <html> 119 | | <head> 120 | | <body> 121 | 122 | #data 123 | <![CDATA[x]]> 124 | #errors 125 | #document 126 | | <!-- [CDATA[x]] --> 127 | | <html> 128 | | <head> 129 | | <body> 130 | 131 | #data 132 | <textarea><!--</textarea>--></textarea> 133 | #errors 134 | #document 135 | | <html> 136 | | <head> 137 | | <body> 138 | | <textarea> 139 | | "<!--" 140 | | "-->" 141 | 142 | #data 143 | 
<textarea><!--</textarea>--> 144 | #errors 145 | #document 146 | | <html> 147 | | <head> 148 | | <body> 149 | | <textarea> 150 | | "<!--" 151 | | "-->" 152 | 153 | #data 154 | <style><!--</style>--></style> 155 | #errors 156 | #document 157 | | <html> 158 | | <head> 159 | | <style> 160 | | "<!--" 161 | | <body> 162 | | "-->" 163 | 164 | #data 165 | <style><!--</style>--> 166 | #errors 167 | #document 168 | | <html> 169 | | <head> 170 | | <style> 171 | | "<!--" 172 | | <body> 173 | | "-->" 174 | 175 | #data 176 | <ul><li>A </li> <li>B</li></ul> 177 | #errors 178 | #document 179 | | <html> 180 | | <head> 181 | | <body> 182 | | <ul> 183 | | <li> 184 | | "A " 185 | | " " 186 | | <li> 187 | | "B" 188 | 189 | #data 190 | <table><form><input type=hidden><input></form><div></div></table> 191 | #errors 192 | #document 193 | | <html> 194 | | <head> 195 | | <body> 196 | | <input> 197 | | <div> 198 | | <table> 199 | | <form> 200 | | <input> 201 | | type="hidden" 202 | 203 | #data 204 | <i>A<b>B<p></i>C</b>D 205 | #errors 206 | #document 207 | | <html> 208 | | <head> 209 | | <body> 210 | | <i> 211 | | "A" 212 | | <b> 213 | | "B" 214 | | <b> 215 | | <p> 216 | | <b> 217 | | <i> 218 | | "C" 219 | | "D" 220 | 221 | #data 222 | <div></div> 223 | #errors 224 | #document 225 | | <html> 226 | | <head> 227 | | <body> 228 | | <div> 229 | 230 | #data 231 | <svg></svg> 232 | #errors 233 | #document 234 | | <html> 235 | | <head> 236 | | <body> 237 | | <svg svg> 238 | 239 | #data 240 | <math></math> 241 | #errors 242 | #document 243 | | <html> 244 | | <head> 245 | | <body> 246 | | <mathml math> 247 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/inbody01.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <button>1</foo> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <button> 9 | | "1" 10 | 11 | #data 12 | <foo>1<p>2</foo> 13 | #errors 14 | #document 15 
| | <html> 16 | | <head> 17 | | <body> 18 | | <foo> 19 | | "1" 20 | | <p> 21 | | "2" 22 | 23 | #data 24 | <dd>1</foo> 25 | #errors 26 | #document 27 | | <html> 28 | | <head> 29 | | <body> 30 | | <dd> 31 | | "1" 32 | 33 | #data 34 | <foo>1<dd>2</foo> 35 | #errors 36 | #document 37 | | <html> 38 | | <head> 39 | | <body> 40 | | <foo> 41 | | "1" 42 | | <dd> 43 | | "2" 44 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/isindex.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <isindex> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <form> 9 | | <hr> 10 | | <label> 11 | | "This is a searchable index. Enter search keywords: " 12 | | <input> 13 | | name="isindex" 14 | | <hr> 15 | 16 | #data 17 | <isindex name="A" action="B" prompt="C" foo="D"> 18 | #errors 19 | #document 20 | | <html> 21 | | <head> 22 | | <body> 23 | | <form> 24 | | action="B" 25 | | <hr> 26 | | <label> 27 | | "C" 28 | | <input> 29 | | foo="D" 30 | | name="isindex" 31 | | <hr> 32 | 33 | #data 34 | <form><isindex> 35 | #errors 36 | #document 37 | | <html> 38 | | <head> 39 | | <body> 40 | | <form> 41 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/pending-spec-changes-plain-text-unsafe.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <body><table>�filler�text� 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | "fillertext" 9 | | <table> 10 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/pending-spec-changes.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <input type="hidden"><frameset> 3 | #errors 4 | 21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”. 
5 | 31: “frameset” start tag seen. 6 | 31: End of file seen and there were open elements. 7 | #document 8 | | <html> 9 | | <head> 10 | | <frameset> 11 | 12 | #data 13 | <!DOCTYPE html><table><caption><svg>foo</table>bar 14 | #errors 15 | 47: End tag “table” did not match the name of the current open element (“svg”). 16 | 47: “table” closed but “caption” was still open. 17 | 47: End tag “table” seen, but there were open elements. 18 | 36: Unclosed element “svg”. 19 | #document 20 | | <!DOCTYPE html> 21 | | <html> 22 | | <head> 23 | | <body> 24 | | <table> 25 | | <caption> 26 | | <svg svg> 27 | | "foo" 28 | | "bar" 29 | 30 | #data 31 | <table><tr><td><svg><desc><td></desc><circle> 32 | #errors 33 | 7: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”. 34 | 30: A table cell was implicitly closed, but there were open elements. 35 | 26: Unclosed element “desc”. 36 | 20: Unclosed element “svg”. 37 | 37: Stray end tag “desc”. 38 | 45: End of file seen and there were open elements. 39 | 45: Unclosed element “circle”. 40 | 7: Unclosed element “table”. 
41 | #document 42 | | <html> 43 | | <head> 44 | | <body> 45 | | <table> 46 | | <tbody> 47 | | <tr> 48 | | <td> 49 | | <svg svg> 50 | | <svg desc> 51 | | <td> 52 | | <circle> 53 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/plain-text-unsafe.dat: -------------------------------------------------------------------------------- 1 | #data 2 | FOO&#x000D;ZOO 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | "FOO ZOO" 9 | 10 | #data 11 | <html>�<frameset></frameset> 12 | #errors 13 | #document 14 | | <html> 15 | | <head> 16 | | <frameset> 17 | 18 | #data 19 | <html> � <frameset></frameset> 20 | #errors 21 | #document 22 | | <html> 23 | | <head> 24 | | <frameset> 25 | 26 | #data 27 | <html>a�a<frameset></frameset> 28 | #errors 29 | #document 30 | | <html> 31 | | <head> 32 | | <body> 33 | | "aa" 34 | 35 | #data 36 | <html>��<frameset></frameset> 37 | #errors 38 | #document 39 | | <html> 40 | | <head> 41 | | <frameset> 42 | 43 | #data 44 | <html>� 45 | <frameset></frameset> 46 | #errors 47 | #document 48 | | <html> 49 | | <head> 50 | | <frameset> 51 | 52 | #data 53 | <html><select>� 54 | #errors 55 | #document 56 | | <html> 57 | | <head> 58 | | <body> 59 | | <select> 60 | 61 | #data 62 | � 63 | #errors 64 | #document 65 | | <html> 66 | | <head> 67 | | <body> 68 | 69 | #data 70 | <body>� 71 | #errors 72 | #document 73 | | <html> 74 | | <head> 75 | | <body> 76 | 77 | #data 78 | <plaintext>�filler�text� 79 | #errors 80 | #document 81 | | <html> 82 | | <head> 83 | | <body> 84 | | <plaintext> 85 | | "�filler�text�" 86 | 87 | #data 88 | <svg><![CDATA[�filler�text�]]> 89 | #errors 90 | #document 91 | | <html> 92 | | <head> 93 | | <body> 94 | | <svg svg> 95 | | "�filler�text�" 96 | 97 | #data 98 | <body><!�> 99 | #errors 100 | #document 101 | | <html> 102 | | <head> 103 | | <body> 104 | | <!-- � --> 105 | 106 | #data 107 | <body><!�filler�text> 108 | #errors 109 | #document 110 | | <html> 
111 | | <head> 112 | | <body> 113 | | <!-- �filler�text --> 114 | 115 | #data 116 | <body><svg><foreignObject>�filler�text 117 | #errors 118 | #document 119 | | <html> 120 | | <head> 121 | | <body> 122 | | <svg svg> 123 | | <svg foreignObject> 124 | | "fillertext" 125 | 126 | #data 127 | <svg>�filler�text 128 | #errors 129 | #document 130 | | <html> 131 | | <head> 132 | | <body> 133 | | <svg svg> 134 | | "�filler�text" 135 | 136 | #data 137 | <svg>�<frameset> 138 | #errors 139 | #document 140 | | <html> 141 | | <head> 142 | | <body> 143 | | <svg svg> 144 | | "�" 145 | | <svg frameset> 146 | 147 | #data 148 | <svg>� <frameset> 149 | #errors 150 | #document 151 | | <html> 152 | | <head> 153 | | <body> 154 | | <svg svg> 155 | | "� " 156 | | <svg frameset> 157 | 158 | #data 159 | <svg>�a<frameset> 160 | #errors 161 | #document 162 | | <html> 163 | | <head> 164 | | <body> 165 | | <svg svg> 166 | | "�a" 167 | | <svg frameset> 168 | 169 | #data 170 | <svg>�</svg><frameset> 171 | #errors 172 | #document 173 | | <html> 174 | | <head> 175 | | <frameset> 176 | 177 | #data 178 | <svg>� </svg><frameset> 179 | #errors 180 | #document 181 | | <html> 182 | | <head> 183 | | <frameset> 184 | 185 | #data 186 | <svg>�a</svg><frameset> 187 | #errors 188 | #document 189 | | <html> 190 | | <head> 191 | | <body> 192 | | <svg svg> 193 | | "�a" 194 | 195 | #data 196 | <svg><path></path></svg><frameset> 197 | #errors 198 | #document 199 | | <html> 200 | | <head> 201 | | <frameset> 202 | 203 | #data 204 | <svg><p><frameset> 205 | #errors 206 | #document 207 | | <html> 208 | | <head> 209 | | <frameset> 210 | 211 | #data 212 | <!DOCTYPE html><pre> 213 | 214 | A</pre> 215 | #errors 216 | #document 217 | | <!DOCTYPE html> 218 | | <html> 219 | | <head> 220 | | <body> 221 | | <pre> 222 | | " 223 | A" 224 | 225 | #data 226 | <!DOCTYPE html><pre> A</pre> 227 | #errors 228 | #document 229 | | <!DOCTYPE html> 230 | | <html> 231 | | <head> 232 | | <body> 233 | | <pre> 234 | | " 235 | A" 236 | 237 | 
#data 238 | <!DOCTYPE html><pre> A</pre> 239 | #errors 240 | #document 241 | | <!DOCTYPE html> 242 | | <html> 243 | | <head> 244 | | <body> 245 | | <pre> 246 | | "A" 247 | 248 | #data 249 | <!DOCTYPE html><table><tr><td><math><mtext>�a 250 | #errors 251 | 44: Saw U+0000 in stream. 252 | 45: End of file in a foreign namespace context. 253 | #document 254 | | <!DOCTYPE html> 255 | | <html> 256 | | <head> 257 | | <body> 258 | | <table> 259 | | <tbody> 260 | | <tr> 261 | | <td> 262 | | <mathml math> 263 | | <mathml mtext> 264 | | "a" 265 | 266 | #data 267 | <!DOCTYPE html><table><tr><td><svg><foreignObject>�a 268 | #errors 269 | 44: Saw U+0000 in stream. 270 | 45: End of file in a foreign namespace context. 271 | #document 272 | | <!DOCTYPE html> 273 | | <html> 274 | | <head> 275 | | <body> 276 | | <table> 277 | | <tbody> 278 | | <tr> 279 | | <td> 280 | | <svg svg> 281 | | <svg foreignObject> 282 | | "a" 283 | 284 | #data 285 | <!DOCTYPE html><math><mi>a�b 286 | #errors 287 | #document 288 | | <!DOCTYPE html> 289 | | <html> 290 | | <head> 291 | | <body> 292 | | <mathml math> 293 | | <mathml mi> 294 | | "ab" 295 | 296 | #data 297 | <!DOCTYPE html><math><mo>a�b 298 | #errors 299 | #document 300 | | <!DOCTYPE html> 301 | | <html> 302 | | <head> 303 | | <body> 304 | | <mathml math> 305 | | <mathml mo> 306 | | "ab" 307 | 308 | #data 309 | <!DOCTYPE html><math><mn>a�b 310 | #errors 311 | #document 312 | | <!DOCTYPE html> 313 | | <html> 314 | | <head> 315 | | <body> 316 | | <mathml math> 317 | | <mathml mn> 318 | | "ab" 319 | 320 | #data 321 | <!DOCTYPE html><math><ms>a�b 322 | #errors 323 | #document 324 | | <!DOCTYPE html> 325 | | <html> 326 | | <head> 327 | | <body> 328 | | <mathml math> 329 | | <mathml ms> 330 | | "ab" 331 | 332 | #data 333 | <!DOCTYPE html><math><mtext>a�b 334 | #errors 335 | #document 336 | | <!DOCTYPE html> 337 | | <html> 338 | | <head> 339 | | <body> 340 | | <mathml math> 341 | | <mathml mtext> 342 | | "ab" 343 | 
-------------------------------------------------------------------------------- /tests/testdata/tree-construction/scriptdata01.dat: -------------------------------------------------------------------------------- 1 | #data 2 | FOO<script>'Hello'</script>BAR 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | "FOO" 9 | | <script> 10 | | "'Hello'" 11 | | "BAR" 12 | 13 | #data 14 | FOO<script></script>BAR 15 | #errors 16 | #document 17 | | <html> 18 | | <head> 19 | | <body> 20 | | "FOO" 21 | | <script> 22 | | "BAR" 23 | 24 | #data 25 | FOO<script></script >BAR 26 | #errors 27 | #document 28 | | <html> 29 | | <head> 30 | | <body> 31 | | "FOO" 32 | | <script> 33 | | "BAR" 34 | 35 | #data 36 | FOO<script></script/>BAR 37 | #errors 38 | #document 39 | | <html> 40 | | <head> 41 | | <body> 42 | | "FOO" 43 | | <script> 44 | | "BAR" 45 | 46 | #data 47 | FOO<script></script/ >BAR 48 | #errors 49 | #document 50 | | <html> 51 | | <head> 52 | | <body> 53 | | "FOO" 54 | | <script> 55 | | "BAR" 56 | 57 | #data 58 | FOO<script type="text/plain"></scriptx>BAR 59 | #errors 60 | #document 61 | | <html> 62 | | <head> 63 | | <body> 64 | | "FOO" 65 | | <script> 66 | | type="text/plain" 67 | | "</scriptx>BAR" 68 | 69 | #data 70 | FOO<script></script foo=">" dd>BAR 71 | #errors 72 | #document 73 | | <html> 74 | | <head> 75 | | <body> 76 | | "FOO" 77 | | <script> 78 | | "BAR" 79 | 80 | #data 81 | FOO<script>'<'</script>BAR 82 | #errors 83 | #document 84 | | <html> 85 | | <head> 86 | | <body> 87 | | "FOO" 88 | | <script> 89 | | "'<'" 90 | | "BAR" 91 | 92 | #data 93 | FOO<script>'<!'</script>BAR 94 | #errors 95 | #document 96 | | <html> 97 | | <head> 98 | | <body> 99 | | "FOO" 100 | | <script> 101 | | "'<!'" 102 | | "BAR" 103 | 104 | #data 105 | FOO<script>'<!-'</script>BAR 106 | #errors 107 | #document 108 | | <html> 109 | | <head> 110 | | <body> 111 | | "FOO" 112 | | <script> 113 | | "'<!-'" 114 | | "BAR" 115 | 116 | #data 117 | FOO<script>'<!--'</script>BAR 118 | #errors 
119 | #document 120 | | <html> 121 | | <head> 122 | | <body> 123 | | "FOO" 124 | | <script> 125 | | "'<!--'" 126 | | "BAR" 127 | 128 | #data 129 | FOO<script>'<!---'</script>BAR 130 | #errors 131 | #document 132 | | <html> 133 | | <head> 134 | | <body> 135 | | "FOO" 136 | | <script> 137 | | "'<!---'" 138 | | "BAR" 139 | 140 | #data 141 | FOO<script>'<!-->'</script>BAR 142 | #errors 143 | #document 144 | | <html> 145 | | <head> 146 | | <body> 147 | | "FOO" 148 | | <script> 149 | | "'<!-->'" 150 | | "BAR" 151 | 152 | #data 153 | FOO<script>'<!-->'</script>BAR 154 | #errors 155 | #document 156 | | <html> 157 | | <head> 158 | | <body> 159 | | "FOO" 160 | | <script> 161 | | "'<!-->'" 162 | | "BAR" 163 | 164 | #data 165 | FOO<script>'<!-- potato'</script>BAR 166 | #errors 167 | #document 168 | | <html> 169 | | <head> 170 | | <body> 171 | | "FOO" 172 | | <script> 173 | | "'<!-- potato'" 174 | | "BAR" 175 | 176 | #data 177 | FOO<script>'<!-- <sCrIpt'</script>BAR 178 | #errors 179 | #document 180 | | <html> 181 | | <head> 182 | | <body> 183 | | "FOO" 184 | | <script> 185 | | "'<!-- <sCrIpt'" 186 | | "BAR" 187 | 188 | #data 189 | FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR 190 | #errors 191 | #document 192 | | <html> 193 | | <head> 194 | | <body> 195 | | "FOO" 196 | | <script> 197 | | type="text/plain" 198 | | "'<!-- <sCrIpt>'</script>BAR" 199 | 200 | #data 201 | FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR 202 | #errors 203 | #document 204 | | <html> 205 | | <head> 206 | | <body> 207 | | "FOO" 208 | | <script> 209 | | type="text/plain" 210 | | "'<!-- <sCrIpt> -'</script>BAR" 211 | 212 | #data 213 | FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR 214 | #errors 215 | #document 216 | | <html> 217 | | <head> 218 | | <body> 219 | | "FOO" 220 | | <script> 221 | | type="text/plain" 222 | | "'<!-- <sCrIpt> --'</script>BAR" 223 | 224 | #data 225 | FOO<script>'<!-- <sCrIpt> -->'</script>BAR 226 | #errors 227 | #document 228 | | <html> 229 | | 
<head> 230 | | <body> 231 | | "FOO" 232 | | <script> 233 | | "'<!-- <sCrIpt> -->'" 234 | | "BAR" 235 | 236 | #data 237 | FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR 238 | #errors 239 | #document 240 | | <html> 241 | | <head> 242 | | <body> 243 | | "FOO" 244 | | <script> 245 | | type="text/plain" 246 | | "'<!-- <sCrIpt> --!>'</script>BAR" 247 | 248 | #data 249 | FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR 250 | #errors 251 | #document 252 | | <html> 253 | | <head> 254 | | <body> 255 | | "FOO" 256 | | <script> 257 | | type="text/plain" 258 | | "'<!-- <sCrIpt> -- >'</script>BAR" 259 | 260 | #data 261 | FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR 262 | #errors 263 | #document 264 | | <html> 265 | | <head> 266 | | <body> 267 | | "FOO" 268 | | <script> 269 | | type="text/plain" 270 | | "'<!-- <sCrIpt '</script>BAR" 271 | 272 | #data 273 | FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR 274 | #errors 275 | #document 276 | | <html> 277 | | <head> 278 | | <body> 279 | | "FOO" 280 | | <script> 281 | | type="text/plain" 282 | | "'<!-- <sCrIpt/'</script>BAR" 283 | 284 | #data 285 | FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR 286 | #errors 287 | #document 288 | | <html> 289 | | <head> 290 | | <body> 291 | | "FOO" 292 | | <script> 293 | | type="text/plain" 294 | | "'<!-- <sCrIpt\'" 295 | | "BAR" 296 | 297 | #data 298 | FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX 299 | #errors 300 | #document 301 | | <html> 302 | | <head> 303 | | <body> 304 | | "FOO" 305 | | <script> 306 | | type="text/plain" 307 | | "'<!-- <sCrIpt/'</script>BAR" 308 | | "QUX" 309 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tables01.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <table><th> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <table> 9 | | <tbody> 10 | | <tr> 
11 | | <th> 12 | 13 | #data 14 | <table><td> 15 | #errors 16 | #document 17 | | <html> 18 | | <head> 19 | | <body> 20 | | <table> 21 | | <tbody> 22 | | <tr> 23 | | <td> 24 | 25 | #data 26 | <table><col foo='bar'> 27 | #errors 28 | #document 29 | | <html> 30 | | <head> 31 | | <body> 32 | | <table> 33 | | <colgroup> 34 | | <col> 35 | | foo="bar" 36 | 37 | #data 38 | <table><colgroup></html>foo 39 | #errors 40 | #document 41 | | <html> 42 | | <head> 43 | | <body> 44 | | "foo" 45 | | <table> 46 | | <colgroup> 47 | 48 | #data 49 | <table></table><p>foo 50 | #errors 51 | #document 52 | | <html> 53 | | <head> 54 | | <body> 55 | | <table> 56 | | <p> 57 | | "foo" 58 | 59 | #data 60 | <table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td> 61 | #errors 62 | #document 63 | | <html> 64 | | <head> 65 | | <body> 66 | | <table> 67 | | <tbody> 68 | | <tr> 69 | | <td> 70 | 71 | #data 72 | <table><select><option>3</select></table> 73 | #errors 74 | #document 75 | | <html> 76 | | <head> 77 | | <body> 78 | | <select> 79 | | <option> 80 | | "3" 81 | | <table> 82 | 83 | #data 84 | <table><select><table></table></select></table> 85 | #errors 86 | #document 87 | | <html> 88 | | <head> 89 | | <body> 90 | | <select> 91 | | <table> 92 | | <table> 93 | 94 | #data 95 | <table><select></table> 96 | #errors 97 | #document 98 | | <html> 99 | | <head> 100 | | <body> 101 | | <select> 102 | | <table> 103 | 104 | #data 105 | <table><select><option>A<tr><td>B</td></tr></table> 106 | #errors 107 | #document 108 | | <html> 109 | | <head> 110 | | <body> 111 | | <select> 112 | | <option> 113 | | "A" 114 | | <table> 115 | | <tbody> 116 | | <tr> 117 | | <td> 118 | | "B" 119 | 120 | #data 121 | <table><td></body></caption></col></colgroup></html>foo 122 | #errors 123 | #document 124 | | <html> 125 | | <head> 126 | | <body> 127 | | <table> 128 | | <tbody> 129 | | <tr> 130 | | <td> 131 | | "foo" 132 | 133 | #data 134 | <table><td>A</table>B 135 | #errors 136 | #document 137 
| | <html> 138 | | <head> 139 | | <body> 140 | | <table> 141 | | <tbody> 142 | | <tr> 143 | | <td> 144 | | "A" 145 | | "B" 146 | 147 | #data 148 | <table><tr><caption> 149 | #errors 150 | #document 151 | | <html> 152 | | <head> 153 | | <body> 154 | | <table> 155 | | <tbody> 156 | | <tr> 157 | | <caption> 158 | 159 | #data 160 | <table><tr></body></caption></col></colgroup></html></td></th><td>foo 161 | #errors 162 | #document 163 | | <html> 164 | | <head> 165 | | <body> 166 | | <table> 167 | | <tbody> 168 | | <tr> 169 | | <td> 170 | | "foo" 171 | 172 | #data 173 | <table><td><tr> 174 | #errors 175 | #document 176 | | <html> 177 | | <head> 178 | | <body> 179 | | <table> 180 | | <tbody> 181 | | <tr> 182 | | <td> 183 | | <tr> 184 | 185 | #data 186 | <table><td><button><td> 187 | #errors 188 | #document 189 | | <html> 190 | | <head> 191 | | <body> 192 | | <table> 193 | | <tbody> 194 | | <tr> 195 | | <td> 196 | | <button> 197 | | <td> 198 | 199 | #data 200 | <table><tr><td><svg><desc><td> 201 | #errors 202 | #document 203 | | <html> 204 | | <head> 205 | | <body> 206 | | <table> 207 | | <tbody> 208 | | <tr> 209 | | <td> 210 | | <svg svg> 211 | | <svg desc> 212 | | <td> 213 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests12.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar 3 | #errors 4 | #document 5 | | <!DOCTYPE html> 6 | | <html> 7 | | <head> 8 | | <body> 9 | | <p> 10 | | "foo" 11 | | <mathml math> 12 | | <mathml mtext> 13 | | <i> 14 | | "baz" 15 | | <mathml annotation-xml> 16 | | <svg svg> 17 | | <svg desc> 18 | | <b> 19 | | "eggs" 20 | | <svg g> 21 | | <svg foreignObject> 22 | | <p> 23 | | "spam" 24 | | <table> 25 | | 
<tbody> 26 | | <tr> 27 | | <td> 28 | | <img> 29 | | <svg g> 30 | | "quux" 31 | | "bar" 32 | 33 | #data 34 | <!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar 35 | #errors 36 | #document 37 | | <!DOCTYPE html> 38 | | <html> 39 | | <head> 40 | | <body> 41 | | "foo" 42 | | <mathml math> 43 | | <mathml mtext> 44 | | <i> 45 | | "baz" 46 | | <mathml annotation-xml> 47 | | <svg svg> 48 | | <svg desc> 49 | | <b> 50 | | "eggs" 51 | | <svg g> 52 | | <svg foreignObject> 53 | | <p> 54 | | "spam" 55 | | <table> 56 | | <tbody> 57 | | <tr> 58 | | <td> 59 | | <img> 60 | | <svg g> 61 | | "quux" 62 | | "bar" 63 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests14.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <!DOCTYPE html><html><body><xyz:abc></xyz:abc> 3 | #errors 4 | #document 5 | | <!DOCTYPE html> 6 | | <html> 7 | | <head> 8 | | <body> 9 | | <xyz:abc> 10 | 11 | #data 12 | <!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span> 13 | #errors 14 | #document 15 | | <!DOCTYPE html> 16 | | <html> 17 | | <head> 18 | | <body> 19 | | <xyz:abc> 20 | | <span> 21 | 22 | #data 23 | <!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc> 24 | #errors 25 | 15: Unexpected start tag html 26 | #document 27 | | <!DOCTYPE html> 28 | | <html> 29 | | abc:def="gh" 30 | | <head> 31 | | <body> 32 | | <xyz:abc> 33 | 34 | #data 35 | <!DOCTYPE html><html xml:lang=bar><html xml:lang=foo> 36 | #errors 37 | 15: Unexpected start tag html 38 | #document 39 | | <!DOCTYPE html> 40 | | <html> 41 | | xml:lang="bar" 42 | | <head> 43 | | <body> 44 | 45 | #data 46 | <!DOCTYPE html><html 123=456> 47 | #errors 48 | #document 49 | | <!DOCTYPE html> 50 | | <html> 51 | | 123="456" 52 | | <head> 53 | | <body> 54 | 55 | #data 
56 | <!DOCTYPE html><html 123=456><html 789=012> 57 | #errors 58 | #document 59 | | <!DOCTYPE html> 60 | | <html> 61 | | 123="456" 62 | | 789="012" 63 | | <head> 64 | | <body> 65 | 66 | #data 67 | <!DOCTYPE html><html><body 789=012> 68 | #errors 69 | #document 70 | | <!DOCTYPE html> 71 | | <html> 72 | | <head> 73 | | <body> 74 | | 789="012" 75 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests15.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <!DOCTYPE html><p><b><i><u></p> <p>X 3 | #errors 4 | Line: 1 Col: 31 Unexpected end tag (p). Ignored. 5 | Line: 1 Col: 36 Expected closing tag. Unexpected end of file. 6 | #document 7 | | <!DOCTYPE html> 8 | | <html> 9 | | <head> 10 | | <body> 11 | | <p> 12 | | <b> 13 | | <i> 14 | | <u> 15 | | <b> 16 | | <i> 17 | | <u> 18 | | " " 19 | | <p> 20 | | "X" 21 | 22 | #data 23 | <p><b><i><u></p> 24 | <p>X 25 | #errors 26 | Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE. 27 | Line: 1 Col: 16 Unexpected end tag (p). Ignored. 28 | Line: 2 Col: 4 Expected closing tag. Unexpected end of file. 29 | #document 30 | | <html> 31 | | <head> 32 | | <body> 33 | | <p> 34 | | <b> 35 | | <i> 36 | | <u> 37 | | <b> 38 | | <i> 39 | | <u> 40 | | " 41 | " 42 | | <p> 43 | | "X" 44 | 45 | #data 46 | <!doctype html></html> <head> 47 | #errors 48 | Line: 1 Col: 22 Unexpected end tag (html) after the (implied) root element. 49 | #document 50 | | <!DOCTYPE html> 51 | | <html> 52 | | <head> 53 | | <body> 54 | | " " 55 | 56 | #data 57 | <!doctype html></body><meta> 58 | #errors 59 | Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element. 60 | #document 61 | | <!DOCTYPE html> 62 | | <html> 63 | | <head> 64 | | <body> 65 | | <meta> 66 | 67 | #data 68 | <html></html><!-- foo --> 69 | #errors 70 | Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE. 
71 | Line: 1 Col: 13 Unexpected end tag (html) after the (implied) root element. 72 | #document 73 | | <html> 74 | | <head> 75 | | <body> 76 | | <!-- foo --> 77 | 78 | #data 79 | <!doctype html></body><title>X</title> 80 | #errors 81 | Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element. 82 | #document 83 | | <!DOCTYPE html> 84 | | <html> 85 | | <head> 86 | | <body> 87 | | <title> 88 | | "X" 89 | 90 | #data 91 | <!doctype html><table> X<meta></table> 92 | #errors 93 | Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode. 94 | Line: 1 Col: 30 Unexpected start tag (meta) in table context caused voodoo mode. 95 | #document 96 | | <!DOCTYPE html> 97 | | <html> 98 | | <head> 99 | | <body> 100 | | " X" 101 | | <meta> 102 | | <table> 103 | 104 | #data 105 | <!doctype html><table> x</table> 106 | #errors 107 | Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode. 108 | #document 109 | | <!DOCTYPE html> 110 | | <html> 111 | | <head> 112 | | <body> 113 | | " x" 114 | | <table> 115 | 116 | #data 117 | <!doctype html><table> x </table> 118 | #errors 119 | Line: 1 Col: 25 Unexpected non-space characters in table context caused voodoo mode. 120 | #document 121 | | <!DOCTYPE html> 122 | | <html> 123 | | <head> 124 | | <body> 125 | | " x " 126 | | <table> 127 | 128 | #data 129 | <!doctype html><table><tr> x</table> 130 | #errors 131 | Line: 1 Col: 28 Unexpected non-space characters in table context caused voodoo mode. 132 | #document 133 | | <!DOCTYPE html> 134 | | <html> 135 | | <head> 136 | | <body> 137 | | " x" 138 | | <table> 139 | | <tbody> 140 | | <tr> 141 | 142 | #data 143 | <!doctype html><table>X<style> <tr>x </style> </table> 144 | #errors 145 | Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode. 
146 | #document 147 | | <!DOCTYPE html> 148 | | <html> 149 | | <head> 150 | | <body> 151 | | "X" 152 | | <table> 153 | | <style> 154 | | " <tr>x " 155 | | " " 156 | 157 | #data 158 | <!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div> 159 | #errors 160 | Line: 1 Col: 30 Unexpected start tag (a) in table context caused voodoo mode. 161 | Line: 1 Col: 37 Unexpected end tag (a) in table context caused voodoo mode. 162 | #document 163 | | <!DOCTYPE html> 164 | | <html> 165 | | <head> 166 | | <body> 167 | | <div> 168 | | <a> 169 | | "foo" 170 | | <table> 171 | | " " 172 | | <tbody> 173 | | <tr> 174 | | <td> 175 | | "bar" 176 | | " " 177 | 178 | #data 179 | <frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes> 180 | #errors 181 | 6: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”. 182 | 13: Stray start tag “frame”. 183 | 21: Stray end tag “frame”. 184 | 29: Stray end tag “frame”. 185 | 39: “frameset” start tag after “body” already open. 186 | 105: End of file seen inside an [R]CDATA element. 187 | 105: End of file seen and there were open elements. 188 | XXX: These errors are wrong, please fix me! 189 | #document 190 | | <html> 191 | | <head> 192 | | <frameset> 193 | | <frame> 194 | | <frameset> 195 | | <frame> 196 | | <noframes> 197 | | "</frameset><noframes>" 198 | 199 | #data 200 | <!DOCTYPE html><object></html> 201 | #errors 202 | 1: Expected closing tag. 
Unexpected end of file 203 | #document 204 | | <!DOCTYPE html> 205 | | <html> 206 | | <head> 207 | | <body> 208 | | <object> 209 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests17.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <!doctype html><table><tbody><select><tr> 3 | #errors 4 | #document 5 | | <!DOCTYPE html> 6 | | <html> 7 | | <head> 8 | | <body> 9 | | <select> 10 | | <table> 11 | | <tbody> 12 | | <tr> 13 | 14 | #data 15 | <!doctype html><table><tr><select><td> 16 | #errors 17 | #document 18 | | <!DOCTYPE html> 19 | | <html> 20 | | <head> 21 | | <body> 22 | | <select> 23 | | <table> 24 | | <tbody> 25 | | <tr> 26 | | <td> 27 | 28 | #data 29 | <!doctype html><table><tr><td><select><td> 30 | #errors 31 | #document 32 | | <!DOCTYPE html> 33 | | <html> 34 | | <head> 35 | | <body> 36 | | <table> 37 | | <tbody> 38 | | <tr> 39 | | <td> 40 | | <select> 41 | | <td> 42 | 43 | #data 44 | <!doctype html><table><tr><th><select><td> 45 | #errors 46 | #document 47 | | <!DOCTYPE html> 48 | | <html> 49 | | <head> 50 | | <body> 51 | | <table> 52 | | <tbody> 53 | | <tr> 54 | | <th> 55 | | <select> 56 | | <td> 57 | 58 | #data 59 | <!doctype html><table><caption><select><tr> 60 | #errors 61 | #document 62 | | <!DOCTYPE html> 63 | | <html> 64 | | <head> 65 | | <body> 66 | | <table> 67 | | <caption> 68 | | <select> 69 | | <tbody> 70 | | <tr> 71 | 72 | #data 73 | <!doctype html><select><tr> 74 | #errors 75 | #document 76 | | <!DOCTYPE html> 77 | | <html> 78 | | <head> 79 | | <body> 80 | | <select> 81 | 82 | #data 83 | <!doctype html><select><td> 84 | #errors 85 | #document 86 | | <!DOCTYPE html> 87 | | <html> 88 | | <head> 89 | | <body> 90 | | <select> 91 | 92 | #data 93 | <!doctype html><select><th> 94 | #errors 95 | #document 96 | | <!DOCTYPE html> 97 | | <html> 98 | | <head> 99 | | <body> 100 | | <select> 101 | 102 | #data 103 | <!doctype 
html><select><tbody> 104 | #errors 105 | #document 106 | | <!DOCTYPE html> 107 | | <html> 108 | | <head> 109 | | <body> 110 | | <select> 111 | 112 | #data 113 | <!doctype html><select><thead> 114 | #errors 115 | #document 116 | | <!DOCTYPE html> 117 | | <html> 118 | | <head> 119 | | <body> 120 | | <select> 121 | 122 | #data 123 | <!doctype html><select><tfoot> 124 | #errors 125 | #document 126 | | <!DOCTYPE html> 127 | | <html> 128 | | <head> 129 | | <body> 130 | | <select> 131 | 132 | #data 133 | <!doctype html><select><caption> 134 | #errors 135 | #document 136 | | <!DOCTYPE html> 137 | | <html> 138 | | <head> 139 | | <body> 140 | | <select> 141 | 142 | #data 143 | <!doctype html><table><tr></table>a 144 | #errors 145 | #document 146 | | <!DOCTYPE html> 147 | | <html> 148 | | <head> 149 | | <body> 150 | | <table> 151 | | <tbody> 152 | | <tr> 153 | | "a" 154 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests18.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <!doctype html><plaintext></plaintext> 3 | #errors 4 | #document 5 | | <!DOCTYPE html> 6 | | <html> 7 | | <head> 8 | | <body> 9 | | <plaintext> 10 | | "</plaintext>" 11 | 12 | #data 13 | <!doctype html><table><plaintext></plaintext> 14 | #errors 15 | #document 16 | | <!DOCTYPE html> 17 | | <html> 18 | | <head> 19 | | <body> 20 | | <plaintext> 21 | | "</plaintext>" 22 | | <table> 23 | 24 | #data 25 | <!doctype html><table><tbody><plaintext></plaintext> 26 | #errors 27 | #document 28 | | <!DOCTYPE html> 29 | | <html> 30 | | <head> 31 | | <body> 32 | | <plaintext> 33 | | "</plaintext>" 34 | | <table> 35 | | <tbody> 36 | 37 | #data 38 | <!doctype html><table><tbody><tr><plaintext></plaintext> 39 | #errors 40 | #document 41 | | <!DOCTYPE html> 42 | | <html> 43 | | <head> 44 | | <body> 45 | | <plaintext> 46 | | "</plaintext>" 47 | | <table> 48 | | <tbody> 49 | | <tr> 50 | 51 | #data 52 | 
<!doctype html><table><tbody><tr><plaintext></plaintext> 53 | #errors 54 | #document 55 | | <!DOCTYPE html> 56 | | <html> 57 | | <head> 58 | | <body> 59 | | <plaintext> 60 | | "</plaintext>" 61 | | <table> 62 | | <tbody> 63 | | <tr> 64 | 65 | #data 66 | <!doctype html><table><td><plaintext></plaintext> 67 | #errors 68 | #document 69 | | <!DOCTYPE html> 70 | | <html> 71 | | <head> 72 | | <body> 73 | | <table> 74 | | <tbody> 75 | | <tr> 76 | | <td> 77 | | <plaintext> 78 | | "</plaintext>" 79 | 80 | #data 81 | <!doctype html><table><caption><plaintext></plaintext> 82 | #errors 83 | #document 84 | | <!DOCTYPE html> 85 | | <html> 86 | | <head> 87 | | <body> 88 | | <table> 89 | | <caption> 90 | | <plaintext> 91 | | "</plaintext>" 92 | 93 | #data 94 | <!doctype html><table><tr><style></script></style>abc 95 | #errors 96 | #document 97 | | <!DOCTYPE html> 98 | | <html> 99 | | <head> 100 | | <body> 101 | | "abc" 102 | | <table> 103 | | <tbody> 104 | | <tr> 105 | | <style> 106 | | "</script>" 107 | 108 | #data 109 | <!doctype html><table><tr><script></style></script>abc 110 | #errors 111 | #document 112 | | <!DOCTYPE html> 113 | | <html> 114 | | <head> 115 | | <body> 116 | | "abc" 117 | | <table> 118 | | <tbody> 119 | | <tr> 120 | | <script> 121 | | "</style>" 122 | 123 | #data 124 | <!doctype html><table><caption><style></script></style>abc 125 | #errors 126 | #document 127 | | <!DOCTYPE html> 128 | | <html> 129 | | <head> 130 | | <body> 131 | | <table> 132 | | <caption> 133 | | <style> 134 | | "</script>" 135 | | "abc" 136 | 137 | #data 138 | <!doctype html><table><td><style></script></style>abc 139 | #errors 140 | #document 141 | | <!DOCTYPE html> 142 | | <html> 143 | | <head> 144 | | <body> 145 | | <table> 146 | | <tbody> 147 | | <tr> 148 | | <td> 149 | | <style> 150 | | "</script>" 151 | | "abc" 152 | 153 | #data 154 | <!doctype html><select><script></style></script>abc 155 | #errors 156 | #document 157 | | <!DOCTYPE html> 158 | | <html> 159 | | <head> 160 | | <body> 
161 | | <select> 162 | | <script> 163 | | "</style>" 164 | | "abc" 165 | 166 | #data 167 | <!doctype html><table><select><script></style></script>abc 168 | #errors 169 | #document 170 | | <!DOCTYPE html> 171 | | <html> 172 | | <head> 173 | | <body> 174 | | <select> 175 | | <script> 176 | | "</style>" 177 | | "abc" 178 | | <table> 179 | 180 | #data 181 | <!doctype html><table><tr><select><script></style></script>abc 182 | #errors 183 | #document 184 | | <!DOCTYPE html> 185 | | <html> 186 | | <head> 187 | | <body> 188 | | <select> 189 | | <script> 190 | | "</style>" 191 | | "abc" 192 | | <table> 193 | | <tbody> 194 | | <tr> 195 | 196 | #data 197 | <!doctype html><frameset></frameset><noframes>abc 198 | #errors 199 | #document 200 | | <!DOCTYPE html> 201 | | <html> 202 | | <head> 203 | | <frameset> 204 | | <noframes> 205 | | "abc" 206 | 207 | #data 208 | <!doctype html><frameset></frameset><noframes>abc</noframes><!--abc--> 209 | #errors 210 | #document 211 | | <!DOCTYPE html> 212 | | <html> 213 | | <head> 214 | | <frameset> 215 | | <noframes> 216 | | "abc" 217 | | <!-- abc --> 218 | 219 | #data 220 | <!doctype html><frameset></frameset></html><noframes>abc 221 | #errors 222 | #document 223 | | <!DOCTYPE html> 224 | | <html> 225 | | <head> 226 | | <frameset> 227 | | <noframes> 228 | | "abc" 229 | 230 | #data 231 | <!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc--> 232 | #errors 233 | #document 234 | | <!DOCTYPE html> 235 | | <html> 236 | | <head> 237 | | <frameset> 238 | | <noframes> 239 | | "abc" 240 | | <!-- abc --> 241 | 242 | #data 243 | <!doctype html><table><tr></tbody><tfoot> 244 | #errors 245 | #document 246 | | <!DOCTYPE html> 247 | | <html> 248 | | <head> 249 | | <body> 250 | | <table> 251 | | <tbody> 252 | | <tr> 253 | | <tfoot> 254 | 255 | #data 256 | <!doctype html><table><td><svg></svg>abc<td> 257 | #errors 258 | #document 259 | | <!DOCTYPE html> 260 | | <html> 261 | | <head> 262 | | <body> 263 | | <table> 264 | | <tbody> 265 | 
| <tr> 266 | | <td> 267 | | <svg svg> 268 | | "abc" 269 | | <td> 270 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests20.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <!doctype html><p><button><button> 3 | #errors 4 | #document 5 | | <!DOCTYPE html> 6 | | <html> 7 | | <head> 8 | | <body> 9 | | <p> 10 | | <button> 11 | | <button> 12 | 13 | #data 14 | <!doctype html><p><button><address> 15 | #errors 16 | #document 17 | | <!DOCTYPE html> 18 | | <html> 19 | | <head> 20 | | <body> 21 | | <p> 22 | | <button> 23 | | <address> 24 | 25 | #data 26 | <!doctype html><p><button><blockquote> 27 | #errors 28 | #document 29 | | <!DOCTYPE html> 30 | | <html> 31 | | <head> 32 | | <body> 33 | | <p> 34 | | <button> 35 | | <blockquote> 36 | 37 | #data 38 | <!doctype html><p><button><menu> 39 | #errors 40 | #document 41 | | <!DOCTYPE html> 42 | | <html> 43 | | <head> 44 | | <body> 45 | | <p> 46 | | <button> 47 | | <menu> 48 | 49 | #data 50 | <!doctype html><p><button><p> 51 | #errors 52 | #document 53 | | <!DOCTYPE html> 54 | | <html> 55 | | <head> 56 | | <body> 57 | | <p> 58 | | <button> 59 | | <p> 60 | 61 | #data 62 | <!doctype html><p><button><ul> 63 | #errors 64 | #document 65 | | <!DOCTYPE html> 66 | | <html> 67 | | <head> 68 | | <body> 69 | | <p> 70 | | <button> 71 | | <ul> 72 | 73 | #data 74 | <!doctype html><p><button><h1> 75 | #errors 76 | #document 77 | | <!DOCTYPE html> 78 | | <html> 79 | | <head> 80 | | <body> 81 | | <p> 82 | | <button> 83 | | <h1> 84 | 85 | #data 86 | <!doctype html><p><button><h6> 87 | #errors 88 | #document 89 | | <!DOCTYPE html> 90 | | <html> 91 | | <head> 92 | | <body> 93 | | <p> 94 | | <button> 95 | | <h6> 96 | 97 | #data 98 | <!doctype html><p><button><listing> 99 | #errors 100 | #document 101 | | <!DOCTYPE html> 102 | | <html> 103 | | <head> 104 | | <body> 105 | | <p> 106 | | <button> 107 | | <listing> 108 | 109 | #data 110 | 
<!doctype html><p><button><pre> 111 | #errors 112 | #document 113 | | <!DOCTYPE html> 114 | | <html> 115 | | <head> 116 | | <body> 117 | | <p> 118 | | <button> 119 | | <pre> 120 | 121 | #data 122 | <!doctype html><p><button><form> 123 | #errors 124 | #document 125 | | <!DOCTYPE html> 126 | | <html> 127 | | <head> 128 | | <body> 129 | | <p> 130 | | <button> 131 | | <form> 132 | 133 | #data 134 | <!doctype html><p><button><li> 135 | #errors 136 | #document 137 | | <!DOCTYPE html> 138 | | <html> 139 | | <head> 140 | | <body> 141 | | <p> 142 | | <button> 143 | | <li> 144 | 145 | #data 146 | <!doctype html><p><button><dd> 147 | #errors 148 | #document 149 | | <!DOCTYPE html> 150 | | <html> 151 | | <head> 152 | | <body> 153 | | <p> 154 | | <button> 155 | | <dd> 156 | 157 | #data 158 | <!doctype html><p><button><dt> 159 | #errors 160 | #document 161 | | <!DOCTYPE html> 162 | | <html> 163 | | <head> 164 | | <body> 165 | | <p> 166 | | <button> 167 | | <dt> 168 | 169 | #data 170 | <!doctype html><p><button><plaintext> 171 | #errors 172 | #document 173 | | <!DOCTYPE html> 174 | | <html> 175 | | <head> 176 | | <body> 177 | | <p> 178 | | <button> 179 | | <plaintext> 180 | 181 | #data 182 | <!doctype html><p><button><table> 183 | #errors 184 | #document 185 | | <!DOCTYPE html> 186 | | <html> 187 | | <head> 188 | | <body> 189 | | <p> 190 | | <button> 191 | | <table> 192 | 193 | #data 194 | <!doctype html><p><button><hr> 195 | #errors 196 | #document 197 | | <!DOCTYPE html> 198 | | <html> 199 | | <head> 200 | | <body> 201 | | <p> 202 | | <button> 203 | | <hr> 204 | 205 | #data 206 | <!doctype html><p><button><xmp> 207 | #errors 208 | #document 209 | | <!DOCTYPE html> 210 | | <html> 211 | | <head> 212 | | <body> 213 | | <p> 214 | | <button> 215 | | <xmp> 216 | 217 | #data 218 | <!doctype html><p><button></p> 219 | #errors 220 | #document 221 | | <!DOCTYPE html> 222 | | <html> 223 | | <head> 224 | | <body> 225 | | <p> 226 | | <button> 227 | | <p> 228 | 229 | #data 230 | <!doctype 
html><address><button></address>a 231 | #errors 232 | #document 233 | | <!DOCTYPE html> 234 | | <html> 235 | | <head> 236 | | <body> 237 | | <address> 238 | | <button> 239 | | "a" 240 | 241 | #data 242 | <!doctype html><address><button></address>a 243 | #errors 244 | #document 245 | | <!DOCTYPE html> 246 | | <html> 247 | | <head> 248 | | <body> 249 | | <address> 250 | | <button> 251 | | "a" 252 | 253 | #data 254 | <p><table></p> 255 | #errors 256 | #document 257 | | <html> 258 | | <head> 259 | | <body> 260 | | <p> 261 | | <p> 262 | | <table> 263 | 264 | #data 265 | <!doctype html><svg> 266 | #errors 267 | #document 268 | | <!DOCTYPE html> 269 | | <html> 270 | | <head> 271 | | <body> 272 | | <svg svg> 273 | 274 | #data 275 | <!doctype html><p><figcaption> 276 | #errors 277 | #document 278 | | <!DOCTYPE html> 279 | | <html> 280 | | <head> 281 | | <body> 282 | | <p> 283 | | <figcaption> 284 | 285 | #data 286 | <!doctype html><p><summary> 287 | #errors 288 | #document 289 | | <!DOCTYPE html> 290 | | <html> 291 | | <head> 292 | | <body> 293 | | <p> 294 | | <summary> 295 | 296 | #data 297 | <!doctype html><form><table><form> 298 | #errors 299 | #document 300 | | <!DOCTYPE html> 301 | | <html> 302 | | <head> 303 | | <body> 304 | | <form> 305 | | <table> 306 | 307 | #data 308 | <!doctype html><table><form><form> 309 | #errors 310 | #document 311 | | <!DOCTYPE html> 312 | | <html> 313 | | <head> 314 | | <body> 315 | | <table> 316 | | <form> 317 | 318 | #data 319 | <!doctype html><table><form></table><form> 320 | #errors 321 | #document 322 | | <!DOCTYPE html> 323 | | <html> 324 | | <head> 325 | | <body> 326 | | <table> 327 | | <form> 328 | 329 | #data 330 | <!doctype html><svg><foreignObject><p> 331 | #errors 332 | #document 333 | | <!DOCTYPE html> 334 | | <html> 335 | | <head> 336 | | <body> 337 | | <svg svg> 338 | | <svg foreignObject> 339 | | <p> 340 | 341 | #data 342 | <!doctype html><svg><title>abc 343 | #errors 344 | #document 345 | | <!DOCTYPE html> 346 | | <html> 
347 | | <head> 348 | | <body> 349 | | <svg svg> 350 | | <svg title> 351 | | "abc" 352 | 353 | #data 354 | <option><span><option> 355 | #errors 356 | #document 357 | | <html> 358 | | <head> 359 | | <body> 360 | | <option> 361 | | <span> 362 | | <option> 363 | 364 | #data 365 | <option><option> 366 | #errors 367 | #document 368 | | <html> 369 | | <head> 370 | | <body> 371 | | <option> 372 | | <option> 373 | 374 | #data 375 | <math><annotation-xml><div> 376 | #errors 377 | #document 378 | | <html> 379 | | <head> 380 | | <body> 381 | | <mathml math> 382 | | <mathml annotation-xml> 383 | | <div> 384 | 385 | #data 386 | <math><annotation-xml encoding="application/svg+xml"><div> 387 | #errors 388 | #document 389 | | <html> 390 | | <head> 391 | | <body> 392 | | <mathml math> 393 | | <mathml annotation-xml> 394 | | encoding="application/svg+xml" 395 | | <div> 396 | 397 | #data 398 | <math><annotation-xml encoding="application/xhtml+xml"><div> 399 | #errors 400 | #document 401 | | <html> 402 | | <head> 403 | | <body> 404 | | <mathml math> 405 | | <mathml annotation-xml> 406 | | encoding="application/xhtml+xml" 407 | | <div> 408 | 409 | #data 410 | <math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div> 411 | #errors 412 | #document 413 | | <html> 414 | | <head> 415 | | <body> 416 | | <mathml math> 417 | | <mathml annotation-xml> 418 | | encoding="aPPlication/xhtmL+xMl" 419 | | <div> 420 | 421 | #data 422 | <math><annotation-xml encoding="text/html"><div> 423 | #errors 424 | #document 425 | | <html> 426 | | <head> 427 | | <body> 428 | | <mathml math> 429 | | <mathml annotation-xml> 430 | | encoding="text/html" 431 | | <div> 432 | 433 | #data 434 | <math><annotation-xml encoding="Text/htmL"><div> 435 | #errors 436 | #document 437 | | <html> 438 | | <head> 439 | | <body> 440 | | <mathml math> 441 | | <mathml annotation-xml> 442 | | encoding="Text/htmL" 443 | | <div> 444 | 445 | #data 446 | <math><annotation-xml encoding=" text/html "><div> 447 | #errors 448 | #document 449 
| | <html> 450 | | <head> 451 | | <body> 452 | | <mathml math> 453 | | <mathml annotation-xml> 454 | | encoding=" text/html " 455 | | <div> 456 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests21.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <svg><![CDATA[foo]]> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <svg svg> 9 | | "foo" 10 | 11 | #data 12 | <math><![CDATA[foo]]> 13 | #errors 14 | #document 15 | | <html> 16 | | <head> 17 | | <body> 18 | | <mathml math> 19 | | "foo" 20 | 21 | #data 22 | <div><![CDATA[foo]]> 23 | #errors 24 | #document 25 | | <html> 26 | | <head> 27 | | <body> 28 | | <div> 29 | | <!-- [CDATA[foo]] --> 30 | 31 | #data 32 | <svg><![CDATA[foo 33 | #errors 34 | #document 35 | | <html> 36 | | <head> 37 | | <body> 38 | | <svg svg> 39 | | "foo" 40 | 41 | #data 42 | <svg><![CDATA[foo 43 | #errors 44 | #document 45 | | <html> 46 | | <head> 47 | | <body> 48 | | <svg svg> 49 | | "foo" 50 | 51 | #data 52 | <svg><![CDATA[ 53 | #errors 54 | #document 55 | | <html> 56 | | <head> 57 | | <body> 58 | | <svg svg> 59 | 60 | #data 61 | <svg><![CDATA[]]> 62 | #errors 63 | #document 64 | | <html> 65 | | <head> 66 | | <body> 67 | | <svg svg> 68 | 69 | #data 70 | <svg><![CDATA[]] >]]> 71 | #errors 72 | #document 73 | | <html> 74 | | <head> 75 | | <body> 76 | | <svg svg> 77 | | "]] >" 78 | 79 | #data 80 | <svg><![CDATA[]] >]]> 81 | #errors 82 | #document 83 | | <html> 84 | | <head> 85 | | <body> 86 | | <svg svg> 87 | | "]] >" 88 | 89 | #data 90 | <svg><![CDATA[]] 91 | #errors 92 | #document 93 | | <html> 94 | | <head> 95 | | <body> 96 | | <svg svg> 97 | | "]]" 98 | 99 | #data 100 | <svg><![CDATA[] 101 | #errors 102 | #document 103 | | <html> 104 | | <head> 105 | | <body> 106 | | <svg svg> 107 | | "]" 108 | 109 | #data 110 | <svg><![CDATA[]>a 111 | #errors 112 | #document 113 | | <html> 114 | | <head> 115 | | <body> 116 
| | <svg svg> 117 | | "]>a" 118 | 119 | #data 120 | <svg><foreignObject><div><![CDATA[foo]]> 121 | #errors 122 | #document 123 | | <html> 124 | | <head> 125 | | <body> 126 | | <svg svg> 127 | | <svg foreignObject> 128 | | <div> 129 | | <!-- [CDATA[foo]] --> 130 | 131 | #data 132 | <svg><![CDATA[<svg>]]> 133 | #errors 134 | #document 135 | | <html> 136 | | <head> 137 | | <body> 138 | | <svg svg> 139 | | "<svg>" 140 | 141 | #data 142 | <svg><![CDATA[</svg>a]]> 143 | #errors 144 | #document 145 | | <html> 146 | | <head> 147 | | <body> 148 | | <svg svg> 149 | | "</svg>a" 150 | 151 | #data 152 | <svg><![CDATA[<svg>a 153 | #errors 154 | #document 155 | | <html> 156 | | <head> 157 | | <body> 158 | | <svg svg> 159 | | "<svg>a" 160 | 161 | #data 162 | <svg><![CDATA[</svg>a 163 | #errors 164 | #document 165 | | <html> 166 | | <head> 167 | | <body> 168 | | <svg svg> 169 | | "</svg>a" 170 | 171 | #data 172 | <svg><![CDATA[<svg>]]><path> 173 | #errors 174 | #document 175 | | <html> 176 | | <head> 177 | | <body> 178 | | <svg svg> 179 | | "<svg>" 180 | | <svg path> 181 | 182 | #data 183 | <svg><![CDATA[<svg>]]></path> 184 | #errors 185 | #document 186 | | <html> 187 | | <head> 188 | | <body> 189 | | <svg svg> 190 | | "<svg>" 191 | 192 | #data 193 | <svg><![CDATA[<svg>]]><!--path--> 194 | #errors 195 | #document 196 | | <html> 197 | | <head> 198 | | <body> 199 | | <svg svg> 200 | | "<svg>" 201 | | <!-- path --> 202 | 203 | #data 204 | <svg><![CDATA[<svg>]]>path 205 | #errors 206 | #document 207 | | <html> 208 | | <head> 209 | | <body> 210 | | <svg svg> 211 | | "<svg>path" 212 | 213 | #data 214 | <svg><![CDATA[<!--svg-->]]> 215 | #errors 216 | #document 217 | | <html> 218 | | <head> 219 | | <body> 220 | | <svg svg> 221 | | "<!--svg-->" 222 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests22.dat: -------------------------------------------------------------------------------- 1 | #data 2 | 
<a><b><big><em><strong><div>X</a> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <a> 9 | | <b> 10 | | <big> 11 | | <em> 12 | | <strong> 13 | | <big> 14 | | <em> 15 | | <strong> 16 | | <div> 17 | | <a> 18 | | "X" 19 | 20 | #data 21 | <a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a> 22 | #errors 23 | #document 24 | | <html> 25 | | <head> 26 | | <body> 27 | | <a> 28 | | <b> 29 | | <b> 30 | | <div> 31 | | id="1" 32 | | <a> 33 | | <div> 34 | | id="2" 35 | | <a> 36 | | <div> 37 | | id="3" 38 | | <a> 39 | | <div> 40 | | id="4" 41 | | <a> 42 | | <div> 43 | | id="5" 44 | | <a> 45 | | <div> 46 | | id="6" 47 | | <a> 48 | | <div> 49 | | id="7" 50 | | <a> 51 | | <div> 52 | | id="8" 53 | | <a> 54 | | "A" 55 | 56 | #data 57 | <a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a> 58 | #errors 59 | #document 60 | | <html> 61 | | <head> 62 | | <body> 63 | | <a> 64 | | <b> 65 | | <b> 66 | | <div> 67 | | id="1" 68 | | <a> 69 | | <div> 70 | | id="2" 71 | | <a> 72 | | <div> 73 | | id="3" 74 | | <a> 75 | | <div> 76 | | id="4" 77 | | <a> 78 | | <div> 79 | | id="5" 80 | | <a> 81 | | <div> 82 | | id="6" 83 | | <a> 84 | | <div> 85 | | id="7" 86 | | <a> 87 | | <div> 88 | | id="8" 89 | | <a> 90 | | <div> 91 | | id="9" 92 | | "A" 93 | 94 | #data 95 | <a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a> 96 | #errors 97 | #document 98 | | <html> 99 | | <head> 100 | | <body> 101 | | <a> 102 | | <b> 103 | | <b> 104 | | <div> 105 | | id="1" 106 | | <a> 107 | | <div> 108 | | id="2" 109 | | <a> 110 | | <div> 111 | | id="3" 112 | | <a> 113 | | <div> 114 | | id="4" 115 | | <a> 116 | | <div> 117 | | id="5" 118 | | <a> 119 | | <div> 120 | | id="6" 121 | | <a> 122 | | <div> 123 | | id="7" 124 | | <a> 125 | | <div> 126 | | id="8" 127 | | <a> 128 | | <div> 129 | | id="9" 130 | | <div> 131 | | id="10" 132 | | "A" 133 | 134 | #data 135 | 
<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST 136 | #errors 137 | Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE. 138 | Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm. 139 | Line: 1 Col: 50 Expected closing tag. Unexpected end of file. 140 | #document 141 | | <html> 142 | | <head> 143 | | <body> 144 | | <cite> 145 | | <b> 146 | | <cite> 147 | | <i> 148 | | <cite> 149 | | <i> 150 | | <cite> 151 | | <i> 152 | | <i> 153 | | <i> 154 | | <div> 155 | | <b> 156 | | "X" 157 | | "TEST" 158 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests23.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X 3 | #errors 4 | 3: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”. 5 | 116: Unclosed elements. 6 | 117: End of file seen and there were open elements. 
7 | #document 8 | | <html> 9 | | <head> 10 | | <body> 11 | | <p> 12 | | <font> 13 | | size="4" 14 | | <font> 15 | | color="red" 16 | | <font> 17 | | size="4" 18 | | <font> 19 | | size="4" 20 | | <font> 21 | | size="4" 22 | | <font> 23 | | size="4" 24 | | <font> 25 | | size="4" 26 | | <font> 27 | | color="red" 28 | | <p> 29 | | <font> 30 | | color="red" 31 | | <font> 32 | | size="4" 33 | | <font> 34 | | size="4" 35 | | <font> 36 | | size="4" 37 | | <font> 38 | | color="red" 39 | | "X" 40 | 41 | #data 42 | <p><font size=4><font size=4><font size=4><font size=4><p>X 43 | #errors 44 | #document 45 | | <html> 46 | | <head> 47 | | <body> 48 | | <p> 49 | | <font> 50 | | size="4" 51 | | <font> 52 | | size="4" 53 | | <font> 54 | | size="4" 55 | | <font> 56 | | size="4" 57 | | <p> 58 | | <font> 59 | | size="4" 60 | | <font> 61 | | size="4" 62 | | <font> 63 | | size="4" 64 | | "X" 65 | 66 | #data 67 | <p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X 68 | #errors 69 | #document 70 | | <html> 71 | | <head> 72 | | <body> 73 | | <p> 74 | | <font> 75 | | size="4" 76 | | <font> 77 | | size="4" 78 | | <font> 79 | | size="4" 80 | | <font> 81 | | size="5" 82 | | <font> 83 | | size="4" 84 | | <p> 85 | | <font> 86 | | size="4" 87 | | <font> 88 | | size="4" 89 | | <font> 90 | | size="5" 91 | | <font> 92 | | size="4" 93 | | "X" 94 | 95 | #data 96 | <p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X 97 | #errors 98 | #document 99 | | <html> 100 | | <head> 101 | | <body> 102 | | <p> 103 | | <font> 104 | | id="a" 105 | | size="4" 106 | | <font> 107 | | id="b" 108 | | size="4" 109 | | <font> 110 | | size="4" 111 | | <font> 112 | | size="4" 113 | | <p> 114 | | <font> 115 | | id="a" 116 | | size="4" 117 | | <font> 118 | | id="b" 119 | | size="4" 120 | | <font> 121 | | size="4" 122 | | <font> 123 | | size="4" 124 | | "X" 125 | 126 | #data 127 | <p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y 128 | #errors 129 | #document 130 | | 
<html> 131 | | <head> 132 | | <body> 133 | | <p> 134 | | <b> 135 | | id="a" 136 | | <b> 137 | | id="a" 138 | | <b> 139 | | id="a" 140 | | <b> 141 | | <object> 142 | | <b> 143 | | id="a" 144 | | <b> 145 | | id="a" 146 | | "X" 147 | | <p> 148 | | <b> 149 | | id="a" 150 | | <b> 151 | | id="a" 152 | | <b> 153 | | id="a" 154 | | <b> 155 | | "Y" 156 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests24.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <!DOCTYPE html>&NotEqualTilde; 3 | #errors 4 | #document 5 | | <!DOCTYPE html> 6 | | <html> 7 | | <head> 8 | | <body> 9 | | "≂̸" 10 | 11 | #data 12 | <!DOCTYPE html>&NotEqualTilde;A 13 | #errors 14 | #document 15 | | <!DOCTYPE html> 16 | | <html> 17 | | <head> 18 | | <body> 19 | | "≂̸A" 20 | 21 | #data 22 | <!DOCTYPE html>&ThickSpace; 23 | #errors 24 | #document 25 | | <!DOCTYPE html> 26 | | <html> 27 | | <head> 28 | | <body> 29 | | "  " 30 | 31 | #data 32 | <!DOCTYPE html>&ThickSpace;A 33 | #errors 34 | #document 35 | | <!DOCTYPE html> 36 | | <html> 37 | | <head> 38 | | <body> 39 | | "  A" 40 | 41 | #data 42 | <!DOCTYPE html>&NotSubset; 43 | #errors 44 | #document 45 | | <!DOCTYPE html> 46 | | <html> 47 | | <head> 48 | | <body> 49 | | "⊂⃒" 50 | 51 | #data 52 | <!DOCTYPE html>&NotSubset;A 53 | #errors 54 | #document 55 | | <!DOCTYPE html> 56 | | <html> 57 | | <head> 58 | | <body> 59 | | "⊂⃒A" 60 | 61 | #data 62 | <!DOCTYPE html>&Gopf; 63 | #errors 64 | #document 65 | | <!DOCTYPE html> 66 | | <html> 67 | | <head> 68 | | <body> 69 | | "𝔾" 70 | 71 | #data 72 | <!DOCTYPE html>&Gopf;A 73 | #errors 74 | #document 75 | | <!DOCTYPE html> 76 | | <html> 77 | | <head> 78 | | <body> 79 | | "𝔾A" 80 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests25.dat: -------------------------------------------------------------------------------- 1 | #data 2 
| <!DOCTYPE html><body><foo>A 3 | #errors 4 | #document 5 | | <!DOCTYPE html> 6 | | <html> 7 | | <head> 8 | | <body> 9 | | <foo> 10 | | "A" 11 | 12 | #data 13 | <!DOCTYPE html><body><area>A 14 | #errors 15 | #document 16 | | <!DOCTYPE html> 17 | | <html> 18 | | <head> 19 | | <body> 20 | | <area> 21 | | "A" 22 | 23 | #data 24 | <!DOCTYPE html><body><base>A 25 | #errors 26 | #document 27 | | <!DOCTYPE html> 28 | | <html> 29 | | <head> 30 | | <body> 31 | | <base> 32 | | "A" 33 | 34 | #data 35 | <!DOCTYPE html><body><basefont>A 36 | #errors 37 | #document 38 | | <!DOCTYPE html> 39 | | <html> 40 | | <head> 41 | | <body> 42 | | <basefont> 43 | | "A" 44 | 45 | #data 46 | <!DOCTYPE html><body><bgsound>A 47 | #errors 48 | #document 49 | | <!DOCTYPE html> 50 | | <html> 51 | | <head> 52 | | <body> 53 | | <bgsound> 54 | | "A" 55 | 56 | #data 57 | <!DOCTYPE html><body><br>A 58 | #errors 59 | #document 60 | | <!DOCTYPE html> 61 | | <html> 62 | | <head> 63 | | <body> 64 | | <br> 65 | | "A" 66 | 67 | #data 68 | <!DOCTYPE html><body><col>A 69 | #errors 70 | 26: Stray start tag “col”. 71 | #document 72 | | <!DOCTYPE html> 73 | | <html> 74 | | <head> 75 | | <body> 76 | | "A" 77 | 78 | #data 79 | <!DOCTYPE html><body><command>A 80 | #errors 81 | #document 82 | | <!DOCTYPE html> 83 | | <html> 84 | | <head> 85 | | <body> 86 | | <command> 87 | | "A" 88 | 89 | #data 90 | <!DOCTYPE html><body><embed>A 91 | #errors 92 | #document 93 | | <!DOCTYPE html> 94 | | <html> 95 | | <head> 96 | | <body> 97 | | <embed> 98 | | "A" 99 | 100 | #data 101 | <!DOCTYPE html><body><frame>A 102 | #errors 103 | 26: Stray start tag “frame”. 
104 | #document 105 | | <!DOCTYPE html> 106 | | <html> 107 | | <head> 108 | | <body> 109 | | "A" 110 | 111 | #data 112 | <!DOCTYPE html><body><hr>A 113 | #errors 114 | #document 115 | | <!DOCTYPE html> 116 | | <html> 117 | | <head> 118 | | <body> 119 | | <hr> 120 | | "A" 121 | 122 | #data 123 | <!DOCTYPE html><body><img>A 124 | #errors 125 | #document 126 | | <!DOCTYPE html> 127 | | <html> 128 | | <head> 129 | | <body> 130 | | <img> 131 | | "A" 132 | 133 | #data 134 | <!DOCTYPE html><body><input>A 135 | #errors 136 | #document 137 | | <!DOCTYPE html> 138 | | <html> 139 | | <head> 140 | | <body> 141 | | <input> 142 | | "A" 143 | 144 | #data 145 | <!DOCTYPE html><body><keygen>A 146 | #errors 147 | #document 148 | | <!DOCTYPE html> 149 | | <html> 150 | | <head> 151 | | <body> 152 | | <keygen> 153 | | "A" 154 | 155 | #data 156 | <!DOCTYPE html><body><link>A 157 | #errors 158 | #document 159 | | <!DOCTYPE html> 160 | | <html> 161 | | <head> 162 | | <body> 163 | | <link> 164 | | "A" 165 | 166 | #data 167 | <!DOCTYPE html><body><meta>A 168 | #errors 169 | #document 170 | | <!DOCTYPE html> 171 | | <html> 172 | | <head> 173 | | <body> 174 | | <meta> 175 | | "A" 176 | 177 | #data 178 | <!DOCTYPE html><body><param>A 179 | #errors 180 | #document 181 | | <!DOCTYPE html> 182 | | <html> 183 | | <head> 184 | | <body> 185 | | <param> 186 | | "A" 187 | 188 | #data 189 | <!DOCTYPE html><body><source>A 190 | #errors 191 | #document 192 | | <!DOCTYPE html> 193 | | <html> 194 | | <head> 195 | | <body> 196 | | <source> 197 | | "A" 198 | 199 | #data 200 | <!DOCTYPE html><body><track>A 201 | #errors 202 | #document 203 | | <!DOCTYPE html> 204 | | <html> 205 | | <head> 206 | | <body> 207 | | <track> 208 | | "A" 209 | 210 | #data 211 | <!DOCTYPE html><body><wbr>A 212 | #errors 213 | #document 214 | | <!DOCTYPE html> 215 | | <html> 216 | | <head> 217 | | <body> 218 | | <wbr> 219 | | "A" 220 | -------------------------------------------------------------------------------- 
/tests/testdata/tree-construction/tests26.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a> 3 | #errors 4 | #document 5 | | <!DOCTYPE html> 6 | | <html> 7 | | <head> 8 | | <body> 9 | | <a> 10 | | href="#1" 11 | | <nobr> 12 | | "1" 13 | | <nobr> 14 | | <nobr> 15 | | <br> 16 | | <a> 17 | | href="#2" 18 | | <a> 19 | | href="#2" 20 | | <nobr> 21 | | "2" 22 | | <nobr> 23 | | <nobr> 24 | | <br> 25 | | <a> 26 | | href="#3" 27 | | <a> 28 | | href="#3" 29 | | <nobr> 30 | | "3" 31 | | <nobr> 32 | 33 | #data 34 | <!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3 35 | #errors 36 | #document 37 | | <!DOCTYPE html> 38 | | <html> 39 | | <head> 40 | | <body> 41 | | <b> 42 | | <nobr> 43 | | "1" 44 | | <nobr> 45 | | <nobr> 46 | | <i> 47 | | <i> 48 | | <nobr> 49 | | "2" 50 | | <nobr> 51 | | <nobr> 52 | | "3" 53 | 54 | #data 55 | <!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3 56 | #errors 57 | #document 58 | | <!DOCTYPE html> 59 | | <html> 60 | | <head> 61 | | <body> 62 | | <b> 63 | | <nobr> 64 | | "1" 65 | | <nobr> 66 | | <i> 67 | | <i> 68 | | <nobr> 69 | | "2" 70 | | <nobr> 71 | | <nobr> 72 | | "3" 73 | | <table> 74 | 75 | #data 76 | <!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3 77 | #errors 78 | #document 79 | | <!DOCTYPE html> 80 | | <html> 81 | | <head> 82 | | <body> 83 | | <b> 84 | | <nobr> 85 | | "1" 86 | | <table> 87 | | <tbody> 88 | | <tr> 89 | | <td> 90 | | <nobr> 91 | | <i> 92 | | <i> 93 | | <nobr> 94 | | "2" 95 | | <nobr> 96 | | <nobr> 97 | | "3" 98 | 99 | #data 100 | <!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3 101 | #errors 102 | #document 103 | | <!DOCTYPE html> 104 | | <html> 105 | | <head> 106 | | <body> 107 | | <b> 108 | | <nobr> 109 | | "1" 110 | | <div> 111 | | <b> 112 | | <nobr> 113 | | <nobr> 114 | | <nobr> 
115 | | <i> 116 | | <i> 117 | | <nobr> 118 | | "2" 119 | | <nobr> 120 | | <nobr> 121 | | "3" 122 | 123 | #data 124 | <!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3 125 | #errors 126 | #document 127 | | <!DOCTYPE html> 128 | | <html> 129 | | <head> 130 | | <body> 131 | | <b> 132 | | <nobr> 133 | | "1" 134 | | <nobr> 135 | | <div> 136 | | <nobr> 137 | | <i> 138 | | <i> 139 | | <nobr> 140 | | "2" 141 | | <nobr> 142 | | <nobr> 143 | | "3" 144 | 145 | #data 146 | <!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr> 147 | #errors 148 | #document 149 | | <!DOCTYPE html> 150 | | <html> 151 | | <head> 152 | | <body> 153 | | <b> 154 | | <nobr> 155 | | "1" 156 | | <nobr> 157 | | <ins> 158 | | <nobr> 159 | | <i> 160 | | <i> 161 | | <nobr> 162 | 163 | #data 164 | <!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2 165 | #errors 166 | #document 167 | | <!DOCTYPE html> 168 | | <html> 169 | | <head> 170 | | <body> 171 | | <b> 172 | | <nobr> 173 | | "1" 174 | | <ins> 175 | | <nobr> 176 | | <nobr> 177 | | <i> 178 | | "2" 179 | 180 | #data 181 | <!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i> 182 | #errors 183 | #document 184 | | <!DOCTYPE html> 185 | | <html> 186 | | <head> 187 | | <body> 188 | | <b> 189 | | "1" 190 | | <nobr> 191 | | <nobr> 192 | | <i> 193 | | <i> 194 | | <nobr> 195 | | "2" 196 | 197 | #data 198 | <p><code x</code></p> 199 | 200 | #errors 201 | #document 202 | | <html> 203 | | <head> 204 | | <body> 205 | | <p> 206 | | <code> 207 | | code="" 208 | | x<="" 209 | | <code> 210 | | code="" 211 | | x<="" 212 | | " 213 | " 214 | 215 | #data 216 | <!DOCTYPE html><svg><foreignObject><p><i></p>a 217 | #errors 218 | 45: End tag “p” seen, but there were open elements. 219 | 41: Unclosed element “i”. 220 | 46: End of file seen and there were open elements. 221 | 35: Unclosed element “foreignObject”. 222 | 20: Unclosed element “svg”. 
223 | #document 224 | | <!DOCTYPE html> 225 | | <html> 226 | | <head> 227 | | <body> 228 | | <svg svg> 229 | | <svg foreignObject> 230 | | <p> 231 | | <i> 232 | | <i> 233 | | "a" 234 | 235 | #data 236 | <!DOCTYPE html><table><tr><td><svg><foreignObject><p><i></p>a 237 | #errors 238 | 56: End tag “p” seen, but there were open elements. 239 | 52: Unclosed element “i”. 240 | 57: End of file seen and there were open elements. 241 | 46: Unclosed element “foreignObject”. 242 | 31: Unclosed element “svg”. 243 | 22: Unclosed element “table”. 244 | #document 245 | | <!DOCTYPE html> 246 | | <html> 247 | | <head> 248 | | <body> 249 | | <table> 250 | | <tbody> 251 | | <tr> 252 | | <td> 253 | | <svg svg> 254 | | <svg foreignObject> 255 | | <p> 256 | | <i> 257 | | <i> 258 | | "a" 259 | 260 | #data 261 | <!DOCTYPE html><math><mtext><p><i></p>a 262 | #errors 263 | 38: End tag “p” seen, but there were open elements. 264 | 34: Unclosed element “i”. 265 | 39: End of file in a foreign namespace context. 266 | #document 267 | | <!DOCTYPE html> 268 | | <html> 269 | | <head> 270 | | <body> 271 | | <mathml math> 272 | | <mathml mtext> 273 | | <p> 274 | | <i> 275 | | <i> 276 | | "a" 277 | 278 | #data 279 | <!DOCTYPE html><table><tr><td><math><mtext><p><i></p>a 280 | #errors 281 | 53: End tag “p” seen, but there were open elements. 282 | 49: Unclosed element “i”. 283 | 54: End of file in a foreign namespace context. 284 | #document 285 | | <!DOCTYPE html> 286 | | <html> 287 | | <head> 288 | | <body> 289 | | <table> 290 | | <tbody> 291 | | <tr> 292 | | <td> 293 | | <mathml math> 294 | | <mathml mtext> 295 | | <p> 296 | | <i> 297 | | <i> 298 | | "a" 299 | 300 | #data 301 | <!DOCTYPE html><body><div><!/div>a 302 | #errors 303 | 29: Bogus comment. 304 | 34: End of file seen and there were open elements. 305 | 26: Unclosed element “div”. 
306 | #document 307 | | <!DOCTYPE html> 308 | | <html> 309 | | <head> 310 | | <body> 311 | | <div> 312 | | <!-- /div --> 313 | | "a" 314 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests3.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <head></head><style></style> 3 | #errors 4 | Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE. 5 | Line: 1 Col: 20 Unexpected start tag (style) that can be in head. Moved. 6 | #document 7 | | <html> 8 | | <head> 9 | | <style> 10 | | <body> 11 | 12 | #data 13 | <head></head><script></script> 14 | #errors 15 | Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE. 16 | Line: 1 Col: 21 Unexpected start tag (script) that can be in head. Moved. 17 | #document 18 | | <html> 19 | | <head> 20 | | <script> 21 | | <body> 22 | 23 | #data 24 | <head></head><!-- --><style></style><!-- --><script></script> 25 | #errors 26 | Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE. 27 | Line: 1 Col: 28 Unexpected start tag (style) that can be in head. Moved. 28 | #document 29 | | <html> 30 | | <head> 31 | | <style> 32 | | <script> 33 | | <!-- --> 34 | | <!-- --> 35 | | <body> 36 | 37 | #data 38 | <head></head><!-- -->x<style></style><!-- --><script></script> 39 | #errors 40 | Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE. 
41 | #document 42 | | <html> 43 | | <head> 44 | | <!-- --> 45 | | <body> 46 | | "x" 47 | | <style> 48 | | <!-- --> 49 | | <script> 50 | 51 | #data 52 | <!DOCTYPE html><html><head></head><body><pre> 53 | </pre></body></html> 54 | #errors 55 | #document 56 | | <!DOCTYPE html> 57 | | <html> 58 | | <head> 59 | | <body> 60 | | <pre> 61 | 62 | #data 63 | <!DOCTYPE html><html><head></head><body><pre> 64 | foo</pre></body></html> 65 | #errors 66 | #document 67 | | <!DOCTYPE html> 68 | | <html> 69 | | <head> 70 | | <body> 71 | | <pre> 72 | | "foo" 73 | 74 | #data 75 | <!DOCTYPE html><html><head></head><body><pre> 76 | 77 | foo</pre></body></html> 78 | #errors 79 | #document 80 | | <!DOCTYPE html> 81 | | <html> 82 | | <head> 83 | | <body> 84 | | <pre> 85 | | " 86 | foo" 87 | 88 | #data 89 | <!DOCTYPE html><html><head></head><body><pre> 90 | foo 91 | </pre></body></html> 92 | #errors 93 | #document 94 | | <!DOCTYPE html> 95 | | <html> 96 | | <head> 97 | | <body> 98 | | <pre> 99 | | "foo 100 | " 101 | 102 | #data 103 | <!DOCTYPE html><html><head></head><body><pre>x</pre><span> 104 | </span></body></html> 105 | #errors 106 | #document 107 | | <!DOCTYPE html> 108 | | <html> 109 | | <head> 110 | | <body> 111 | | <pre> 112 | | "x" 113 | | <span> 114 | | " 115 | " 116 | 117 | #data 118 | <!DOCTYPE html><html><head></head><body><pre>x 119 | y</pre></body></html> 120 | #errors 121 | #document 122 | | <!DOCTYPE html> 123 | | <html> 124 | | <head> 125 | | <body> 126 | | <pre> 127 | | "x 128 | y" 129 | 130 | #data 131 | <!DOCTYPE html><html><head></head><body><pre>x<div> 132 | y</pre></body></html> 133 | #errors 134 | Line: 2 Col: 7 End tag (pre) seen too early. Expected other end tag. 
135 | #document 136 | | <!DOCTYPE html> 137 | | <html> 138 | | <head> 139 | | <body> 140 | | <pre> 141 | | "x" 142 | | <div> 143 | | " 144 | y" 145 | 146 | #data 147 | <!DOCTYPE html><pre>&#x0a;&#x0a;A</pre> 148 | #errors 149 | #document 150 | | <!DOCTYPE html> 151 | | <html> 152 | | <head> 153 | | <body> 154 | | <pre> 155 | | " 156 | A" 157 | 158 | #data 159 | <!DOCTYPE html><HTML><META><HEAD></HEAD></HTML> 160 | #errors 161 | Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored. 162 | #document 163 | | <!DOCTYPE html> 164 | | <html> 165 | | <head> 166 | | <meta> 167 | | <body> 168 | 169 | #data 170 | <!DOCTYPE html><HTML><HEAD><head></HEAD></HTML> 171 | #errors 172 | Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored. 173 | #document 174 | | <!DOCTYPE html> 175 | | <html> 176 | | <head> 177 | | <body> 178 | 179 | #data 180 | <textarea>foo<span>bar</span><i>baz 181 | #errors 182 | Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE. 183 | Line: 1 Col: 35 Expected closing tag. Unexpected end of file. 184 | #document 185 | | <html> 186 | | <head> 187 | | <body> 188 | | <textarea> 189 | | "foo<span>bar</span><i>baz" 190 | 191 | #data 192 | <title>foo<span>bar</em><i>baz 193 | #errors 194 | Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. 195 | Line: 1 Col: 30 Unexpected end of file. Expected end tag (title). 
196 | #document 197 | | <html> 198 | | <head> 199 | | <title> 200 | | "foo<span>bar</em><i>baz" 201 | | <body> 202 | 203 | #data 204 | <!DOCTYPE html><textarea> 205 | </textarea> 206 | #errors 207 | #document 208 | | <!DOCTYPE html> 209 | | <html> 210 | | <head> 211 | | <body> 212 | | <textarea> 213 | 214 | #data 215 | <!DOCTYPE html><textarea> 216 | foo</textarea> 217 | #errors 218 | #document 219 | | <!DOCTYPE html> 220 | | <html> 221 | | <head> 222 | | <body> 223 | | <textarea> 224 | | "foo" 225 | 226 | #data 227 | <!DOCTYPE html><textarea> 228 | 229 | foo</textarea> 230 | #errors 231 | #document 232 | | <!DOCTYPE html> 233 | | <html> 234 | | <head> 235 | | <body> 236 | | <textarea> 237 | | " 238 | foo" 239 | 240 | #data 241 | <!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html> 242 | #errors 243 | Line: 1 Col: 60 Missing end tag (div, li). 244 | #document 245 | | <!DOCTYPE html> 246 | | <html> 247 | | <head> 248 | | <body> 249 | | <ul> 250 | | <li> 251 | | <div> 252 | | <p> 253 | | <li> 254 | 255 | #data 256 | <!doctype html><nobr><nobr><nobr> 257 | #errors 258 | Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr). 259 | Line: 1 Col: 33 Unexpected start tag (nobr) implies end tag (nobr). 260 | Line: 1 Col: 33 Expected closing tag. Unexpected end of file. 261 | #document 262 | | <!DOCTYPE html> 263 | | <html> 264 | | <head> 265 | | <body> 266 | | <nobr> 267 | | <nobr> 268 | | <nobr> 269 | 270 | #data 271 | <!doctype html><nobr><nobr></nobr><nobr> 272 | #errors 273 | Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr). 274 | Line: 1 Col: 40 Expected closing tag. Unexpected end of file. 
275 | #document 276 | | <!DOCTYPE html> 277 | | <html> 278 | | <head> 279 | | <body> 280 | | <nobr> 281 | | <nobr> 282 | | <nobr> 283 | 284 | #data 285 | <!doctype html><html><body><p><table></table></body></html> 286 | #errors 287 | Not known 288 | #document 289 | | <!DOCTYPE html> 290 | | <html> 291 | | <head> 292 | | <body> 293 | | <p> 294 | | <table> 295 | 296 | #data 297 | <p><table></table> 298 | #errors 299 | Not known 300 | #document 301 | | <html> 302 | | <head> 303 | | <body> 304 | | <p> 305 | | <table> 306 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests4.dat: -------------------------------------------------------------------------------- 1 | #data 2 | direct div content 3 | #errors 4 | #document-fragment 5 | div 6 | #document 7 | | "direct div content" 8 | 9 | #data 10 | direct textarea content 11 | #errors 12 | #document-fragment 13 | textarea 14 | #document 15 | | "direct textarea content" 16 | 17 | #data 18 | textarea content with <em>pseudo</em> <foo>markup 19 | #errors 20 | #document-fragment 21 | textarea 22 | #document 23 | | "textarea content with <em>pseudo</em> <foo>markup" 24 | 25 | #data 26 | this is &#x0043;DATA inside a <style> element 27 | #errors 28 | #document-fragment 29 | style 30 | #document 31 | | "this is &#x0043;DATA inside a <style> element" 32 | 33 | #data 34 | </plaintext> 35 | #errors 36 | #document-fragment 37 | plaintext 38 | #document 39 | | "</plaintext>" 40 | 41 | #data 42 | setting html's innerHTML 43 | #errors 44 | Line: 1 Col: 24 Unexpected EOF in inner html mode. 
45 | #document-fragment 46 | html 47 | #document 48 | | <head> 49 | | <body> 50 | | "setting html's innerHTML" 51 | 52 | #data 53 | <title>setting head's innerHTML</title> 54 | #errors 55 | #document-fragment 56 | head 57 | #document 58 | | <title> 59 | | "setting head's innerHTML" 60 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests5.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <style> <!-- </style>x 3 | #errors 4 | Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. 5 | Line: 1 Col: 22 Unexpected end of file. Expected end tag (style). 6 | #document 7 | | <html> 8 | | <head> 9 | | <style> 10 | | " <!-- " 11 | | <body> 12 | | "x" 13 | 14 | #data 15 | <style> <!-- </style> --> </style>x 16 | #errors 17 | Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. 18 | #document 19 | | <html> 20 | | <head> 21 | | <style> 22 | | " <!-- " 23 | | " " 24 | | <body> 25 | | "--> x" 26 | 27 | #data 28 | <style> <!--> </style>x 29 | #errors 30 | Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. 31 | #document 32 | | <html> 33 | | <head> 34 | | <style> 35 | | " <!--> " 36 | | <body> 37 | | "x" 38 | 39 | #data 40 | <style> <!---> </style>x 41 | #errors 42 | Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. 43 | #document 44 | | <html> 45 | | <head> 46 | | <style> 47 | | " <!---> " 48 | | <body> 49 | | "x" 50 | 51 | #data 52 | <iframe> <!---> </iframe>x 53 | #errors 54 | Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE. 55 | #document 56 | | <html> 57 | | <head> 58 | | <body> 59 | | <iframe> 60 | | " <!---> " 61 | | "x" 62 | 63 | #data 64 | <iframe> <!--- </iframe>->x</iframe> --> </iframe>x 65 | #errors 66 | Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE. 
67 | #document 68 | | <html> 69 | | <head> 70 | | <body> 71 | | <iframe> 72 | | " <!--- " 73 | | "->x --> x" 74 | 75 | #data 76 | <script> <!-- </script> --> </script>x 77 | #errors 78 | Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. 79 | #document 80 | | <html> 81 | | <head> 82 | | <script> 83 | | " <!-- " 84 | | " " 85 | | <body> 86 | | "--> x" 87 | 88 | #data 89 | <title> <!-- </title> --> </title>x 90 | #errors 91 | Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. 92 | #document 93 | | <html> 94 | | <head> 95 | | <title> 96 | | " <!-- " 97 | | " " 98 | | <body> 99 | | "--> x" 100 | 101 | #data 102 | <textarea> <!--- </textarea>->x</textarea> --> </textarea>x 103 | #errors 104 | Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE. 105 | #document 106 | | <html> 107 | | <head> 108 | | <body> 109 | | <textarea> 110 | | " <!--- " 111 | | "->x --> x" 112 | 113 | #data 114 | <style> <!</-- </style>x 115 | #errors 116 | Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. 117 | #document 118 | | <html> 119 | | <head> 120 | | <style> 121 | | " <!</-- " 122 | | <body> 123 | | "x" 124 | 125 | #data 126 | <p><xmp></xmp> 127 | #errors 128 | XXX: Unknown 129 | #document 130 | | <html> 131 | | <head> 132 | | <body> 133 | | <p> 134 | | <xmp> 135 | 136 | #data 137 | <xmp> <!-- > --> </xmp> 138 | #errors 139 | Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE. 140 | #document 141 | | <html> 142 | | <head> 143 | | <body> 144 | | <xmp> 145 | | " <!-- > --> " 146 | 147 | #data 148 | <title>&amp;</title> 149 | #errors 150 | Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. 151 | #document 152 | | <html> 153 | | <head> 154 | | <title> 155 | | "&" 156 | | <body> 157 | 158 | #data 159 | <title><!--&amp;--></title> 160 | #errors 161 | Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. 
162 | #document 163 | | <html> 164 | | <head> 165 | | <title> 166 | | "<!--&-->" 167 | | <body> 168 | 169 | #data 170 | <title><!--</title> 171 | #errors 172 | Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. 173 | Line: 1 Col: 19 Unexpected end of file. Expected end tag (title). 174 | #document 175 | | <html> 176 | | <head> 177 | | <title> 178 | | "<!--" 179 | | <body> 180 | 181 | #data 182 | <noscript><!--</noscript>--></noscript> 183 | #errors 184 | Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE. 185 | #document 186 | | <html> 187 | | <head> 188 | | <noscript> 189 | | "<!--" 190 | | <body> 191 | | "-->" 192 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tests8.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <div> 3 | <div></div> 4 | </span>x 5 | #errors 6 | Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE. 7 | Line: 3 Col: 7 Unexpected end tag (span). Ignored. 8 | Line: 3 Col: 8 Expected closing tag. Unexpected end of file. 9 | #document 10 | | <html> 11 | | <head> 12 | | <body> 13 | | <div> 14 | | " 15 | " 16 | | <div> 17 | | " 18 | x" 19 | 20 | #data 21 | <div>x<div></div> 22 | </span>x 23 | #errors 24 | Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE. 25 | Line: 2 Col: 7 Unexpected end tag (span). Ignored. 26 | Line: 2 Col: 8 Expected closing tag. Unexpected end of file. 27 | #document 28 | | <html> 29 | | <head> 30 | | <body> 31 | | <div> 32 | | "x" 33 | | <div> 34 | | " 35 | x" 36 | 37 | #data 38 | <div>x<div></div>x</span>x 39 | #errors 40 | Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE. 41 | Line: 1 Col: 25 Unexpected end tag (span). Ignored. 42 | Line: 1 Col: 26 Expected closing tag. Unexpected end of file. 
43 | #document 44 | | <html> 45 | | <head> 46 | | <body> 47 | | <div> 48 | | "x" 49 | | <div> 50 | | "xx" 51 | 52 | #data 53 | <div>x<div></div>y</span>z 54 | #errors 55 | Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE. 56 | Line: 1 Col: 25 Unexpected end tag (span). Ignored. 57 | Line: 1 Col: 26 Expected closing tag. Unexpected end of file. 58 | #document 59 | | <html> 60 | | <head> 61 | | <body> 62 | | <div> 63 | | "x" 64 | | <div> 65 | | "yz" 66 | 67 | #data 68 | <table><div>x<div></div>x</span>x 69 | #errors 70 | Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE. 71 | Line: 1 Col: 12 Unexpected start tag (div) in table context caused voodoo mode. 72 | Line: 1 Col: 18 Unexpected start tag (div) in table context caused voodoo mode. 73 | Line: 1 Col: 24 Unexpected end tag (div) in table context caused voodoo mode. 74 | Line: 1 Col: 32 Unexpected end tag (span) in table context caused voodoo mode. 75 | Line: 1 Col: 32 Unexpected end tag (span). Ignored. 76 | Line: 1 Col: 33 Unexpected end of file. Expected table content. 77 | #document 78 | | <html> 79 | | <head> 80 | | <body> 81 | | <div> 82 | | "x" 83 | | <div> 84 | | "xx" 85 | | <table> 86 | 87 | #data 88 | x<table>x 89 | #errors 90 | Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE. 91 | Line: 1 Col: 9 Unexpected non-space characters in table context caused voodoo mode. 92 | Line: 1 Col: 9 Unexpected end of file. Expected table content. 93 | #document 94 | | <html> 95 | | <head> 96 | | <body> 97 | | "xx" 98 | | <table> 99 | 100 | #data 101 | x<table><table>x 102 | #errors 103 | Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE. 104 | Line: 1 Col: 15 Unexpected start tag (table) implies end tag (table). 105 | Line: 1 Col: 16 Unexpected non-space characters in table context caused voodoo mode. 106 | Line: 1 Col: 16 Unexpected end of file. Expected table content. 
107 | #document 108 | | <html> 109 | | <head> 110 | | <body> 111 | | "x" 112 | | <table> 113 | | "x" 114 | | <table> 115 | 116 | #data 117 | <b>a<div></div><div></b>y 118 | #errors 119 | Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE. 120 | Line: 1 Col: 24 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm. 121 | Line: 1 Col: 25 Expected closing tag. Unexpected end of file. 122 | #document 123 | | <html> 124 | | <head> 125 | | <body> 126 | | <b> 127 | | "a" 128 | | <div> 129 | | <div> 130 | | <b> 131 | | "y" 132 | 133 | #data 134 | <a><div><p></a> 135 | #errors 136 | Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE. 137 | Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm. 138 | Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm. 139 | Line: 1 Col: 15 Expected closing tag. Unexpected end of file. 140 | #document 141 | | <html> 142 | | <head> 143 | | <body> 144 | | <a> 145 | | <div> 146 | | <a> 147 | | <p> 148 | | <a> 149 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/tricky01.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <b><p>Bold </b> Not bold</p> 3 | Also not bold. 4 | #errors 5 | #document 6 | | <html> 7 | | <head> 8 | | <body> 9 | | <b> 10 | | <p> 11 | | <b> 12 | | "Bold " 13 | | " Not bold" 14 | | " 15 | Also not bold." 16 | 17 | #data 18 | <html> 19 | <font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain 20 | <p>I should not be red. <font color=red>Red. <i>Italic and red.</p> 21 | <p>Italic and red. 
</i> Red.</font> I should not be red.</p> 22 | <b>Bold <i>Bold and italic</b> Only Italic </i> Plain 23 | #errors 24 | #document 25 | | <html> 26 | | <head> 27 | | <body> 28 | | <font> 29 | | color="red" 30 | | <i> 31 | | "Italic and Red" 32 | | <i> 33 | | <p> 34 | | <font> 35 | | color="red" 36 | | "Italic and Red " 37 | | " Just italic." 38 | | " Italic only." 39 | | " Plain 40 | " 41 | | <p> 42 | | "I should not be red. " 43 | | <font> 44 | | color="red" 45 | | "Red. " 46 | | <i> 47 | | "Italic and red." 48 | | <font> 49 | | color="red" 50 | | <i> 51 | | " 52 | " 53 | | <p> 54 | | <font> 55 | | color="red" 56 | | <i> 57 | | "Italic and red. " 58 | | " Red." 59 | | " I should not be red." 60 | | " 61 | " 62 | | <b> 63 | | "Bold " 64 | | <i> 65 | | "Bold and italic" 66 | | <i> 67 | | " Only Italic " 68 | | " Plain" 69 | 70 | #data 71 | <html><body> 72 | <p><font size="7">First paragraph.</p> 73 | <p>Second paragraph.</p></font> 74 | <b><p><i>Bold and Italic</b> Italic</p> 75 | #errors 76 | #document 77 | | <html> 78 | | <head> 79 | | <body> 80 | | " 81 | " 82 | | <p> 83 | | <font> 84 | | size="7" 85 | | "First paragraph." 86 | | <font> 87 | | size="7" 88 | | " 89 | " 90 | | <p> 91 | | "Second paragraph." 92 | | " 93 | " 94 | | <b> 95 | | <p> 96 | | <b> 97 | | <i> 98 | | "Bold and Italic" 99 | | <i> 100 | | " Italic" 101 | 102 | #data 103 | <html> 104 | <dl> 105 | <dt><b>Boo 106 | <dd>Goo? 107 | </dl> 108 | </html> 109 | #errors 110 | #document 111 | | <html> 112 | | <head> 113 | | <body> 114 | | <dl> 115 | | " 116 | " 117 | | <dt> 118 | | <b> 119 | | "Boo 120 | " 121 | | <dd> 122 | | <b> 123 | | "Goo? 
124 | " 125 | | <b> 126 | | " 127 | " 128 | 129 | #data 130 | <html><body> 131 | <label><a><div>Hello<div>World</div></a></label> 132 | </body></html> 133 | #errors 134 | #document 135 | | <html> 136 | | <head> 137 | | <body> 138 | | " 139 | " 140 | | <label> 141 | | <a> 142 | | <div> 143 | | <a> 144 | | "Hello" 145 | | <div> 146 | | "World" 147 | | " 148 | " 149 | 150 | #data 151 | <table><center> <font>a</center> <img> <tr><td> </td> </tr> </table> 152 | #errors 153 | #document 154 | | <html> 155 | | <head> 156 | | <body> 157 | | <center> 158 | | " " 159 | | <font> 160 | | "a" 161 | | <font> 162 | | <img> 163 | | " " 164 | | <table> 165 | | " " 166 | | <tbody> 167 | | <tr> 168 | | <td> 169 | | " " 170 | | " " 171 | | " " 172 | 173 | #data 174 | <table><tr><p><a><p>You should see this text. 175 | #errors 176 | #document 177 | | <html> 178 | | <head> 179 | | <body> 180 | | <p> 181 | | <a> 182 | | <p> 183 | | <a> 184 | | "You should see this text." 185 | | <table> 186 | | <tbody> 187 | | <tr> 188 | 189 | #data 190 | <TABLE> 191 | <TR> 192 | <CENTER><CENTER><TD></TD></TR><TR> 193 | <FONT> 194 | <TABLE><tr></tr></TABLE> 195 | </P> 196 | <a></font><font></a> 197 | This page contains an insanely badly-nested tag sequence. 198 | #errors 199 | #document 200 | | <html> 201 | | <head> 202 | | <body> 203 | | <center> 204 | | <center> 205 | | <font> 206 | | " 207 | " 208 | | <table> 209 | | " 210 | " 211 | | <tbody> 212 | | <tr> 213 | | " 214 | " 215 | | <td> 216 | | <tr> 217 | | " 218 | " 219 | | <table> 220 | | <tbody> 221 | | <tr> 222 | | <font> 223 | | " 224 | " 225 | | <p> 226 | | " 227 | " 228 | | <a> 229 | | <a> 230 | | <font> 231 | | <font> 232 | | " 233 | This page contains an insanely badly-nested tag sequence." 234 | 235 | #data 236 | <html> 237 | <body> 238 | <b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the 239 | nobr should have closed the div inside it implicitly. 
</b><pre>A pre tag outside everything else.</pre> 240 | </body> 241 | </html> 242 | #errors 243 | #document 244 | | <html> 245 | | <head> 246 | | <body> 247 | | " 248 | " 249 | | <b> 250 | | <nobr> 251 | | <div> 252 | | <b> 253 | | <nobr> 254 | | "This text is in a div inside a nobr" 255 | | "More text that should not be in the nobr, i.e., the 256 | nobr should have closed the div inside it implicitly. " 257 | | <pre> 258 | | "A pre tag outside everything else." 259 | | " 260 | 261 | " 262 | -------------------------------------------------------------------------------- /tests/testdata/tree-construction/webkit02.dat: -------------------------------------------------------------------------------- 1 | #data 2 | <foo bar=qux/> 3 | #errors 4 | #document 5 | | <html> 6 | | <head> 7 | | <body> 8 | | <foo> 9 | | bar="qux/" 10 | 11 | #data 12 | <p id="status"><noscript><strong>A</strong></noscript><span>B</span></p> 13 | #errors 14 | #document 15 | | <html> 16 | | <head> 17 | | <body> 18 | | <p> 19 | | id="status" 20 | | <noscript> 21 | | "<strong>A</strong>" 22 | | <span> 23 | | "B" 24 | 25 | #data 26 | <div><sarcasm><div></div></sarcasm></div> 27 | #errors 28 | #document 29 | | <html> 30 | | <head> 31 | | <body> 32 | | <div> 33 | | <sarcasm> 34 | | <div> 35 | 36 | #data 37 | <html><body><img src="" border="0" alt="><div>A</div></body></html> 38 | #errors 39 | #document 40 | | <html> 41 | | <head> 42 | | <body> 43 | 44 | #data 45 | <table><td></tbody>A 46 | #errors 47 | #document 48 | | <html> 49 | | <head> 50 | | <body> 51 | | "A" 52 | | <table> 53 | | <tbody> 54 | | <tr> 55 | | <td> 56 | 57 | #data 58 | <table><td></thead>A 59 | #errors 60 | #document 61 | | <html> 62 | | <head> 63 | | <body> 64 | | <table> 65 | | <tbody> 66 | | <tr> 67 | | <td> 68 | | "A" 69 | 70 | #data 71 | <table><td></tfoot>A 72 | #errors 73 | #document 74 | | <html> 75 | | <head> 76 | | <body> 77 | | <table> 78 | | <tbody> 79 | | <tr> 80 | | <td> 81 | | "A" 82 | 83 | #data 84 | 
<table><thead><td></tbody>A 85 | #errors 86 | #document 87 | | <html> 88 | | <head> 89 | | <body> 90 | | <table> 91 | | <thead> 92 | | <tr> 93 | | <td> 94 | | "A" 95 | -------------------------------------------------------------------------------- /toxml.lisp: -------------------------------------------------------------------------------- 1 | ;;;; HTML5 parser for Common Lisp 2 | ;;;; 3 | ;;;; Copyright (C) 2017 Thomas Bakketun <thomas.bakketun@copyleft.no> 4 | ;;;; 5 | ;;;; This library is free software: you can redistribute it and/or modify 6 | ;;;; it under the terms of the GNU Lesser General Public License as published 7 | ;;;; by the Free Software Foundation, either version 3 of the License, or 8 | ;;;; (at your option) any later version. 9 | ;;;; 10 | ;;;; This library is distributed in the hope that it will be useful, 11 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | ;;;; GNU General Public License for more details. 14 | ;;;; 15 | ;;;; You should have received a copy of the GNU General Public License 16 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>. 17 | 18 | (in-package #:html5-parser) 19 | 20 | 21 | (defun xml-escape-name (name) 22 | "Escapes a node name (element, attribute, doctype) by replacing any 23 | character not valid in XML name by Uxxxxxx, where x is the code point 24 | as six hex digits. This encoding is reversable, since the HTML parser 25 | down cases all characters in names. 
26 | NAME must be a non-empty string: its first character is read with (char name 0). 27 | See: https://www.w3.org/TR/html5/syntax.html#coercing-an-html-dom-into-an-infoset" 28 | (if (and (xml-name-start-char-p (char name 0)) 29 | (every #'xml-name-char-p name)) 30 | name 31 | (with-output-to-string (out) 32 | (loop for first = t then nil 33 | for c across name do 34 | (if (if first 35 | (xml-name-start-char-p c) 36 | (xml-name-char-p c)) 37 | (princ c out) 38 | (format out "U~:@(~6,'0X~)" (char-code c))))))) 39 | 40 | 41 | (defun xml-unescape-name (name) 42 | "Revert the escaping done by xml-escape-name: every U followed by six hex digits (0-9, A-F) in NAME is replaced by the character with that code point." 43 | (cl-ppcre:regex-replace-all 44 | "U[0-9A-F]{6}" 45 | name 46 | (lambda (u) 47 | (string (code-char (parse-integer u :start 1 :radix 16)))) 48 | :simple-calls t)) 49 | 50 | 51 | (defun xml-name-start-char-p (c) 52 | "Return true if character C is accepted as the first character of an XML name: an ASCII letter, an underscore, or a character in one of the non-ASCII code point ranges listed below. The colon is not accepted." (or (char<= #\a c #\z) 53 | (char= #\_ c) 54 | (char<= #\A c #\Z) 55 | (char<= (code-char #xC0) c (code-char #xD6)) 56 | (char<= (code-char #xD8) c (code-char #xF6)) 57 | (char<= (code-char #xF8) c (code-char #x2FF)) 58 | (char<= (code-char #x370) c (code-char #x37D)) 59 | (char<= (code-char #x37F) c (code-char #x1FFF)) 60 | (char<= (code-char #x200C) c (code-char #x200D)) 61 | (char<= (code-char #x2070) c (code-char #x218F)) 62 | (char<= (code-char #x2C00) c (code-char #x2FEF)) 63 | (char<= (code-char #x3001) c (code-char #xD7FF)) 64 | (char<= (code-char #xF900) c (code-char #xFDCF)) 65 | (char<= (code-char #xFDF0) c (code-char #xFFFD)) 66 | (char<= (code-char #x10000) c (code-char #xEFFFF)))) 67 | 68 | 69 | (defun xml-name-char-p (c) 70 | "Return true if character C is accepted inside an XML name: any character accepted by xml-name-start-char-p, a hyphen, a full stop, an ASCII digit, U+00B7, or a character in U+0300-U+036F or U+203F-U+2040." (or (xml-name-start-char-p c) 71 | (char= #\- c) 72 | (char= #\. 
c) 73 | (char<= #\0 c #\9) 74 | (char= (code-char #xB7) c) 75 | (char<= (code-char #x0300) c (code-char #x036F)) 76 | (char<= (code-char #x203F) c (code-char #x2040)))) 77 | -------------------------------------------------------------------------------- /xmls.lisp: -------------------------------------------------------------------------------- 1 | ;;;; HTML5 parser for Common Lisp 2 | ;;;; 3 | ;;;; Copyright (C) 2012 Thomas Bakketun <thomas.bakketun@copyleft.no> 4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke <asgeir@copyleft.no> 5 | ;;;; Copyright (C) 2012 Mathias Hellevang 6 | ;;;; Copyright (C) 2012 Stian Sletner <stian@copyleft.no> 7 | ;;;; 8 | ;;;; This library is free software: you can redistribute it and/or modify 9 | ;;;; it under the terms of the GNU Lesser General Public License as published 10 | ;;;; by the Free Software Foundation, either version 3 of the License, or 11 | ;;;; (at your option) any later version. 12 | ;;;; 13 | ;;;; This library is distributed in the hope that it will be useful, 14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ;;;; GNU General Public License for more details. 17 | ;;;; 18 | ;;;; You should have received a copy of the GNU General Public License 19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>. 20 | 21 | (in-package #:html5-parser) 22 | 23 | 24 | (defmethod transform-html5-dom ((to-type (eql :xmls)) node 25 | &key namespace comments) 26 | "Convert a node into an XMLS-compatible tree of conses, starting 27 | at NODE. If the node is a document-fragment, a list of XMLS trees is returned. For a document, the child named html is converted. When NAMESPACE is true, an element whose namespace differs from that of its parent is named by a (name . namespace) cons. A comment node becomes (:comment nil value) when COMMENTS is true and NIL otherwise." 
28 | (labels ((node-to-xmls (node parent-ns xlink-defined) 29 | (ecase (node-type node) 30 | (:document 31 | (let (root) 32 | (element-map-children (lambda (n) 33 | (when (string= (node-name n) "html") 34 | (setf root n))) 35 | node) 36 | (assert root) 37 | (node-to-xmls root parent-ns xlink-defined))) 38 | (:document-fragment 39 | (let (xmls-nodes) 40 | (element-map-children (lambda (node) 41 | (push (node-to-xmls node parent-ns xlink-defined) 42 | xmls-nodes)) 43 | node) 44 | (nreverse xmls-nodes))) 45 | (:element 46 | (let (attrs children) 47 | (element-map-attributes (lambda (name node-namespace value) 48 | (when (and namespace 49 | (not xlink-defined) 50 | (equal node-namespace (html5-constants:find-namespace "xlink"))) 51 | (push '#.(list "xmlns:xlink" (html5-constants:find-namespace "xlink")) attrs) 52 | (setf xlink-defined t)) 53 | (push (list (if node-namespace 54 | name 55 | (xml-escape-name name)) 56 | value) 57 | attrs)) 58 | node) 59 | (element-map-children (lambda (c) 60 | (push c children)) 61 | node) 62 | 63 | (apply #'list 64 | (if (and namespace 65 | (not (equal parent-ns (node-namespace node)))) 66 | (cons (node-name node) (node-namespace node)) 67 | (xml-escape-name (node-name node))) 68 | attrs 69 | (mapcar (lambda (c) 70 | (node-to-xmls c (node-namespace node) xlink-defined)) 71 | (nreverse children))))) 72 | (:text 73 | (node-value node)) 74 | (:comment 75 | (when comments 76 | (list :comment nil (node-value node))))))) 77 | (node-to-xmls node nil nil))) 78 | 79 | 80 | (defmethod transform-html5-dom ((to-type (eql :xmls-ns)) node &key) 81 | "Convert NODE by calling the :xmls method with NAMESPACE set to true." (transform-html5-dom :xmls node :namespace t)) 82 | --------------------------------------------------------------------------------