├── .gitignore
├── .travis.yml
├── COPYING
├── COPYING.LIB
├── LICENSES
├── README.md
├── cl-html5-parser.asd
├── constants.lisp
├── cxml
├── cl-html5-parser-cxml.asd
└── cxml-dom.lisp
├── entities.lisp
├── html5-parser-class.lisp
├── html5-parser.lisp
├── inputstream.lisp
├── packages.lisp
├── simple-tree.lisp
├── tests
├── cl-html5-parser-tests.asd
├── packages.lisp
├── run-tests.lisp
├── support.lisp
├── test-inputstream.lisp
├── test-parser.lisp
├── test-tokenizer.lisp
├── test-tree-builder.lisp
└── testdata
│ ├── encoding
│ ├── test-yahoo-jp.dat
│ ├── tests1.dat
│ └── tests2.dat
│ ├── sanitizer
│ └── tests1.dat
│ ├── serializer
│ ├── core.test
│ ├── injectmeta.test
│ ├── optionaltags.test
│ ├── options.test
│ └── whitespace.test
│ ├── sniffer
│ └── htmlOrFeed.json
│ ├── tokenizer
│ ├── aa-lisp-tests.test
│ ├── contentModelFlags.test
│ ├── domjs.test
│ ├── entities.test
│ ├── escapeFlag.test
│ ├── namedEntities.test
│ ├── numericEntities.test
│ ├── pendingSpecChanges.test
│ ├── test1.test
│ ├── test2.test
│ ├── test3.test
│ ├── test4.test
│ ├── unicodeChars.test
│ ├── unicodeCharsProblematic.test
│ └── xmlViolation.test
│ └── tree-construction
│ ├── adoption01.dat
│ ├── adoption02.dat
│ ├── button.dat
│ ├── comments01.dat
│ ├── doctype01.dat
│ ├── domjs-unsafe.dat
│ ├── entities01.dat
│ ├── entities02.dat
│ ├── html5test-com.dat
│ ├── inbody01.dat
│ ├── isindex.dat
│ ├── pending-spec-changes-plain-text-unsafe.dat
│ ├── pending-spec-changes.dat
│ ├── plain-text-unsafe.dat
│ ├── scriptdata01.dat
│ ├── tables01.dat
│ ├── tests1.dat
│ ├── tests10.dat
│ ├── tests11.dat
│ ├── tests12.dat
│ ├── tests14.dat
│ ├── tests15.dat
│ ├── tests16.dat
│ ├── tests17.dat
│ ├── tests18.dat
│ ├── tests19.dat
│ ├── tests2.dat
│ ├── tests20.dat
│ ├── tests21.dat
│ ├── tests22.dat
│ ├── tests23.dat
│ ├── tests24.dat
│ ├── tests25.dat
│ ├── tests26.dat
│ ├── tests3.dat
│ ├── tests4.dat
│ ├── tests5.dat
│ ├── tests6.dat
│ ├── tests7.dat
│ ├── tests8.dat
│ ├── tests9.dat
│ ├── tests_innerHTML_1.dat
│ ├── tricky01.dat
│ ├── webkit01.dat
│ └── webkit02.dat
├── tokenizer.lisp
├── toxml.lisp
├── tree-help.lisp
└── xmls.lisp
/.gitignore:
--------------------------------------------------------------------------------
1 | README.html
2 |
3 | *.fasl
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: lisp
2 | sudo: required
3 |
4 | branches:
5 | only:
6 | - master
7 |
8 | env:
9 | matrix:
10 | - LISP=sbcl
11 |
12 | install:
13 | - curl -L https://github.com/luismbo/cl-travis/raw/master/install.sh | sh
14 |
15 | script:
16 | - cl -e "(ql:quickload :cl-html5-parser) (ql:quickload :cl-html5-parser-tests) (html5-parser-tests::run-html5-parser-tests)"
17 |
--------------------------------------------------------------------------------
/COPYING.LIB:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/LICENSES:
--------------------------------------------------------------------------------
1 | This software is ported from html5lib, see http://code.google.com/p/html5lib/
2 | html5lib is distributed under the following license:
3 |
4 | Copyright (c) 2006-2011 The Authors
5 |
6 | Contributors:
7 | James Graham - jg307@cam.ac.uk
8 | Anne van Kesteren - annevankesteren@gmail.com
9 | Lachlan Hunt - lachlan.hunt@lachy.id.au
10 | Matt McDonald - kanashii@kanashii.ca
11 | Sam Ruby - rubys@intertwingly.net
12 | Ian Hickson (Google) - ian@hixie.ch
13 | Thomas Broyer - t.broyer@ltgt.net
14 | Jacques Distler - distler@golem.ph.utexas.edu
15 | Henri Sivonen - hsivonen@iki.fi
16 | Adam Barth - abarth@webkit.org
17 | Eric Seidel - eric@webkit.org
18 | The Mozilla Foundation (contributions from Henri Sivonen since 2008)
19 | David Flanagan (Mozilla) - dflanagan@mozilla.com
20 |
21 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
22 |
23 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
24 |
25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | cl-html5-parser: HTML5 parser for Common Lisp
2 | =============================================
3 |
4 | ## Abstract
5 |
6 | cl-html5-parser is an HTML5 parser for Common Lisp with the following features:
7 |
8 | * It is a port of the Python library [html5lib](http://code.google.com/p/html5lib/).
9 | * It passes all relevant tests from html5lib.
10 | * It is not tied to a specific DOM implementation.
11 |
12 |
13 | ## Requirements
14 |
15 | * SBCL or ECL.
16 | * CL-PPCRE and FLEXI-STREAMS.
17 |
18 | Might work with CLISP, ABCL and Clozure CL, but many of the tests don't pass there.
19 |
20 |
21 | ## Usage
22 |
23 |
24 | ### Parsing
25 |
26 | Parsing functions are in the package HTML5-PARSER.
27 |
28 | ```
29 | parse-html5 source &key encoding strictp dom
30 | => document, errors
31 | ```
32 |
33 | Parse an HTML document from source. Source can be a string, a pathname
34 | or a stream. When parsing from a stream encoding detection is not
35 | supported, encoding must be supplied via the encoding keyword
36 | parameter.
37 |
38 | When strictp is true, parsing stops on first error.
39 |
40 | Returns two values. The primary value is the document node. The
41 | secondary value is a list of errors found during parsing. The format
42 | of this list is subject to change.
43 |
44 | The type of document depends on the dom parameter. By default it's an
45 | instance of cl-html5-parser's own DOM implementation. See the DOM
46 | paragraph below for more information.
47 |
48 | ```
49 | parse-html5-fragment source &key container encoding strictp dom
50 | => document-fragment, errors
51 | ```
52 |
53 | Parses a fragment of HTML. Container sets the context, defaults to
54 | "div". Returns a document-fragment node. For the other parameters see
55 | `PARSE-HTML5`.
56 |
57 |
58 | ### Example
59 | ```common-lisp
60 | (html5-parser:parse-html5-fragment "Parse <i>some</i> HTML" :dom :xmls)
61 | ==> ("Parse " ("i" NIL "some") " HTML")
62 | ```
63 |
64 | ### The DOM
65 |
66 | Parsing HTML5 is not possible without a
67 | [DOM](http://en.wikipedia.org/wiki/Document_Object_Model). cl-html5-parser
68 | defines a minimal DOM implementation for this task. Functions for
69 | traversing documents are exported by the HTML5-PARSER package.
70 |
71 | Alternatively the parser can be instructed to convert the document
72 | into other DOM implementations using the dom parameter. The conversion
73 | is done by simply calling the generic function
74 | transform-html5-dom. Support for other DOM implementations can be
75 | added by defining new methods for this generic function. The dom
76 | parameter is either a symbol or a list where the car is a symbol and
77 | the rest is keyword arguments. Below are the currently supported target
78 | types.
79 |
80 |
81 | ### Namespace of elements and attributes
82 |
83 | The HTML5 syntax has no support for namespaces, however the standard
84 | defines special rules to set the expected namespace for SVG and MathML
85 | elements and the following attributes: `xlink:actuate`,
86 | `xlink:arcrole`, `xlink:href`, `xlink:role`, `xlink:show`,
87 | `xlink:title`, `xlink:type`, `xml:base`, `xml:lang`, `xml:space`,
88 | `xmlns`, `xmlns:xlink`. Please note that this only applies to SVG and
89 | MathML elements. Attributes of HTML elements will never get a
90 | namespace.
91 |
92 | #### Examples
93 |
94 | ```common-lisp
95 | (html5-parser:parse-html5 "<html xml:lang='en' xml@lang='en'>" :dom :xmls-ns)
105 | ==>
106 | (("html" . "http://www.w3.org/1999/xhtml")
107 | (("xmlU00003Alang" "en") ("xmlU000040lang" "en")) ("head" NIL) ("body" NIL))
108 | ```
109 |
110 | On an HTML element `xml:lang` and `xml@lang` are just attributes with
111 | unusual characters in their name. In the HTML DOM these names are kept
112 | as is, but when converting to XML they are escaped, to ensure the XML
113 | becomes valid. This escaping can be reversed with
114 | `HTML5-PARSER:XML-UNESCAPE-NAME`.
115 |
116 | ```common-lisp
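117 | (html5-parser:parse-html5 "<svg xml:lang='en' xml@lang='en' xlink:href='#' xmlns:xlink='http://www.w3.org/1999/xlink' xlink:to='#'/>" :dom :xmls-ns)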
118 | ==>
119 | (("html" . "http://www.w3.org/1999/xhtml") NIL ("head" NIL)
120 | ("body" NIL
121 | (("svg" . "http://www.w3.org/2000/svg")
122 | (("xml:lang" "en") ("xmlU000040lang" "en") ("xlink:href" "#")
123 | ("xmlns:xlink" "http://www.w3.org/1999/xlink") ("xlinkU00003Ato" "#")))))
124 | ```
125 |
126 | In this case `xml:lang` and `xmlns:xlink` are among the
127 | attributes with a known namespace when used on SVG and MathML
128 | elements. However, `xlink:to` is not in the list, even though it's defined in
129 | the xlink standard.
130 |
131 | ### :XMLS or (:XMLS &key namespace comments)
132 |
133 | Converts a node into a simple
134 | [XMLS](http://common-lisp.net/project/xmls/)-like list structure.
135 | If node is a document fragment a list of XMLS nodes is returned. In
136 | all other cases a single XMLS node is returned.
137 |
138 | If namespace argument is true, tag names are conses of name and
139 | namespace URI.
140 |
141 | By default comments are stripped. If comments argument is true,
142 | comments are returned as (:COMMENT NIL "comment text"). This is an
143 | extension of the XMLS format.
144 |
145 |
146 | ### :CXML
147 |
148 | Convert to [Closure XML Parser](http://common-lisp.net/project/cxml/)
149 | DOM implementation. In order to use this you must load/depend on
150 | the system cl-html5-parser-cxml.
151 |
152 |
153 | ## License
154 |
155 | This library is available under the
156 | [GNU Lesser General Public License v3.0](http://www.gnu.org/licenses/lgpl.html).
157 |
--------------------------------------------------------------------------------
/cl-html5-parser.asd:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2017 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (defsystem #:cl-html5-parser ; ASDF definition of the core parser system
22 | :name "cl-html5-parser"
23 | :description "A HTML5 parser for Common Lisp"
24 | :licence "GNU Lesser General Public License"
25 | :author "Thomas Bakketun "
26 | :depends-on (:cl-ppcre :flexi-streams :string-case) ; external systems loaded before this one
27 | :serial t ; each file below depends on all files listed before it
28 | :components ((:file "packages") ; listed first, so it loads before every other file
29 | (:file "constants")
30 | (:file "entities")
31 | (:file "inputstream")
32 | (:file "tokenizer")
33 | (:file "simple-tree")
34 | (:file "html5-parser-class")
35 | (:file "tree-help")
36 | (:file "html5-parser")
37 | (:file "toxml")
38 | (:file "xmls")))
39 |
40 |
41 | (defmethod perform ((operation test-op) (system (eql (find-system '#:cl-html5-parser)))) ; test-op hook for the core system
42 |   (operate 'load-op '#:cl-html5-parser-tests) ; load the test system before resolving its entry point
43 |   (let ((runner (find-symbol (symbol-name '#:run-html5-parser-tests) ; looked up by name at run time, not at read time
44 |                              :html5-parser-tests))) (funcall runner)))
45 |
--------------------------------------------------------------------------------
/cxml/cl-html5-parser-cxml.asd:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2017 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (defsystem #:cl-html5-parser-cxml ; ASDF definition of the optional CXML integration system
22 | :name "cl-html5-parser-cxml" ; was "cl-html5-parser", copied from the core system; now matches the system being defined
23 | :description "CXML integration for cl-html5-parser"
24 | :licence "GNU Lesser General Public License"
25 | :author "Thomas Bakketun "
26 | :depends-on (:cl-html5-parser :cxml) ; the core parser plus the CXML library
27 | :serial t
28 | :components ((:file "cxml-dom")))
29 |
--------------------------------------------------------------------------------
/cxml/cxml-dom.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2014 Joe Taylor
4 | ;;;; Copyright (C) 2012 Thomas Bakketun
5 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
6 | ;;;; Copyright (C) 2012 Mathias Hellevang
7 | ;;;; Copyright (C) 2012 Stian Sletner
8 | ;;;;
9 | ;;;; This library is free software: you can redistribute it and/or modify
10 | ;;;; it under the terms of the GNU Lesser General Public License as published
11 | ;;;; by the Free Software Foundation, either version 3 of the License, or
12 | ;;;; (at your option) any later version.
13 | ;;;;
14 | ;;;; This library is distributed in the hope that it will be useful,
15 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | ;;;; GNU General Public License for more details.
18 | ;;;;
19 | ;;;; You should have received a copy of the GNU General Public License
20 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
21 |
22 | (in-package #:html5-parser)
23 |
24 |
25 | (defmethod transform-html5-dom ((to-type (eql :cxml)) node &key) ; Convert NODE from the built-in DOM into CXML DOM objects; returns the fragment if one was built, otherwise the document
26 | (let ((document-type)
27 | (document)
28 | (document-fragment))
29 | (labels ((walk (node &optional parent xlink-defined) ; XLINK-DEFINED: true once xmlns:xlink has been set on this element or an ancestor
30 | (ecase (node-type node) ; ECASE: any other node type signals an error
31 | (:document-type
32 | (setf document-type (dom:create-document-type 'rune-dom:implementation ; kept for the dom:create-document call in the :element branch
33 | (xml-escape-name (node-name node))
34 | (node-public-id node)
35 | (node-system-id node))))
36 | (:document
37 | (element-map-children #'walk node)) ; children are walked with no PARENT
38 | (:document-fragment
39 | (setf document (dom:create-document 'rune-dom:implementation nil nil nil)) ; document created only to own the fragment
40 | (setf document-fragment (dom:create-document-fragment document))
41 | (element-map-children (lambda (c) (walk c document-fragment xlink-defined)) node))
42 | (:element
43 | (let ((element
44 | (if document ; a document already exists: create the element inside it
45 | (dom:create-element-ns document (node-namespace node) (xml-escape-name (node-name node)))
46 | (dom:document-element ; otherwise create the document now, with this element as its document element
47 | (setf document (dom:create-document 'rune-dom:implementation
48 | (node-namespace node)
49 | (xml-escape-name (node-name node))
50 | document-type))))))
51 | (unless (and parent ; set an xmlns attribute unless PARENT has the same namespace URI
52 | (equal (node-namespace node) (dom:namespace-uri parent)))
53 | (dom:set-attribute-ns element (html5-constants:find-namespace "xmlns")
54 | "xmlns" (node-namespace node)))
55 | (element-map-attributes (lambda (name namespace value)
56 | (when (and (not xlink-defined) ; first xlink-namespaced attribute seen on this path: add xmlns:xlink once
57 | (equal namespace (html5-constants:find-namespace "xlink")))
58 | (dom:set-attribute element "xmlns:xlink" (html5-constants:find-namespace "xlink"))
59 | (setf xlink-defined t)) ; mutates WALK's own variable, so the children walked below see the updated flag
60 | (if namespace
61 | (dom:set-attribute-ns element namespace name value)
62 | (dom:set-attribute element (xml-escape-name name) value))) ; names without a namespace are escaped first
63 | node)
64 | (element-map-children (lambda (c) (walk c element xlink-defined)) node)
65 | (dom:append-child (or parent document) element))) ; attached after its attributes and children are in place
66 | (:text
67 | (dom:append-child (or parent document)
68 | (dom:create-text-node document (node-value node))))
69 | (:comment
70 | (dom:append-child (or parent document)
71 | (dom:create-comment document (node-value node)))))))
72 | (walk node))
73 | (or document-fragment document)))
74 |
--------------------------------------------------------------------------------
/html5-parser-class.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2012 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (in-package :html5-parser)
22 |
23 | (defvar *parser*) ; special variable declared without a value here (unbound until bound elsewhere); presumably the active HTML-PARSER -- confirm against callers
24 |
25 | (defclass html-parser () ; State object for one parse run; most slots start unbound and have no accessor
26 | ((html-namespace :initform (find-namespace "html")) ; result of looking up the "html" prefix
27 | (strict :initarg :strict) ; the only slot settable through an initarg
28 | (inner-html-mode)
29 | (container :initform "div") ; same default as the container documented for parse-html5-fragment in the README
30 | (tokenizer)
31 | (document :initform (make-document)) ; each instance starts with a fresh document node
32 | (errors :initform '())
33 | (phase :accessor parser-phase) ; the only slot in this class with a generated accessor
34 | first-start-tag
35 | compat-mode
36 | inner-html
37 | last-phase
38 | original-phase
39 | before-rcdata-phase
40 | (character-tokens :initform nil)
41 | frameset-ok
42 | open-elements
43 | active-formatting-elements
44 | head-pointer
45 | form-pointer
46 | insert-from-table
47 | (in-body-process-space-characters-mode :initform :non-pre)))
48 |
--------------------------------------------------------------------------------
/packages.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2012 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (defpackage :html5-constants
22 |   ;; Export list of the HTML5-CONSTANTS package, which the
23 |   ;; HTML5-PARSER package :USEs.  Every symbol is listed exactly
24 |   ;; once; four entries used to appear twice, which was redundant
25 |   ;; and made the list harder to audit.
26 |   (:use
27 |    :common-lisp)
28 |   (:export
29 |    #:+namespaces+
30 |    #:find-namespace
31 |    #:find-prefix
32 |    #:+scoping-elements+
33 |    #:+formatting-elements+
34 |    #:+special-elements+
35 |    #:+html-integration-point-elements+
36 |    #:+mathml-text-integration-point-elements+
37 |    #:+eof+
38 |    #:+token-types+
39 |    #:+tag-token-types+
40 |    #:+space-characters+
41 |    #:+table-insert-mode-elements+
42 |    #:+ascii-lowercase+
43 |    #:+ascii-uppercase+
44 |    #:+ascii-letters+
45 |    #:ascii-letter-p
46 |    #:+digits+
47 |    #:+hex-digits+
48 |    #:ascii-upper-2-lower
49 |    #:+replacement-characters+
50 |    #:+cdata-elements+
51 |    #:+rcdata-elements+
52 |    #:+quirks-mode-doctypes-regexp+
53 |    #:+heading-elements+))
54 |
55 | (defpackage :html5-parser ; Public package: parsing entry points plus the built-in DOM API
56 | (:use
57 | :common-lisp
58 | :html5-constants
59 | :cl-ppcre)
60 | (:export
61 | #:parse-html5 ; entry points described in the README
62 | #:parse-html5-fragment
63 | #:transform-html5-dom ; generic function specialised per target DOM type (e.g. the :cxml method)
64 |
65 | #:xml-escape-name
66 | #:xml-unescape-name
67 |
68 | ;; A simple DOM
69 | #:make-document
70 | #:make-fragment
71 | #:make-doctype
72 | #:make-comment
73 | #:make-element
74 | #:make-text-node
75 |
76 | #:node-type
77 | #:node-name
78 | #:node-namespace
79 | #:node-value
80 | #:node-public-id
81 | #:node-system-id
82 | #:element-attribute
83 |
84 | #:node-append-child
85 | #:node-insert-before
86 | #:node-remove-child
87 |
88 | #:node-parent
89 | #:node-first-child
90 | #:node-last-child
91 | #:node-previous-sibling
92 | #:node-next-sibling
93 | #:element-map-attributes
94 | #:element-map-attributes*
95 | #:element-map-children))
96 |
--------------------------------------------------------------------------------
/simple-tree.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2012 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (in-package :html5-parser)
22 |
23 | ;; A basic implementation of a DOM-core like thing
24 |
25 | (defclass node () ; Base class of the built-in DOM; the classes below inherit from it
26 | ((type :initform :node :allocation :class :reader node-type) ; class-allocated tag; each subclass overrides the initform
27 | (name :initarg :name :initform nil :reader node-name)
28 | (namespace :initarg :namespace :initform nil :reader node-namespace)
29 | (parent :initform nil :reader node-parent) ; read-only through the public reader
30 | (value :initform nil :initarg :value
31 | :accessor node-value)
32 | (child-nodes :initform nil :accessor %node-child-nodes) ; list of child nodes, first child at the head
33 | (last-child :initform nil :accessor last-child))) ; last cons of CHILD-NODES, kept current by the (setf %node-child-nodes) :after method
34 |
35 | (defmethod (setf %node-child-nodes) :after (children (owner node)) ; runs after every write to the child list
36 |   (setf (last-child owner) (last children))) ; LAST returns the final cons, or NIL for an empty list
37 |
38 | (defclass document (node) ; Document node: a NODE whose type tag is :document
39 | ((type :initform :document :allocation :class)))
40 |
;; Node whose NODE-TYPE is :DOCUMENT-FRAGMENT.  It inherits from DOCUMENT,
;; so methods specialized on DOCUMENT also apply to fragments.
(defclass document-fragment (document)
  ((type :initform :document-fragment :allocation :class)))
43 |
;; Node whose NODE-TYPE is :DOCUMENT-TYPE.  PUBLIC-ID and SYSTEM-ID have no
;; initform, so they are unbound unless supplied as initargs.
(defclass document-type (node)
  ((type :initform :document-type :allocation :class)
   (public-id :initarg :public-id :reader node-public-id)
   (system-id :initarg :system-id :reader node-system-id)))
48 |
;; Node whose NODE-TYPE is :TEXT; MAKE-TEXT-NODE stores the text in the
;; inherited VALUE slot.
(defclass text-node (node)
  ((type :initform :text :allocation :class)))
51 |
;; Node whose NODE-TYPE is :ELEMENT.  ATTRIBUTES is an alist whose entries
;; have the shape ((name . namespace) . value), as built by
;; (SETF ELEMENT-ATTRIBUTE).
(defclass element (node)
  ((type :initform :element :allocation :class)
   (attributes :initform nil :accessor %node-attributes)))
55 |
;; Node whose NODE-TYPE is :COMMENT; MAKE-COMMENT stores the comment data
;; in the inherited VALUE slot.
(defclass comment-node (node)
  ((type :initform :comment :allocation :class)))
58 |
59 | ;;;
60 | ;;; Creating nodes
61 | ;;;
62 |
(defun make-document ()
  "Return a fresh, empty DOCUMENT node."
  (make-instance 'document))
65 |
(defun make-fragment (document)
  "Return a fresh, empty DOCUMENT-FRAGMENT node.  DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'document-fragment))
69 |
(defun make-doctype (document name public-id system-id)
  "Return a fresh DOCUMENT-TYPE node with the given NAME, PUBLIC-ID and
SYSTEM-ID.  DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'document-type :name name :public-id public-id :system-id system-id))
73 |
(defun make-comment (document data)
  "Return a fresh COMMENT-NODE whose NODE-VALUE is DATA.  DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'comment-node :value data))
77 |
(defun make-element (document name namespace)
  "Return a fresh ELEMENT node with the given NAME and NAMESPACE and no
attributes or children.  DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'element :name name :namespace namespace))
81 |
(defun make-text-node (document data)
  "Return a fresh TEXT-NODE whose NODE-VALUE is DATA.  DOCUMENT is ignored."
  (declare (ignore document))
  (make-instance 'text-node :value data))
85 |
86 | ;;;
87 | ;;; Node methods
88 | ;;;
89 |
(defun node-first-child (node)
  "Return the first child of NODE, or NIL when NODE has no children."
  (let ((children (%node-child-nodes node)))
    (first children)))
92 |
(defun node-last-child (node)
  "Return the last child of NODE, or NIL when NODE has no children.
Reads the cached last cons instead of walking the child list."
  (let ((tail (last-child node)))
    (first tail)))
95 |
(defun node-previous-sibling (node)
  "Return the sibling immediately before NODE in its parent's child list.
Returns NIL when NODE is the first child or has no parent."
  (let ((parent (node-parent node)))
    ;; A detached node has no siblings.  Without this guard the child-list
    ;; accessor would be called on NIL and signal an error.
    (when parent
      (loop for (this next) on (%node-child-nodes parent)
            when (eql next node) do (return this)))))
99 |
(defun node-next-sibling (node)
  "Return the sibling immediately after NODE in its parent's child list.
Returns NIL when NODE is the last child or has no parent."
  (let ((parent (node-parent node)))
    ;; A detached node has no siblings.  Without this guard the child-list
    ;; accessor would be called on NIL and signal an error.
    (when parent
      (loop for (this next) on (%node-child-nodes parent)
            when (eql this node) do (return next)))))
103 |
(defun node-append-child (node child)
  "Make CHILD the last child of NODE and return NODE's child list.
If CHILD already has a parent it is removed from that parent first."
  (let ((previous-parent (node-parent child)))
    (when previous-parent
      (node-remove-child previous-parent child)))
  (setf (slot-value child 'parent) node)
  (cond ((%node-child-nodes node)
         ;; Splice a new cons after the cached last cons and make it the
         ;; new cached tail, so appending does not walk the whole list.
         (let* ((tail (last-child node))
                (new-cell (cons child (cdr tail))))
           (setf (cdr tail) new-cell)
           (setf (last-child node) new-cell)))
        (t
         ;; First child: setting the list also refreshes LAST-CHILD.
         (setf (%node-child-nodes node) (list child))))
  (%node-child-nodes node))
114 |
(defun node-remove-child (node child)
  "Remove CHILD from NODE's child list and clear CHILD's parent.
Returns NIL.  CHILD's parent is cleared even if it was not among NODE's
children."
  (let ((remaining (remove child (%node-child-nodes node))))
    ;; Storing through the accessor also refreshes the cached LAST-CHILD.
    (setf (%node-child-nodes node) remaining))
  (setf (slot-value child 'parent) nil))
119 |
(defun node-insert-before (node child insert-before)
  "Insert CHILD into NODE's child list immediately before INSERT-BEFORE.
If INSERT-BEFORE is not among NODE's children, CHILD is appended at the
end.  If CHILD already has a parent it is removed from that parent first,
as NODE-APPEND-CHILD does, so it never sits in two child lists.
Returns NODE's new child list."
  (let ((previous-parent (node-parent child)))
    (when previous-parent
      (node-remove-child previous-parent child)))
  (setf (slot-value child 'parent) node)
  ;; Read the child list only after the detach above: when CHILD was already
  ;; a child of NODE the list has just changed.
  (let ((child-nodes (%node-child-nodes node)))
    (labels ((insert-before (child-nodes)
               (cond ((endp child-nodes)
                      (cons child nil))
                     ((eql (car child-nodes) insert-before)
                      (cons child child-nodes))
                     (t (rplacd child-nodes (insert-before (cdr child-nodes)))))))
      ;; Storing through the accessor also refreshes the cached LAST-CHILD.
      (setf (%node-child-nodes node)
            (insert-before child-nodes)))))
131 |
(defun element-attribute (node attribute &optional namespace)
  "Return the value of NODE's attribute named ATTRIBUTE in NAMESPACE,
or NIL when there is no such attribute."
  (let* ((key (cons attribute namespace))
         (entry (assoc key (%node-attributes node) :test #'equal)))
    (cdr entry)))
136 |
(defun (setf element-attribute) (new-value node attribute
                                 &optional namespace)
  "Set NODE's attribute ATTRIBUTE (in NAMESPACE) to NEW-VALUE.
Both ATTRIBUTE and NEW-VALUE must be strings.  An existing entry is
updated in place; otherwise a new entry is pushed onto the attribute list."
  (check-type attribute string)
  (check-type new-value string)
  (let* ((key (cons attribute namespace))
         (existing (assoc key (%node-attributes node) :test #'equal)))
    (cond (existing
           (setf (cdr existing) new-value))
          (t
           (push (cons key new-value) (%node-attributes node))))))
147 |
148 | ;;;
149 | ;;; Traversing
150 | ;;;
151 |
(defun element-map-children (function node)
  "Call FUNCTION on each child of NODE, in child-list order.  Returns NIL."
  (dolist (child (%node-child-nodes node))
    (funcall function child)))
154 |
(defun element-map-attributes* (function node)
  "Call FUNCTION with the name, namespace and value of each attribute of
NODE, in attribute-list order.  Returns NIL."
  (dolist (entry (%node-attributes node))
    ;; Each entry has the shape ((name . namespace) . value).
    (let ((key (car entry)))
      (funcall function (car key) (cdr key) (cdr entry)))))
158 |
(defun element-map-attributes (function node)
  "Call FUNCTION with a display name, the namespace and the value of each
attribute of NODE.  The display name is \"prefix:name\" for attributes with
a namespace (prefix looked up with HTML5-CONSTANTS:FIND-PREFIX) and the
bare name otherwise."
  (flet ((call-with-qualified-name (name namespace value)
           (let ((qualified-name
                   (if namespace
                       (format nil "~A:~A" (html5-constants:find-prefix namespace) name)
                       name)))
             (funcall function qualified-name namespace value))))
    (element-map-attributes* #'call-with-qualified-name node)))
169 |
170 | ;;
171 | ;; Printing for the ease of debugging
172 | ;;
173 |
(defun node-count (tree)
  "Return the number of nodes in TREE.
An element counts as one plus the count of its descendants; a document or
document fragment contributes only the count of its descendants; every
other object counts as one."
  (flet ((count-descendants (node)
           ;; Sum with LOOP rather than (APPLY #'+ ...): APPLY is bounded by
           ;; CALL-ARGUMENTS-LIMIT, which a node with many children can exceed.
           (loop for child in (%node-child-nodes node)
                 sum (node-count child))))
    (typecase tree
      (element (1+ (count-descendants tree)))
      ((or document document-fragment)
       (count-descendants tree))
      (t 1))))
180 |
;; Print documents (and, by inheritance, document fragments) unreadably,
;; showing the NODE-COUNT of the tree.
(defmethod print-object ((node document) stream)
  (print-unreadable-object (node stream :type t :identity t)
    (format stream "nodes: ~A" (node-count node))))
184 |
;; Default unreadable printer for nodes: shows the NODE-NAME.
(defmethod print-object ((node node) stream)
  (print-unreadable-object (node stream :type t :identity t)
    (format stream "~A" (node-name node))))
188 |
;; Unreadable printer for text nodes: shows at most the first 30 characters
;; of the text, followed by "..." when the text was cut.
(defmethod print-object ((node text-node) stream)
  (print-unreadable-object (node stream :type t :identity t)
    (let ((value (node-value node))
          (limit 30))
      ;; *PRINT-LENGTH* (the :LENGTH argument of WRITE) does not abbreviate
      ;; strings, so long text has to be cut explicitly.
      (if (and (stringp value) (> (length value) limit))
          (progn
            (write (subseq value 0 limit) :stream stream)
            (write-string "..." stream))
          (write value :stream stream :length limit)))))
192 |
--------------------------------------------------------------------------------
/tests/cl-html5-parser-tests.asd:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2017 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
;; ASDF system definition for the cl-html5-parser test suite.
(defsystem #:cl-html5-parser-tests
  ;; :cl-ppcre is listed explicitly because the HTML5-PARSER-TESTS package
  ;; :USEs it; relying on another system to pull it in is fragile.
  :depends-on (:cl-html5-parser :cl-ppcre :stefil :json-streams :split-sequence)
  ;; The files depend on each other in listing order ("packages" defines the
  ;; package, "support" the helpers used by the test files, and "run-tests"
  ;; calls into all of them), so each must be loaded after the previous one.
  :serial t
  :components ((:file "packages")
               (:file "support")
               (:file "test-inputstream")
               (:file "test-tokenizer")
               (:file "test-tree-builder")
               (:file "test-parser")
               (:file "run-tests")))
30 |
--------------------------------------------------------------------------------
/tests/packages.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2012 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
;; Package holding the test suite.  It uses the parser's package, CL-PPCRE,
;; and the Stefil test framework.
(defpackage :html5-parser-tests
  (:use
   :common-lisp
   :html5-parser
   :cl-ppcre
   :stefil))
27 |
--------------------------------------------------------------------------------
/tests/run-tests.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2012 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (in-package :html5-parser-tests)
22 |
(defun run-html5-parser-tests ()
  "Run the input stream tests, the tokenizer tests, the tree builder tests
and the tree-construction parser tests, in that order, and return their
four results as multiple values."
  ;; NOTE(review): the PARSER-TESTS suite defined in test-parser.lisp
  ;; (containing TEST-PARSE-CONTENT-ATTR) is not invoked here -- confirm
  ;; whether that is intentional.
  (values (input-stream-tests)
          (test-tokenizer)
          (tree-builder-tests)
          (test-parser)))
28 |
--------------------------------------------------------------------------------
/tests/support.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2017 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (in-package :html5-parser-tests)
22 |
(defun html5lib-test-files (subdirectory &key (type "dat"))
  "Return the pathnames of all files with file type TYPE (default \"dat\")
in SUBDIRECTORY of the test system's testdata/ directory."
  (let* ((wildcard (make-pathname :directory `(:relative ,subdirectory)
                                  :name :wild
                                  :type type))
         (testdata-root (asdf:system-relative-pathname :cl-html5-parser-tests
                                                       "testdata/")))
    (directory (merge-pathnames wildcard testdata-root))))
28 |
(defun parse-test-part (in)
  "Read one \"#name\" section from the stream IN.
Returns a two-element list (KEYWORD TEXT): KEYWORD is the upcased header
name interned as a keyword, TEXT is every following line up to (not
including) the next line starting with #, without the final newline.
Returns NIL at end of file.  The header line must start with #."
  (let ((line (read-line in nil)))
    (unless (null line)
      (assert (char= #\# (char line 0)))
      (let* ((section-name (intern (string-upcase (subseq line 1)) :keyword))
             (text (with-output-to-string (out)
                     (loop
                       ;; Peek so that the next header line is left unread.
                       (let ((next-char (peek-char nil in nil)))
                         (when (or (null next-char) (char= #\# next-char))
                           (return))
                         (write-line (read-line in) out)))))
             ;; Drop the newline WRITE-LINE added after the last line.
             (end (max 0 (1- (length text)))))
        (list section-name (subseq text 0 end))))))
39 |
40 |
(defun parse-one-test (in)
  "Read sections from IN up to and including the :DOCUMENT section and
return them as one property list (KEYWORD TEXT KEYWORD TEXT ...).
Returns NIL when IN is already at end of file."
  (let ((plist '()))
    (loop
      (let ((part (parse-test-part in)))
        (when (null part)
          (return))
        (setf plist (append plist part))
        ;; The :DOCUMENT section is the last section of a test.
        (when (eql (car part) :document)
          (return))))
    plist))
46 |
(defun parse-test-data (filename)
  "Return the list of tests in the file FILENAME, in file order.
Each test is a property list as returned by PARSE-ONE-TEST."
  (with-open-file (in filename)
    (let ((tests '()))
      (loop
        (let ((test (parse-one-test in)))
          (unless test
            (return))
          (push test tests)))
      (nreverse tests))))
52 |
53 |
--------------------------------------------------------------------------------
/tests/test-inputstream.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2012 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (in-package :html5-parser-tests)
22 |
;; Define the INPUT-STREAM-TESTS suite under the root suite and make it the
;; current suite for the tests below.
(in-root-suite)
(defsuite input-stream-tests)
(in-suite input-stream-tests)
26 |
;; Successive reads return the characters of the input string in order.
(deftest test-read-char ()
  (let ((stream (html5-parser::make-html-input-stream "hello")))
    (is (eql #\h (html5-parser::html5-stream-char stream)))
    (is (eql #\e (html5-parser::html5-stream-char stream)))))
31 |
;; Read to end of input, push back the EOF marker and the last character,
;; then check that both are delivered again in the original order.
(deftest test-unget ()
  (let ((stream (html5-parser::make-html-input-stream "hei")))
    (is (eql #\h (html5-parser::html5-stream-char stream)))
    (is (eql #\e (html5-parser::html5-stream-char stream)))
    (is (eql #\i (html5-parser::html5-stream-char stream)))
    (is (eql html5-constants::+eof+ (html5-parser::html5-stream-char stream)))
    (html5-parser::html5-stream-unget stream html5-constants::+eof+)
    (html5-parser::html5-stream-unget stream #\i)
    (is (eql #\i (html5-parser::html5-stream-char stream)))
    (is (eql html5-constants::+eof+ (html5-parser::html5-stream-char stream)))))
42 |
;; The first call is expected to stop before the first character found in
;; the given set.  The second call passes T as third argument and is expected
;; to consume characters for as long as they are in the set.
(deftest test-chars-until ()
  (let ((stream (html5-parser::make-html-input-stream "hello<--__-->a")))
    (is (equal "hello" (html5-parser::html5-stream-chars-until stream "><")))
    (is (eql #\< (html5-parser::html5-stream-char stream)))
    (is (equal "--__-->" (html5-parser::html5-stream-chars-until stream "<>-_" t)))
    (is (eql #\a (html5-parser::html5-stream-char stream)))))
49 |
;; When no character of the set occurs, the whole input is returned and the
;; next read yields the EOF marker.
(deftest test-chars-until-eof ()
  (let ((stream (html5-parser::make-html-input-stream "hello")))
    (is (equal "hello" (html5-parser::html5-stream-chars-until stream "?")))
    (is (eql html5-constants::+eof+ (html5-parser::html5-stream-char stream)))))
54 |
;; A lone Newline, a lone Return, and a Return/Newline pair are each
;; expected to be read as a single Newline.
(deftest test-line-ending-fix ()
  (let ((stream (html5-parser::make-html-input-stream (coerce #(#\a #\Newline
                                                                #\b #\Return
                                                                #\c #\Return #\Newline
                                                                #\d)
                                                              'string))))
    (is (eql #\a (html5-parser::html5-stream-char stream)))
    (is (eql #\Newline (html5-parser::html5-stream-char stream)))
    (is (eql #\b (html5-parser::html5-stream-char stream)))
    (is (eql #\Newline (html5-parser::html5-stream-char stream)))
    (is (eql #\c (html5-parser::html5-stream-char stream)))
    (is (eql #\Newline (html5-parser::html5-stream-char stream)))
    (is (eql #\d (html5-parser::html5-stream-char stream)))
    (is (eql html5-constants::+eof+ (html5-parser::html5-stream-char stream)))))
69 |
;; A Return that is the very last character of the input is still expected
;; to be read as a Newline, followed by the EOF marker.
(deftest test-line-ending-fix2 ()
  (let ((stream (html5-parser::make-html-input-stream (coerce #(#\< #\? #\Return)
                                                              'string))))
    (is (eql #\< (html5-parser::html5-stream-char stream)))
    (is (eql #\? (html5-parser::html5-stream-char stream)))
    (is (eql #\Newline (html5-parser::html5-stream-char stream)))
    (is (eql html5-constants::+eof+ (html5-parser::html5-stream-char stream)))))
77 |
78 |
;; Octet input starting with EF BB BF (the UTF-8 byte order mark): the
;; detected encoding is expected to be :UTF-8, and the first character read
;; is expected to be the one after the mark (octet 39, an apostrophe).
(deftest test-bom ()
  (let ((stream (html5-parser::make-html-input-stream #(#xef #xbb #xbf 39))))
    (is (eql (car (html5-parser::html5-stream-encoding stream))
             :utf-8))
    (is (eql (html5-parser::html5-stream-char stream)
             #\'))))
85 |
86 |
--------------------------------------------------------------------------------
/tests/test-parser.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2012 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (in-package :html5-parser-tests)
22 |
23 | ;; Printing for tests
24 |
(defun print-node (node stream)
  "Write the one-line representation of NODE used by the html5lib
tree-construction test format to STREAM:
  doctype  <!DOCTYPE name>, or <!DOCTYPE name \"public\" \"system\"> when
           either identifier is non-empty
  comment  <!-- data -->
  element  <name>, or <prefix name> for elements outside the HTML namespace
  text     \"data\"
Signals an error for any other node type."
  (ecase (node-type node)
    (:document-type
     (format stream "<!DOCTYPE ~A" (node-name node))
     ;; The identifiers are printed only when at least one of them is
     ;; non-empty; a missing one is then shown as an empty string.
     (when (or (plusp (length (node-public-id node)))
               (plusp (length (node-system-id node))))
       (format stream " \"~A\" \"~A\""
               (or (node-public-id node) "")
               (or (node-system-id node) "")))
     (format stream ">"))
    (:comment
     (format stream "<!-- ~A -->" (node-value node)))
    (:element
     (if (and (node-namespace node)
              (string/= (node-namespace node)
                        (html5-constants::find-namespace "html")))
         (format stream "<~A ~A>"
                 (html5-constants::find-prefix (node-namespace node))
                 (node-name node))
         (format stream "<~A>" (node-name node))))
    (:text
     (format stream "\"~A\"" (node-value node)))))
47 |
(defun print-tree (node &key (stream *standard-output*) (indent 0))
  "Write the tree rooted at NODE to STREAM, one node per line.
Each line starts with \"|\" and is indented according to depth.  An
element's attributes follow it, sorted by name, one per line as
name=\"value\" (or \"prefix name\" for attributes with a namespace).
INDENT is the indentation of NODE itself.  Returns NODE."
  (flet ((start-line (column)
           (format stream "~&|~vT" column))
         (print-children (parent column)
           (element-map-children (lambda (child)
                                   (print-tree child
                                               :stream stream
                                               :indent column))
                                 parent)))
    (ecase (node-type node)
      ((:document :document-fragment)
       ;; The container itself prints nothing; its children are indented.
       (print-children node (+ indent 2)))
      (:element
       (start-line indent)
       (print-node node stream)
       (let ((inner-indent (+ indent 2))
             (attributes '()))
         (element-map-attributes* (lambda (name namespace value)
                                    (push (cons (cons name namespace) value)
                                          attributes))
                                  node)
         (when attributes
           (let ((sorted (sort attributes #'string<
                               :key (lambda (attr)
                                      (if (consp (car attr))
                                          (caar attr)
                                          (car attr))))))
             (dolist (attribute sorted)
               (let ((key (car attribute))
                     (value (cdr attribute)))
                 (start-line inner-indent)
                 (if (cdr key)
                     (format stream "~A ~A"
                             (html5-constants:find-prefix (cdr key))
                             (car key))
                     (format stream "~A" (car key)))
                 (format stream "=\"~A\"" value)))))
         (print-children node inner-indent)))
      ((:text :comment :document-type)
       (start-line indent)
       (print-node node stream))))
  node)
85 |
86 |
;; List of test inputs (the #data text) that DO-PARSER-TEST skips; it is
;; searched with STRING=.  Empty here.
(defparameter *parser-tests-to-skip*
  ())
89 |
(defun do-parser-test (&key test-name data errors document document-fragment)
  "Run one tree-construction test.
DATA is the input, DOCUMENT the expected tree dump, ERRORS the expected
errors (one per line), and DOCUMENT-FRAGMENT, when non-NIL, the container
passed to the fragment parser.  TEST-NAME is used only in messages.
Signals an error when the dumped tree differs from DOCUMENT, and a warning
when ERRORS is non-empty and its count differs from the errors reported.
Returns the parsed document, or NIL when the test is skipped."
  ;; The SKIP restart lets an interactive user abandon a failing test.
  (with-simple-restart (skip "Skip test ~A ~A"
                             test-name
                             data)
    (format t "~&Test ~A: ~A~%" test-name data)
    ;; Trailing newlines in the expected dump are not significant.
    (setf document (string-right-trim '(#\Newline) document))
    (when (member data *parser-tests-to-skip* :test #'string=)
      (format t " skipped")
      (return-from do-parser-test))
    (multiple-value-bind (result-document got-errors)
        (if document-fragment
            (parse-html5-fragment data :container document-fragment)
            (parse-html5 data))
      (let ((result (with-output-to-string (out)
                      (print-tree result-document :stream out))))
        (unless (string= document result)
          (error "Input:~%~A~%Got:~%~A~%Expected:~%~A" data result document))
        (setf errors (split-sequence:split-sequence #\Newline errors
                                                    :remove-empty-subseqs t))
        ;; Only the number of errors is compared, not their text.
        (when (and errors
                   (/= (length errors) (length got-errors)))
          (warn "Errors mismatch~&Input:~%~A~%Got:~%~{~&~A~}~%Expected:~%~{~&~A~}"
                data got-errors errors)))
      result-document)))
114 |
115 |
(defun test-parser ()
  "Run every test of every .dat file in the tree-construction test data
directory through DO-PARSER-TEST, using the file name as test name.
Returns NIL."
  (dolist (file (html5lib-test-files "tree-construction"))
    (let ((test-name (pathname-name file)))
      (dolist (test (parse-test-data file))
        ;; TEST is a property list of section keywords and texts.
        (apply #'do-parser-test :test-name test-name test)))))
123 |
124 |
;; Define the PARSER-TESTS suite under the root suite and make it the
;; current suite for the tests below.
(in-root-suite)
(defsuite parser-tests)
(in-suite parser-tests)
128 |
;; Expected results of PARSE-CONTENT-ATTR: NIL when there is no
;; "charset=value" or the quotes do not match; otherwise the charset value,
;; with whitespace kept only inside quotes.
(deftest test-parse-content-attr ()
  (is (eql nil (html5-parser::parse-content-attr "garble")))
  (is (eql nil (html5-parser::parse-content-attr "charset")))
  (is (string= "utf-8" (html5-parser::parse-content-attr "charset=utf-8")))
  (is (string= "utf-8" (html5-parser::parse-content-attr "charset = utf-8")))
  (is (string= "utf-8" (html5-parser::parse-content-attr " charset = utf-8 ")))
  (is (string= " utf-8 " (html5-parser::parse-content-attr " charset =' utf-8 '")))
  (is (eql nil (html5-parser::parse-content-attr " charset =\"utf-8 '")))
  (is (string= "utf-8" (html5-parser::parse-content-attr " charset =\"utf-8\"")))
  (is (string= "utf-8" (html5-parser::parse-content-attr " charset =\"utf-8\" "))))
139 |
--------------------------------------------------------------------------------
/tests/test-tree-builder.lisp:
--------------------------------------------------------------------------------
1 | ;;;; HTML5 parser for Common Lisp
2 | ;;;;
3 | ;;;; Copyright (C) 2012 Thomas Bakketun
4 | ;;;; Copyright (C) 2012 Asgeir Bjørlykke
5 | ;;;; Copyright (C) 2012 Mathias Hellevang
6 | ;;;; Copyright (C) 2012 Stian Sletner
7 | ;;;;
8 | ;;;; This library is free software: you can redistribute it and/or modify
9 | ;;;; it under the terms of the GNU Lesser General Public License as published
10 | ;;;; by the Free Software Foundation, either version 3 of the License, or
11 | ;;;; (at your option) any later version.
12 | ;;;;
13 | ;;;; This library is distributed in the hope that it will be useful,
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;;;; GNU General Public License for more details.
17 | ;;;;
18 | ;;;; You should have received a copy of the GNU General Public License
19 | ;;;; along with this library. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | (in-package :html5-parser-tests)
22 |
;; Define the TREE-BUILDER-TESTS suite under the root suite and make it the
;; current suite for the tests below.
(in-root-suite)
(defsuite tree-builder-tests)
(in-suite tree-builder-tests)
26 |
;; A freshly made document reports the node type :DOCUMENT.
(deftest test-make-document ()
  (is (eq :document (node-type (make-document)))))
29 |
;; After appending one child, mapping over the children visits exactly
;; that child.
(deftest test-append-child ()
  (let* ((doc (make-document))
         (child (make-element doc "test" nil)))
    (node-append-child doc child)
    (element-map-children (lambda (kid)
                            (is (eq kid child)))
                          doc)))
37 |
;; Appending a child that already has a parent moves it: its parent becomes
;; the new parent and the old parent is left without children.
(deftest test-reappend-child ()
  (let* ((doc (make-document))
         (parent1 (make-element doc "parent1" nil))
         (parent2 (make-element doc "parent2" nil))
         (child (make-element doc "child" nil)))
    (node-append-child parent1 child)
    (is (eq parent1 (node-parent child)))
    (node-append-child parent2 child)
    (is (eq parent2 (node-parent child)))
    (element-map-children (lambda (kid)
                            (error "parent1 should not have children now ~S" kid))
                          parent1)))
50 |
;; With four children appended in order, check first/last child and the
;; next/previous sibling at both ends of the child list.
(deftest test-navigate ()
  (let* ((doc (make-document))
         (parent (make-element doc "parent" nil))
         (child1 (make-element doc "child1" nil))
         (child2 (make-element doc "child2" nil))
         (child3 (make-element doc "child3" nil))
         (child4 (make-element doc "child4" nil)))
    (node-append-child parent child1)
    (node-append-child parent child2)
    (node-append-child parent child3)
    (node-append-child parent child4)
    (is (eq child1 (node-first-child parent)))
    (is (eq child4 (node-last-child parent)))
    (is (eq child2 (node-next-sibling child1)))
    (is (eq nil (node-next-sibling child4)))
    (is (eq child1 (node-previous-sibling child2)))
    (is (eq nil (node-previous-sibling child1)))))
68 |
;; Removing the second of four children makes the third child the next
;; sibling of the first.
(deftest test-remove-child ()
  (let* ((doc (make-document))
         (parent (make-element doc "parent" nil))
         (child1 (make-element doc "child1" nil))
         (child2 (make-element doc "child2" nil))
         (child3 (make-element doc "child3" nil))
         (child4 (make-element doc "child4" nil)))
    (node-append-child parent child1)
    (node-append-child parent child2)
    (node-append-child parent child3)
    (node-append-child parent child4)

    (node-remove-child parent child2)
    (is (eq child3 (node-next-sibling child1)))))
83 |
;; An attribute value stored with SETF can be read back.
(deftest test-set-attribute ()
  (let* ((doc (make-document))
         (element (make-element doc "test" nil)))
    (setf (element-attribute element "hello") "world")
    (is (string= (element-attribute element "hello") "world"))))
89 |
;; Appending two text nodes through NODE-APPEND-CHILD* is expected to leave
;; a first child whose value is the concatenation of both texts.
(deftest test-append-text ()
  (let* ((doc (make-document))
         (parent (make-element doc "parent" nil)))
    (html5-parser::node-append-child* parent (make-text-node doc "hello"))
    (html5-parser::node-append-child* parent (make-text-node doc "world"))
    (is (string= "helloworld" (node-value (node-first-child parent))))))
96 |
97 | ;; (deftest test-node-clone ()
98 | ;; (let* ((tree (make-tree))
99 | ;; (parent (tree-make-element tree "parent" nil))
100 | ;; (element (tree-make-element tree "test" nil)))
101 | ;; (node-append-child tree parent element)
102 | ;; (setf (node-attribute tree element "hello") "world")
103 | ;; (let ((clone (node-clone tree element)))
104 | ;; (is (null (node-parent tree clone)))
105 | ;; (is (string= (node-attribute tree clone "hello") "world")))))
106 |
--------------------------------------------------------------------------------
/tests/testdata/encoding/test-yahoo-jp.dat:
--------------------------------------------------------------------------------
1 | #data
2 |
3 |
4 |
5 |
6 | Yahoo! JAPAN
7 |
8 | "]
49 | }
50 |
51 | ]}
--------------------------------------------------------------------------------
/tests/testdata/tokenizer/aa-lisp-tests.test:
--------------------------------------------------------------------------------
1 | {"tests": [
2 |
3 | {"description":"Comment in script",
4 | "initialStates":["SCRIPT DATA state"],
5 | "lastStartTag":"plaintext",
6 | "input":"",
81 | "output":["ParseError", ["Comment", "--!\\uFFFD"]]
82 | },
83 | {
84 | "description":"space EOF after doctype ",
85 | "input":"-->",
7 | "output":[["Character", "foo"], ["EndTag", "xmp"]]},
8 |
9 | {"description":"Bogus comment in RCDATA or RAWTEXT",
10 | "initialStates":["RCDATA state", "RAWTEXT state"],
11 | "lastStartTag":"xmp",
12 | "input":"foobaz",
13 | "output":[["Character", "foobaz"], ["EndTag", "xmp"]]},
14 |
15 | {"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
16 | "initialStates":["RCDATA state", "RAWTEXT state"],
17 | "lastStartTag":"xmp",
18 | "input":"foobaz",
19 | "output":[["Character", "foo"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]},
20 |
21 | {"description":"Commented entities in RCDATA",
22 | "initialStates":["RCDATA state"],
23 | "lastStartTag":"xmp",
24 | "input":" & & ",
25 | "output":[["Character", " & & "], ["EndTag", "xmp"]]},
26 |
27 | {"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
28 | "initialStates":["RCDATA state", "RAWTEXT state"],
29 | "lastStartTag":"xmp",
30 | "input":"foox--<>",
31 | "output":[["Character", "foox--<>"], ["EndTag", "xmp"]]}
32 |
33 | ]}
34 |
--------------------------------------------------------------------------------
/tests/testdata/tokenizer/pendingSpecChanges.test:
--------------------------------------------------------------------------------
1 | {"tests": [
2 |
3 | {"description":"",
73 | "output":[["Comment", "comment"]]},
74 |
75 | {"description":"Comment, Central dash no space",
76 | "input":"",
77 | "output":["ParseError", ["Comment", "-"]]},
78 |
79 | {"description":"Comment, two central dashes",
80 | "input":"",
81 | "output":["ParseError", ["Comment", " --comment "]]},
82 |
83 | {"description":"Unfinished comment",
84 | "input":"",
93 | "output":["ParseError", ["Comment", ""]]},
94 |
95 | {"description":"Short comment two",
96 | "input":"",
97 | "output":["ParseError", ["Comment", ""]]},
98 |
99 | {"description":"Short comment three",
100 | "input":"",
101 | "output":[["Comment", ""]]},
102 |
103 |
104 | {"description":"Ampersand EOF",
105 | "input":"&",
106 | "output":[["Character", "&"]]},
107 |
108 | {"description":"Ampersand ampersand EOF",
109 | "input":"&&",
110 | "output":[["Character", "&&"]]},
111 |
112 | {"description":"Ampersand space EOF",
113 | "input":"& ",
114 | "output":[["Character", "& "]]},
115 |
116 | {"description":"Unfinished entity",
117 | "input":"&f",
118 | "output":["ParseError", ["Character", "&f"]]},
119 |
120 | {"description":"Ampersand, number sign",
121 | "input":"",
122 | "output":["ParseError", ["Character", ""]]},
123 |
124 | {"description":"Unfinished numeric entity",
125 | "input":"",
126 | "output":["ParseError", ["Character", ""]]},
127 |
128 | {"description":"Entity with trailing semicolon (1)",
129 | "input":"I'm ¬it",
130 | "output":[["Character","I'm \u00ACit"]]},
131 |
132 | {"description":"Entity with trailing semicolon (2)",
133 | "input":"I'm ∉",
134 | "output":[["Character","I'm \u2209"]]},
135 |
136 | {"description":"Entity without trailing semicolon (1)",
137 | "input":"I'm ¬it",
138 | "output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]},
139 |
140 | {"description":"Entity without trailing semicolon (2)",
141 | "input":"I'm ¬in",
142 | "output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]},
143 |
144 | {"description":"Partial entity match at end of file",
145 | "input":"I'm &no",
146 | "output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
147 |
148 | {"description":"Non-ASCII character reference name",
149 | "input":"&\u00AC;",
150 | "output":["ParseError", ["Character", "&\u00AC;"]]},
151 |
152 | {"description":"ASCII decimal entity",
153 | "input":"$",
154 | "output":[["Character","$"]]},
155 |
156 | {"description":"ASCII hexadecimal entity",
157 | "input":"?",
158 | "output":[["Character","?"]]},
159 |
160 | {"description":"Hexadecimal entity in attribute",
161 | "input":" ",
162 | "output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
163 |
164 | {"description":"Entity in attribute without semicolon ending in x",
165 | "input":"",
166 | "output":["ParseError", ["StartTag", "h", {"a":"¬x"}]]},
167 |
168 | {"description":"Entity in attribute without semicolon ending in 1",
169 | "input":"",
170 | "output":["ParseError", ["StartTag", "h", {"a":"¬1"}]]},
171 |
172 | {"description":"Entity in attribute without semicolon ending in i",
173 | "input":"",
174 | "output":["ParseError", ["StartTag", "h", {"a":"¬i"}]]},
175 |
176 | {"description":"Entity in attribute without semicolon",
177 | "input":"",
178 | "output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},
179 |
180 | {"description":"Unquoted attribute ending in ampersand",
181 | "input":"",
182 | "output":[["StartTag","s",{"o":"&","t":""}]]},
183 |
184 | {"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
185 | "input":"foo",
186 | "output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
187 |
188 | {"description":"plaintext element",
189 | "input":"foobar",
190 | "output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
191 |
192 | {"description":"Open angled bracket in unquoted attribute value state",
193 | "input":"",
194 | "output":["ParseError", ["StartTag", "a", {"a":"f<"}]]}
195 |
196 | ]}
197 |
--------------------------------------------------------------------------------
/tests/testdata/tokenizer/test2.test:
--------------------------------------------------------------------------------
1 | {"tests": [
2 |
3 | {"description":"DOCTYPE without name",
4 | "input":"",
5 | "output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
6 |
7 | {"description":"DOCTYPE without space before name",
8 | "input":"",
9 | "output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
10 |
11 | {"description":"Incorrect DOCTYPE without a space before name",
12 | "input":"",
13 | "output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
14 |
15 | {"description":"DOCTYPE with publicId",
16 | "input":"",
17 | "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
18 |
19 | {"description":"DOCTYPE with EOF after PUBLIC",
20 | "input":"",
33 | "output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
34 |
35 | {"description":"DOCTYPE with publicId and systemId",
36 | "input":"",
37 | "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
38 |
39 | {"description":"DOCTYPE with > in double-quoted publicId",
40 | "input":"x",
41 | "output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
42 |
43 | {"description":"DOCTYPE with > in single-quoted publicId",
44 | "input":"x",
53 | "output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
54 |
55 | {"description":"Incomplete doctype",
56 | "input":"",
89 | "output":[["StartTag", "h", { "a":"&" }]]},
90 |
91 | {"description":"StartTag containing <",
92 | "input":" ",
93 | "output":[["StartTag", "a ",
97 | "output":[["StartTag","h",{},true]]},
98 |
99 | {"description":"Void element with permitted slash",
100 | "input":" ",
101 | "output":[["StartTag","br",{},true]]},
102 |
103 | {"description":"Void element with permitted slash (with attribute)",
104 | "input":" ",
105 | "output":[["StartTag","br",{"foo":"bar"},true]]},
106 |
107 | {"description":"StartTag containing /",
108 | "input":"",
109 | "output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
110 |
111 | {"description":"Double-quoted attribute value",
112 | "input":"",
113 | "output":[["StartTag", "h", { "a":"b" }]]},
114 |
115 | {"description":"Unescaped ",
116 | "input":"",
117 | "output":["ParseError", ["Character", ""]]},
118 |
119 | {"description":"Illegal end tag name",
120 | "input":"1>",
121 | "output":["ParseError", ["Comment", "1"]]},
122 |
123 | {"description":"Simili processing instruction",
124 | "input":"",
125 | "output":["ParseError", ["Comment", "?namespace"]]},
126 |
127 | {"description":"A bogus comment stops at >, even if preceeded by two dashes",
128 | "input":"",
129 | "output":["ParseError", ["Comment", "?foo--"]]},
130 |
131 | {"description":"Unescaped <",
132 | "input":"foo < bar",
133 | "output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
134 |
135 | {"description":"Null Byte Replacement",
136 | "input":"\u0000",
137 | "output":["ParseError", ["Character", "\u0000"]]},
138 |
139 | {"description":"Comment with dash",
140 | "input":"c",
173 | "output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]},
174 |
175 | {"description":"Empty end tag with following end tag",
176 | "input":"a>c",
177 | "output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]}
178 |
179 | ]}
180 |
--------------------------------------------------------------------------------
/tests/testdata/tokenizer/unicodeCharsProblematic.test:
--------------------------------------------------------------------------------
1 | {"tests" : [
2 | {"description": "Invalid Unicode character U+DFFF",
3 | "doubleEscaped":true,
4 | "input": "\\uDFFF",
5 | "output":["ParseError", ["Character", "\\uFFFD"]]},
6 |
7 | {"description": "Invalid Unicode character U+D800",
8 | "doubleEscaped":true,
9 | "input": "\\uD800",
10 | "output":["ParseError", ["Character", "\\uFFFD"]]},
11 |
12 | {"description": "Invalid Unicode character U+DFFF with valid preceding character",
13 | "doubleEscaped":true,
14 | "input": "a\\uDFFF",
15 | "output":["ParseError", ["Character", "a\\uFFFD"]]},
16 |
17 | {"description": "Invalid Unicode character U+D800 with valid following character",
18 | "doubleEscaped":true,
19 | "input": "\\uD800a",
20 | "output":["ParseError", ["Character", "\\uFFFDa"]]},
21 |
22 | {"description":"CR followed by U+0000",
23 | "input":"\r\u0000",
24 | "output":[["Character", "\n"], "ParseError", ["Character", "\u0000"]],
25 | "ignoreErrorOrder":true}
26 | ]
27 | }
--------------------------------------------------------------------------------
/tests/testdata/tokenizer/xmlViolation.test:
--------------------------------------------------------------------------------
1 | {"xmlViolationTests": [
2 |
3 | {"description":"Non-XML character",
4 | "input":"a\uFFFFb",
5 | "ignoreErrorOrder":true,
6 | "output":["ParseError",["Character","a\uFFFDb"]]},
7 |
8 | {"description":"Non-XML space",
9 | "input":"a\u000Cb",
10 | "ignoreErrorOrder":true,
11 | "output":[["Character","a b"]]},
12 |
13 | {"description":"Double hyphen in comment",
14 | "input":"",
15 | "output":["ParseError",["Comment"," foo - - bar "]]},
16 |
17 | {"description":"FF between attributes",
18 | "input":"",
19 | "output":[["StartTag","a",{"b":"","c":""}]]}
20 | ]}
21 |
22 |
23 |
--------------------------------------------------------------------------------
/tests/testdata/tree-construction/adoption01.dat:
--------------------------------------------------------------------------------
1 | #data
2 |
3 | #errors
4 | #document
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 |
12 | #data
13 | 12
3
14 | #errors
15 | #document
16 | |
17 | |
18 | |
19 | |
20 | | "1"
21 | |
22 | |
23 | | "2"
24 | | "3"
25 |
26 | #data
27 | 12 3
28 | #errors
29 | #document
30 | |
31 | |
32 | |
33 | |
34 | | "1"
35 | |
36 | |
37 | | "2"
38 | | "3"
39 |
40 | #data
41 | 12 3
42 | #errors
43 | #document
44 | |
45 | |
46 | |
47 | |
48 | | "1"
49 | |
50 | | "2"
51 | |
52 | | "3"
53 |
54 | #data
55 | 1
56 | #errors
57 | #document
58 | |
59 | |
60 | |
61 | |
62 | | "1"
63 | |