├── .github
│   └── workflows
│       └── go.yml
├── CONTRIBUTORS
├── LICENSE
├── README.md
├── RELEASE_NOTES.md
├── etree.go
├── etree_test.go
├── example_test.go
├── go.mod
├── helpers.go
├── path.go
└── path_test.go
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
1 | name: Go
2 |
3 | on: [push, pull_request]
4 |
5 | permissions:
6 | contents: read
7 |
8 | jobs:
9 |
10 | analyze:
11 | name: Analyze
12 | runs-on: ubuntu-latest
13 |
14 | permissions:
15 | actions: read
16 | contents: read
17 | security-events: write
18 |
19 | strategy:
20 | fail-fast: false
21 | matrix:
22 | language: ["go"]
23 |
24 | steps:
25 | - name: Checkout repository
26 | uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
27 |
28 | - name: Initialize CodeQL
29 | uses: github/codeql-action/init@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12
30 | with:
31 | languages: ${{ matrix.language }}
32 |
33 | - name: Autobuild
34 | uses: github/codeql-action/autobuild@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12
35 |
36 | - name: Perform CodeQL Analysis
37 | uses: github/codeql-action/analyze@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12
38 | with:
39 | category: "/language:${{matrix.language}}"
40 |
41 | build:
42 | name: Build
43 | runs-on: ubuntu-latest
44 |
45 | strategy:
46 | matrix:
47 | go-version: [ '1.21', '1.22.x' ]
48 |
49 | steps:
50 | - name: Checkout repository
51 | uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
52 |
53 | - name: Setup Go ${{ matrix.go-version }}
54 | uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
55 | with:
56 | go-version: ${{ matrix.go-version }}
57 |
58 | - name: Build
59 | run: go build -v ./...
60 |
61 | - name: Test
62 | run: go test -v ./...
63 |
--------------------------------------------------------------------------------
/CONTRIBUTORS:
--------------------------------------------------------------------------------
1 | Brett Vickers (beevik)
2 | Felix Geisendörfer (felixge)
3 | Kamil Kisiel (kisielk)
4 | Graham King (grahamking)
5 | Matt Smith (ma314smith)
6 | Michal Jemala (michaljemala)
7 | Nicolas Piganeau (npiganeau)
8 | Chris Brown (ccbrown)
9 | Earncef Sequeira (earncef)
10 | Gabriel de Labachelerie (wuzuf)
11 | Martin Dosch (mdosch)
12 | Hugo Wetterberg (hugowetterberg)
13 | Tobias Theel (nerzal)
14 | Daniel Potapov (dpotapov)
15 | Mikhail Ferapontow (MikhailFerapontow)
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2015-2024 Brett Vickers. All rights reserved.
2 |
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions
5 | are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright
8 | notice, this list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright
11 | notice, this list of conditions and the following disclaimer in the
12 | documentation and/or other materials provided with the distribution.
13 |
14 | THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY
15 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR
18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
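1 | [![GoDoc](https://godoc.org/github.com/beevik/etree?status.svg)](https://godoc.org/github.com/beevik/etree)
2 | [![Go](https://github.com/beevik/etree/actions/workflows/go.yml/badge.svg)](https://github.com/beevik/etree/actions/workflows/go.yml)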
3 |
4 | etree
5 | =====
6 |
7 | The etree package is a lightweight, pure go package that expresses XML in
8 | the form of an element tree. Its design was inspired by the Python
9 | [ElementTree](http://docs.python.org/2/library/xml.etree.elementtree.html)
10 | module.
11 |
12 | Some of the package's capabilities and features:
13 |
14 | * Represents XML documents as trees of elements for easy traversal.
15 | * Imports, serializes, modifies or creates XML documents from scratch.
16 | * Writes and reads XML to/from files, byte slices, strings and io interfaces.
17 | * Performs simple or complex searches with lightweight XPath-like query APIs.
18 | * Auto-indents XML using spaces or tabs for better readability.
19 | * Implemented in pure go; depends only on standard go libraries.
20 | * Built on top of the go [encoding/xml](http://golang.org/pkg/encoding/xml)
21 | package.
22 |
23 | ### Creating an XML document
24 |
25 | The following example creates an XML document from scratch using the etree
26 | package and outputs its indented contents to stdout.
27 | ```go
28 | doc := etree.NewDocument()
29 | doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
30 | doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
31 |
32 | people := doc.CreateElement("People")
33 | people.CreateComment("These are all known people")
34 |
35 | jon := people.CreateElement("Person")
36 | jon.CreateAttr("name", "Jon")
37 |
38 | sally := people.CreateElement("Person")
39 | sally.CreateAttr("name", "Sally")
40 |
41 | doc.Indent(2)
42 | doc.WriteTo(os.Stdout)
43 | ```
44 |
45 | Output:
46 | ```xml
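47 | <?xml version="1.0" encoding="UTF-8"?>
48 | <?xml-stylesheet type="text/xsl" href="style.xsl"?>
49 | <People>
50 |   <!--These are all known people-->
51 |   <Person name="Jon"/>
52 |   <Person name="Sally"/>
53 | </People>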
54 | ```
55 |
56 | ### Reading an XML file
57 |
58 | Suppose you have a file on disk called `bookstore.xml` containing the
59 | following data:
60 |
61 | ```xml
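62 | <bookstore xmlns:p="urn:schemas-books-com:prices">
63 |
64 |   <book category="COOKING">
65 |     <title lang="en">Everyday Italian</title>
66 |     <author>Giada De Laurentiis</author>
67 |     <year>2005</year>
68 |     <p:price>30.00</p:price>
69 |   </book>
70 |
71 |   <book category="CHILDREN">
72 |     <title lang="en">Harry Potter</title>
73 |     <author>J K. Rowling</author>
74 |     <year>2005</year>
75 |     <p:price>29.99</p:price>
76 |   </book>
77 |
78 |   <book category="WEB">
79 |     <title lang="en">XQuery Kick Start</title>
80 |     <author>James McGovern</author>
81 |     <author>Per Bothner</author>
82 |     <author>Kurt Cagle</author>
83 |     <author>James Linn</author>
84 |     <author>Vaidyanathan Nagarajan</author>
85 |     <year>2003</year>
86 |     <p:price>49.99</p:price>
87 |   </book>
88 |
89 |   <book category="WEB">
90 |     <title lang="en">Learning XML</title>
91 |     <author>Erik T. Ray</author>
92 |     <year>2003</year>
93 |     <p:price>39.95</p:price>
94 |   </book>
95 |
96 | </bookstore>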
97 | ```
98 |
99 | This code reads the file's contents into an etree document.
100 | ```go
101 | doc := etree.NewDocument()
102 | if err := doc.ReadFromFile("bookstore.xml"); err != nil {
103 | panic(err)
104 | }
105 | ```
106 |
107 | You can also read XML from a string, a byte slice, or an `io.Reader`.
108 |
109 | ### Processing elements and attributes
110 |
111 | This example illustrates several ways to access elements and attributes using
112 | etree selection queries.
113 | ```go
114 | root := doc.SelectElement("bookstore")
115 | fmt.Println("ROOT element:", root.Tag)
116 |
117 | for _, book := range root.SelectElements("book") {
118 | fmt.Println("CHILD element:", book.Tag)
119 | if title := book.SelectElement("title"); title != nil {
120 | lang := title.SelectAttrValue("lang", "unknown")
121 | fmt.Printf(" TITLE: %s (%s)\n", title.Text(), lang)
122 | }
123 | for _, attr := range book.Attr {
124 | fmt.Printf(" ATTR: %s=%s\n", attr.Key, attr.Value)
125 | }
126 | }
127 | ```
128 | Output:
129 | ```
130 | ROOT element: bookstore
131 | CHILD element: book
132 | TITLE: Everyday Italian (en)
133 | ATTR: category=COOKING
134 | CHILD element: book
135 | TITLE: Harry Potter (en)
136 | ATTR: category=CHILDREN
137 | CHILD element: book
138 | TITLE: XQuery Kick Start (en)
139 | ATTR: category=WEB
140 | CHILD element: book
141 | TITLE: Learning XML (en)
142 | ATTR: category=WEB
143 | ```
144 |
145 | ### Path queries
146 |
147 | This example uses etree's path functions to select all book titles that fall
148 | into the category of 'WEB'. The double-slash prefix in the path causes the
149 | search for book elements to occur recursively; book elements may appear at any
150 | level of the XML hierarchy.
151 | ```go
152 | for _, t := range doc.FindElements("//book[@category='WEB']/title") {
153 | fmt.Println("Title:", t.Text())
154 | }
155 | ```
156 |
157 | Output:
158 | ```
159 | Title: XQuery Kick Start
160 | Title: Learning XML
161 | ```
162 |
163 | This example finds the first book element under the root bookstore element and
164 | outputs the tag and text of each of its child elements.
165 | ```go
166 | for _, e := range doc.FindElements("./bookstore/book[1]/*") {
167 | fmt.Printf("%s: %s\n", e.Tag, e.Text())
168 | }
169 | ```
170 |
171 | Output:
172 | ```
173 | title: Everyday Italian
174 | author: Giada De Laurentiis
175 | year: 2005
176 | price: 30.00
177 | ```
178 |
179 | This example finds all books with a price of 49.99 and outputs their titles.
180 | ```go
181 | path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title")
182 | for _, e := range doc.FindElementsPath(path) {
183 | fmt.Println(e.Text())
184 | }
185 | ```
186 |
187 | Output:
188 | ```
189 | XQuery Kick Start
190 | ```
191 |
192 | Note that this example uses the FindElementsPath function, which takes as an
193 | argument a pre-compiled path object. Use precompiled paths when you plan to
194 | search with the same path more than once.
195 |
196 | ### Other features
197 |
198 | These are just a few examples of the things the etree package can do. See the
199 | [documentation](http://godoc.org/github.com/beevik/etree) for a complete
200 | description of its capabilities.
201 |
202 | ### Contributing
203 |
204 | This project accepts contributions. Just fork the repo and submit a pull
205 | request!
206 |
--------------------------------------------------------------------------------
/RELEASE_NOTES.md:
--------------------------------------------------------------------------------
1 | Release 1.5.1
2 | =============
3 |
4 | **Fixes**
5 |
6 | * Fixed a bug in `InsertChildAt`.
7 |
8 | Release 1.5.0
9 | =============
10 |
11 | **Changes**
12 |
13 | * Added `Element` function `CreateChild`, which calls a continuation function
14 | after creating and adding a child element.
15 |
16 | **Fixes**
17 |
18 | * Removed a potential conflict between two `ReadSettings` values. When
19 | `AttrSingleQuote` is true, `CanonicalAttrVal` is forced to be false.
20 |
21 | Release 1.4.1
22 | =============
23 |
24 | **Changes**
25 |
26 | * Minimal go version updated to 1.21.
27 | * Default-initialized CharsetReader causes same result as NewDocument().
28 | * When reading an XML document, attributes are parsed more efficiently.
29 |
30 | Release v1.4.0
31 | ==============
32 |
33 | **New Features**
34 |
35 | * Add `AutoClose` option to `ReadSettings`.
36 | * Add `ValidateInput` to `ReadSettings`.
37 | * Add `NotNil` function to `Element`.
38 | * Add `NextSibling` and `PrevSibling` functions to `Element`.
39 |
40 | Release v1.3.0
41 | ==============
42 |
43 | **New Features**
44 |
45 | * Add support for double-quotes in filter path queries.
46 | * Add `PreserveDuplicateAttrs` to `ReadSettings`.
47 | * Add `ReindexChildren` to `Element`.
48 |
49 | Release v1.2.0
50 | ==============
51 |
52 | **New Features**
53 |
54 | * Add the ability to write XML fragments using Token WriteTo functions.
55 | * Add the ability to re-indent an XML element as though it were the root of
56 | the document.
57 | * Add a ReadSettings option to preserve CDATA blocks when reading an XML
58 | document.
59 |
60 | Release v1.1.4
61 | ==============
62 |
63 | **New Features**
64 |
65 | * Add the ability to preserve whitespace in leaf elements during indent.
66 | * Add the ability to suppress a document-trailing newline during indent.
67 | * Add choice of XML attribute quoting style (single-quote or double-quote).
68 |
69 | **Removed Features**
70 |
71 | * Removed the CDATA preservation change introduced in v1.1.3. It was
72 | implemented in a way that broke the ability to process XML documents
73 | encoded using non-UTF8 character sets.
74 |
75 | Release v1.1.3
76 | ==============
77 |
78 | * XML reads now preserve CDATA sections instead of converting them to
79 | standard character data.
80 |
81 | Release v1.1.2
82 | ==============
83 |
84 | * Fixed a path parsing bug.
85 | * The `Element.Text` function now handles comments embedded between
86 | character data spans.
87 |
88 | Release v1.1.1
89 | ==============
90 |
91 | * Updated go version in `go.mod` to 1.20
92 |
93 | Release v1.1.0
94 | ==============
95 |
96 | **New Features**
97 |
98 | * New attribute helpers.
99 | * Added the `Element.SortAttrs` method, which lexicographically sorts an
100 | element's attributes by key.
101 | * New `ReadSettings` properties.
102 | * Added `Entity` for the support of custom entity maps.
103 | * New `WriteSettings` properties.
104 | * Added `UseCRLF` to allow the output of CR-LF newlines instead of the
105 | default LF newlines. This is useful on Windows systems.
106 | * Additional support for text and CDATA sections.
107 | * The `Element.Text` method now returns the concatenation of all consecutive
108 | character data tokens immediately following an element's opening tag.
109 | * Added `Element.SetCData` to replace the character data immediately
110 | following an element's opening tag with a CDATA section.
111 | * Added `Element.CreateCData` to create and add a CDATA section child
112 | `CharData` token to an element.
113 | * Added `Element.CreateText` to create and add a child text `CharData` token
114 | to an element.
115 | * Added `NewCData` to create a parentless CDATA section `CharData` token.
116 | * Added `NewText` to create a parentless text `CharData`
117 | token.
118 | * Added `CharData.IsCData` to detect if the token contains a CDATA section.
119 | * Added `CharData.IsWhitespace` to detect if the token contains whitespace
120 | inserted by one of the document Indent functions.
121 | * Modified `Element.SetText` so that it replaces a run of consecutive
122 | character data tokens following the element's opening tag (instead of just
123 | the first one).
124 | * New "tail text" support.
125 | * Added the `Element.Tail` method, which returns the text immediately
126 | following an element's closing tag.
127 | * Added the `Element.SetTail` method, which modifies the text immediately
128 | following an element's closing tag.
129 | * New element child insertion and removal methods.
130 | * Added the `Element.InsertChildAt` method, which inserts a new child token
131 | before the specified child token index.
132 | * Added the `Element.RemoveChildAt` method, which removes the child token at
133 | the specified child token index.
134 | * New element and attribute queries.
135 | * Added the `Element.Index` method, which returns the element's index within
136 | its parent element's child token list.
137 | * Added the `Element.NamespaceURI` method to return the namespace URI
138 | associated with an element.
139 | * Added the `Attr.NamespaceURI` method to return the namespace URI
140 | associated with an attribute.
141 | * Added the `Attr.Element` method to return the element that an attribute
142 | belongs to.
143 | * New Path filter functions.
144 | * Added `[local-name()='val']` to keep elements whose unprefixed tag matches
145 | the desired value.
146 | * Added `[name()='val']` to keep elements whose full tag matches the desired
147 | value.
148 | * Added `[namespace-prefix()='val']` to keep elements whose namespace prefix
149 | matches the desired value.
150 | * Added `[namespace-uri()='val']` to keep elements whose namespace URI
151 | matches the desired value.
152 |
153 | **Bug Fixes**
154 |
155 | * A default XML `CharSetReader` is now used to prevent failed parsing of XML
156 | documents using certain encodings.
157 | ([Issue](https://github.com/beevik/etree/issues/53)).
158 | * All characters are now properly escaped according to XML parsing rules.
159 | ([Issue](https://github.com/beevik/etree/issues/55)).
160 | * The `Document.Indent` and `Document.IndentTabs` functions no longer insert
161 | empty string `CharData` tokens.
162 |
163 | **Deprecated**
164 |
165 | * `Element`
166 | * The `InsertChild` method is deprecated. Use `InsertChildAt` instead.
167 | * The `CreateCharData` method is deprecated. Use `CreateText` instead.
168 | * `CharData`
169 | * The `NewCharData` method is deprecated. Use `NewText` instead.
170 |
171 |
172 | Release v1.0.1
173 | ==============
174 |
175 | **Changes**
176 |
177 | * Added support for absolute etree Path queries. An absolute path begins with
178 | `/` or `//` and begins its search from the element's document root.
179 | * Added [`GetPath`](https://godoc.org/github.com/beevik/etree#Element.GetPath)
180 | and [`GetRelativePath`](https://godoc.org/github.com/beevik/etree#Element.GetRelativePath)
181 | functions to the [`Element`](https://godoc.org/github.com/beevik/etree#Element)
182 | type.
183 |
184 | **Breaking changes**
185 |
186 | * A path starting with `//` is now interpreted as an absolute path.
187 | Previously, it was interpreted as a relative path starting from the element
188 | whose
189 | [`FindElement`](https://godoc.org/github.com/beevik/etree#Element.FindElement)
190 | method was called. To remain compatible with this release, all paths
191 | prefixed with `//` should be prefixed with `.//` when called from any
192 | element other than the document's root.
193 | * [**edit 2/1/2019**]: Minor releases should not contain breaking changes.
194 | Even though this breaking change was very minor, it was a mistake to include
195 | it in this minor release. In the future, all breaking changes will be
196 | limited to major releases (e.g., version 2.0.0).
197 |
198 | Release v1.0.0
199 | ==============
200 |
201 | Initial release.
202 |
--------------------------------------------------------------------------------
/etree.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015-2019 Brett Vickers.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 |
5 | // Package etree provides XML services through an Element Tree
6 | // abstraction.
7 | package etree
8 |
9 | import (
10 | "bufio"
11 | "bytes"
12 | "encoding/xml"
13 | "errors"
14 | "io"
15 | "os"
16 | "slices"
17 | "strings"
18 | )
19 |
// NoIndent is the value to assign to IndentSettings.Spaces in order to
// remove all indentation.
const NoIndent = -1
25 |
var (
	// ErrXML is the error reported when XML parsing fails because the
	// input is incorrectly formatted.
	ErrXML = errors.New("etree: invalid XML format")
)
28 |
29 | // cdataPrefix is used to detect CDATA text when ReadSettings.PreserveCData is
30 | // true.
31 | var cdataPrefix = []byte(". If false, XML character references
111 | // are also produced for " and '. Default: false.
112 | CanonicalText bool
113 |
114 | // CanonicalAttrVal forces the production of XML character references for
115 | // attribute value characters &, < and ". If false, XML character
116 | // references are also produced for > and '. Ignored when AttrSingleQuote
117 | // is true. Default: false.
118 | CanonicalAttrVal bool
119 |
120 | // AttrSingleQuote causes attributes to use single quotes (attr='example')
121 | // instead of double quotes (attr = "example") when set to true. Default:
122 | // false.
123 | AttrSingleQuote bool
124 |
125 | // UseCRLF causes the document's Indent* functions to use a carriage return
126 | // followed by a linefeed ("\r\n") when outputting a newline. If false,
127 | // only a linefeed is used ("\n"). Default: false.
128 | //
129 | // Deprecated: UseCRLF is deprecated. Use IndentSettings.UseCRLF instead.
130 | UseCRLF bool
131 | }
132 |
133 | // dup creates a duplicate of the WriteSettings object.
134 | func (s *WriteSettings) dup() WriteSettings {
135 | return *s
136 | }
137 |
// IndentSettings determine the behavior of the Document's Indent* functions.
// Use NewIndentSettings to obtain a record populated with the defaults listed
// on each field.
type IndentSettings struct {
	// Spaces indicates the number of spaces to insert for each level of
	// indentation. Set to etree.NoIndent to remove all indentation. Ignored
	// when UseTabs is true. Default: 4.
	Spaces int

	// UseTabs causes tabs to be used instead of spaces when indenting.
	// Default: false.
	UseTabs bool

	// UseCRLF causes newlines to be written as a carriage return followed by
	// a linefeed ("\r\n"). If false, only a linefeed character is output
	// for a newline ("\n"). Default: false.
	UseCRLF bool

	// PreserveLeafWhitespace causes indent functions to preserve whitespace
	// within XML elements containing only non-CDATA character data. Default:
	// false.
	PreserveLeafWhitespace bool

	// SuppressTrailingWhitespace suppresses the generation of trailing
	// whitespace characters (such as newlines) at the end of the indented
	// document. Default: false.
	SuppressTrailingWhitespace bool
}
164 |
165 | // NewIndentSettings creates a default IndentSettings record.
166 | func NewIndentSettings() *IndentSettings {
167 | return &IndentSettings{
168 | Spaces: 4,
169 | UseTabs: false,
170 | UseCRLF: false,
171 | PreserveLeafWhitespace: false,
172 | SuppressTrailingWhitespace: false,
173 | }
174 | }
175 |
176 | type indentFunc func(depth int) string
177 |
178 | func getIndentFunc(s *IndentSettings) indentFunc {
179 | if s.UseTabs {
180 | if s.UseCRLF {
181 | return func(depth int) string { return indentCRLF(depth, indentTabs) }
182 | } else {
183 | return func(depth int) string { return indentLF(depth, indentTabs) }
184 | }
185 | } else {
186 | if s.Spaces < 0 {
187 | return func(depth int) string { return "" }
188 | } else if s.UseCRLF {
189 | return func(depth int) string { return indentCRLF(depth*s.Spaces, indentSpaces) }
190 | } else {
191 | return func(depth int) string { return indentLF(depth*s.Spaces, indentSpaces) }
192 | }
193 | }
194 | }
195 |
// Writer combines the byte-slice, single-byte and string write methods that
// each token type's WriteTo function calls when serializing itself.
type Writer interface {
	io.Writer
	io.ByteWriter
	io.StringWriter
}
203 |
// A Token is an interface type used to represent XML elements, character
// data, CDATA sections, XML comments, XML directives, and XML processing
// instructions.
//
// The unexported methods keep the interface closed to types declared outside
// this package.
type Token interface {
	// Parent returns the token's parent element (presumably nil when the
	// token is unparented).
	Parent() *Element

	// Index returns the token's position within its parent's child tokens
	// (presumably; mirrors the Element.index field).
	Index() int

	// WriteTo serializes the token to the writer 'w' using the write
	// settings 's'.
	WriteTo(w Writer, s *WriteSettings)

	// dup duplicates the token; 'parent' is presumably the element the
	// duplicate gets bound to.
	dup(parent *Element) Token

	// setParent and setIndex update the token's parent and index
	// bookkeeping, e.g., when a token is swapped into or out of a tree.
	setParent(parent *Element)
	setIndex(index int)
}
215 |
// A Document is a container holding a complete XML tree.
//
// A document has a single embedded element, which contains zero or more child
// tokens, one of which is usually the root element. The embedded element may
// include other children such as processing instruction tokens or character
// data tokens. The document's embedded element is never directly serialized;
// only its children are.
//
// A document also contains read and write settings, which influence the way
// the document is deserialized, serialized, and indented.
type Document struct {
	Element                     // embedded element whose children are the document's top-level tokens
	ReadSettings  ReadSettings  // settings applied by the ReadFrom* functions
	WriteSettings WriteSettings // settings applied by the WriteTo* functions
}
231 |
232 | // An Element represents an XML element, its attributes, and its child tokens.
233 | type Element struct {
234 | Space, Tag string // namespace prefix and tag
235 | Attr []Attr // key-value attribute pairs
236 | Child []Token // child tokens (elements, comments, etc.)
237 | parent *Element // parent element
238 | index int // token index in parent's children
239 | }
240 |
241 | // An Attr represents a key-value attribute within an XML element.
242 | type Attr struct {
243 | Space, Key string // The attribute's namespace prefix and key
244 | Value string // The attribute value string
245 | element *Element // element containing the attribute
246 | }
247 |
// charDataFlags is a bit set holding additional settings for a CharData
// token.
type charDataFlags uint8

const (
	// whitespaceFlag marks a CharData that contains only whitespace.
	whitespaceFlag charDataFlags = 1 << 0

	// cdataFlag marks a CharData that contains a CDATA section.
	cdataFlag charDataFlags = 1 << 1
)
258 |
// CharData may be used to represent simple text data or a CDATA section
// within an XML document. The Data property should never be modified
// directly; use the SetData function instead.
type CharData struct {
	Data   string        // the simple text or CDATA section content
	parent *Element      // parent element (presumably nil when unparented)
	index  int           // presumably the token index in the parent's children
	flags  charDataFlags // whitespaceFlag and/or cdataFlag bits
}
268 |
// A Comment represents an XML comment.
type Comment struct {
	Data   string   // the comment's text
	parent *Element // parent element (presumably nil when unparented)
	index  int      // presumably the token index in the parent's children
}
275 |
// A Directive represents an XML directive.
type Directive struct {
	Data   string   // the directive string
	parent *Element // parent element (presumably nil when unparented)
	index  int      // presumably the token index in the parent's children
}
282 |
// A ProcInst represents an XML processing instruction.
type ProcInst struct {
	Target string   // the processing instruction target
	Inst   string   // the processing instruction value
	parent *Element // parent element (presumably nil when unparented)
	index  int      // presumably the token index in the parent's children
}
290 |
291 | // NewDocument creates an XML document without a root element.
292 | func NewDocument() *Document {
293 | return &Document{
294 | Element: Element{Child: make([]Token, 0)},
295 | }
296 | }
297 |
298 | // NewDocumentWithRoot creates an XML document and sets the element 'e' as its
299 | // root element. If the element 'e' is already part of another document, it is
300 | // first removed from its existing document.
301 | func NewDocumentWithRoot(e *Element) *Document {
302 | d := NewDocument()
303 | d.SetRoot(e)
304 | return d
305 | }
306 |
307 | // Copy returns a recursive, deep copy of the document.
308 | func (d *Document) Copy() *Document {
309 | return &Document{
310 | Element: *(d.Element.dup(nil).(*Element)),
311 | ReadSettings: d.ReadSettings.dup(),
312 | WriteSettings: d.WriteSettings.dup(),
313 | }
314 | }
315 |
316 | // Root returns the root element of the document. It returns nil if there is
317 | // no root element.
318 | func (d *Document) Root() *Element {
319 | for _, t := range d.Child {
320 | if c, ok := t.(*Element); ok {
321 | return c
322 | }
323 | }
324 | return nil
325 | }
326 |
327 | // SetRoot replaces the document's root element with the element 'e'. If the
328 | // document already has a root element when this function is called, then the
329 | // existing root element is unbound from the document. If the element 'e' is
330 | // part of another document, then it is unbound from the other document.
331 | func (d *Document) SetRoot(e *Element) {
332 | if e.parent != nil {
333 | e.parent.RemoveChild(e)
334 | }
335 |
336 | // If there is already a root element, replace it.
337 | p := &d.Element
338 | for i, t := range p.Child {
339 | if _, ok := t.(*Element); ok {
340 | t.setParent(nil)
341 | t.setIndex(-1)
342 | p.Child[i] = e
343 | e.setParent(p)
344 | e.setIndex(i)
345 | return
346 | }
347 | }
348 |
349 | // No existing root element, so add it.
350 | p.addChild(e)
351 | }
352 |
353 | // ReadFrom reads XML from the reader 'r' into this document. The function
354 | // returns the number of bytes read and any error encountered.
355 | func (d *Document) ReadFrom(r io.Reader) (n int64, err error) {
356 | if d.ReadSettings.ValidateInput {
357 | b, err := io.ReadAll(r)
358 | if err != nil {
359 | return 0, err
360 | }
361 | if err := validateXML(bytes.NewReader(b), d.ReadSettings); err != nil {
362 | return 0, err
363 | }
364 | r = bytes.NewReader(b)
365 | }
366 | return d.Element.readFrom(r, d.ReadSettings)
367 | }
368 |
369 | // ReadFromFile reads XML from a local file at path 'filepath' into this
370 | // document.
371 | func (d *Document) ReadFromFile(filepath string) error {
372 | f, err := os.Open(filepath)
373 | if err != nil {
374 | return err
375 | }
376 | defer f.Close()
377 |
378 | _, err = d.ReadFrom(f)
379 | return err
380 | }
381 |
382 | // ReadFromBytes reads XML from the byte slice 'b' into the this document.
383 | func (d *Document) ReadFromBytes(b []byte) error {
384 | if d.ReadSettings.ValidateInput {
385 | if err := validateXML(bytes.NewReader(b), d.ReadSettings); err != nil {
386 | return err
387 | }
388 | }
389 | _, err := d.Element.readFrom(bytes.NewReader(b), d.ReadSettings)
390 | return err
391 | }
392 |
393 | // ReadFromString reads XML from the string 's' into this document.
394 | func (d *Document) ReadFromString(s string) error {
395 | if d.ReadSettings.ValidateInput {
396 | if err := validateXML(strings.NewReader(s), d.ReadSettings); err != nil {
397 | return err
398 | }
399 | }
400 | _, err := d.Element.readFrom(strings.NewReader(s), d.ReadSettings)
401 | return err
402 | }
403 |
404 | // validateXML determines if the data read from the reader 'r' contains
405 | // well-formed XML according to the rules set by the go xml package.
406 | func validateXML(r io.Reader, settings ReadSettings) error {
407 | dec := newDecoder(r, settings)
408 | err := dec.Decode(new(interface{}))
409 | if err != nil {
410 | return err
411 | }
412 |
413 | // If there are any trailing tokens after unmarshalling with Decode(),
414 | // then the XML input didn't terminate properly.
415 | _, err = dec.Token()
416 | if err == io.EOF {
417 | return nil
418 | }
419 | return ErrXML
420 | }
421 |
422 | // newDecoder creates an XML decoder for the reader 'r' configured using
423 | // the provided read settings.
424 | func newDecoder(r io.Reader, settings ReadSettings) *xml.Decoder {
425 | d := xml.NewDecoder(r)
426 | d.CharsetReader = settings.CharsetReader
427 | if d.CharsetReader == nil {
428 | d.CharsetReader = defaultCharsetReader
429 | }
430 | d.Strict = !settings.Permissive
431 | d.Entity = settings.Entity
432 | d.AutoClose = settings.AutoClose
433 | return d
434 | }
435 |
436 | // WriteTo serializes the document out to the writer 'w'. The function returns
437 | // the number of bytes written and any error encountered.
438 | func (d *Document) WriteTo(w io.Writer) (n int64, err error) {
439 | xw := newXmlWriter(w)
440 | b := bufio.NewWriter(xw)
441 | for _, c := range d.Child {
442 | c.WriteTo(b, &d.WriteSettings)
443 | }
444 | err, n = b.Flush(), xw.bytes
445 | return
446 | }
447 |
448 | // WriteToFile serializes the document out to the file at path 'filepath'.
449 | func (d *Document) WriteToFile(filepath string) error {
450 | f, err := os.Create(filepath)
451 | if err != nil {
452 | return err
453 | }
454 | defer f.Close()
455 | _, err = d.WriteTo(f)
456 | return err
457 | }
458 |
459 | // WriteToBytes serializes this document into a slice of bytes.
460 | func (d *Document) WriteToBytes() (b []byte, err error) {
461 | var buf bytes.Buffer
462 | if _, err = d.WriteTo(&buf); err != nil {
463 | return
464 | }
465 | return buf.Bytes(), nil
466 | }
467 |
468 | // WriteToString serializes this document into a string.
469 | func (d *Document) WriteToString() (s string, err error) {
470 | var b []byte
471 | if b, err = d.WriteToBytes(); err != nil {
472 | return
473 | }
474 | return string(b), nil
475 | }
476 |
477 | // Indent modifies the document's element tree by inserting character data
478 | // tokens containing newlines and spaces for indentation. The amount of
479 | // indentation per depth level is given by the 'spaces' parameter. Other than
480 | // the number of spaces, default IndentSettings are used.
481 | func (d *Document) Indent(spaces int) {
482 | s := NewIndentSettings()
483 | s.Spaces = spaces
484 | d.IndentWithSettings(s)
485 | }
486 |
487 | // IndentTabs modifies the document's element tree by inserting CharData
488 | // tokens containing newlines and tabs for indentation. One tab is used per
489 | // indentation level. Other than the use of tabs, default IndentSettings
490 | // are used.
491 | func (d *Document) IndentTabs() {
492 | s := NewIndentSettings()
493 | s.UseTabs = true
494 | d.IndentWithSettings(s)
495 | }
496 |
497 | // IndentWithSettings modifies the document's element tree by inserting
498 | // character data tokens containing newlines and indentation. The behavior
499 | // of the indentation algorithm is configured by the indent settings.
500 | func (d *Document) IndentWithSettings(s *IndentSettings) {
501 | // WriteSettings.UseCRLF is deprecated. Until removed from the package, it
502 | // overrides IndentSettings.UseCRLF when true.
503 | if d.WriteSettings.UseCRLF {
504 | s.UseCRLF = true
505 | }
506 |
507 | d.Element.indent(0, getIndentFunc(s), s)
508 |
509 | if s.SuppressTrailingWhitespace {
510 | d.Element.stripTrailingWhitespace()
511 | }
512 | }
513 |
514 | // Unindent modifies the document's element tree by removing character data
515 | // tokens containing only whitespace. Other than the removal of indentation,
516 | // default IndentSettings are used.
517 | func (d *Document) Unindent() {
518 | s := NewIndentSettings()
519 | s.Spaces = NoIndent
520 | d.IndentWithSettings(s)
521 | }
522 |
523 | // NewElement creates an unparented element with the specified tag (i.e.,
524 | // name). The tag may include a namespace prefix followed by a colon.
525 | func NewElement(tag string) *Element {
526 | space, stag := spaceDecompose(tag)
527 | return newElement(space, stag, nil)
528 | }
529 |
530 | // newElement is a helper function that creates an element and binds it to
531 | // a parent element if possible.
532 | func newElement(space, tag string, parent *Element) *Element {
533 | e := &Element{
534 | Space: space,
535 | Tag: tag,
536 | Attr: make([]Attr, 0),
537 | Child: make([]Token, 0),
538 | parent: parent,
539 | index: -1,
540 | }
541 | if parent != nil {
542 | parent.addChild(e)
543 | }
544 | return e
545 | }
546 |
547 | // Copy creates a recursive, deep copy of the element and all its attributes
548 | // and children. The returned element has no parent but can be parented to a
549 | // another element using AddChild, or added to a document with SetRoot or
550 | // NewDocumentWithRoot.
551 | func (e *Element) Copy() *Element {
552 | return e.dup(nil).(*Element)
553 | }
554 |
555 | // FullTag returns the element e's complete tag, including namespace prefix if
556 | // present.
557 | func (e *Element) FullTag() string {
558 | if e.Space == "" {
559 | return e.Tag
560 | }
561 | return e.Space + ":" + e.Tag
562 | }
563 |
564 | // NamespaceURI returns the XML namespace URI associated with the element. If
565 | // the element is part of the XML default namespace, NamespaceURI returns the
566 | // empty string.
567 | func (e *Element) NamespaceURI() string {
568 | if e.Space == "" {
569 | return e.findDefaultNamespaceURI()
570 | }
571 | return e.findLocalNamespaceURI(e.Space)
572 | }
573 |
574 | // findLocalNamespaceURI finds the namespace URI corresponding to the
575 | // requested prefix.
576 | func (e *Element) findLocalNamespaceURI(prefix string) string {
577 | for _, a := range e.Attr {
578 | if a.Space == "xmlns" && a.Key == prefix {
579 | return a.Value
580 | }
581 | }
582 |
583 | if e.parent == nil {
584 | return ""
585 | }
586 |
587 | return e.parent.findLocalNamespaceURI(prefix)
588 | }
589 |
590 | // findDefaultNamespaceURI finds the default namespace URI of the element.
591 | func (e *Element) findDefaultNamespaceURI() string {
592 | for _, a := range e.Attr {
593 | if a.Space == "" && a.Key == "xmlns" {
594 | return a.Value
595 | }
596 | }
597 |
598 | if e.parent == nil {
599 | return ""
600 | }
601 |
602 | return e.parent.findDefaultNamespaceURI()
603 | }
604 |
605 | // namespacePrefix returns the namespace prefix associated with the element.
606 | func (e *Element) namespacePrefix() string {
607 | return e.Space
608 | }
609 |
610 | // name returns the tag associated with the element.
611 | func (e *Element) name() string {
612 | return e.Tag
613 | }
614 |
615 | // ReindexChildren recalculates the index values of the element's child
616 | // tokens. This is necessary only if you have manually manipulated the
617 | // element's `Child` array.
618 | func (e *Element) ReindexChildren() {
619 | for i := 0; i < len(e.Child); i++ {
620 | e.Child[i].setIndex(i)
621 | }
622 | }
623 |
624 | // Text returns all character data immediately following the element's opening
625 | // tag.
626 | func (e *Element) Text() string {
627 | if len(e.Child) == 0 {
628 | return ""
629 | }
630 |
631 | text := ""
632 | for _, ch := range e.Child {
633 | if cd, ok := ch.(*CharData); ok {
634 | if text == "" {
635 | text = cd.Data
636 | } else {
637 | text += cd.Data
638 | }
639 | } else if _, ok := ch.(*Comment); ok {
640 | // ignore
641 | } else {
642 | break
643 | }
644 | }
645 | return text
646 | }
647 |
648 | // SetText replaces all character data immediately following an element's
649 | // opening tag with the requested string.
650 | func (e *Element) SetText(text string) {
651 | e.replaceText(0, text, 0)
652 | }
653 |
654 | // SetCData replaces all character data immediately following an element's
655 | // opening tag with a CDATA section.
656 | func (e *Element) SetCData(text string) {
657 | e.replaceText(0, text, cdataFlag)
658 | }
659 |
660 | // Tail returns all character data immediately following the element's end
661 | // tag.
662 | func (e *Element) Tail() string {
663 | if e.Parent() == nil {
664 | return ""
665 | }
666 |
667 | p := e.Parent()
668 | i := e.Index()
669 |
670 | text := ""
671 | for _, ch := range p.Child[i+1:] {
672 | if cd, ok := ch.(*CharData); ok {
673 | if text == "" {
674 | text = cd.Data
675 | } else {
676 | text += cd.Data
677 | }
678 | } else {
679 | break
680 | }
681 | }
682 | return text
683 | }
684 |
685 | // SetTail replaces all character data immediately following the element's end
686 | // tag with the requested string.
687 | func (e *Element) SetTail(text string) {
688 | if e.Parent() == nil {
689 | return
690 | }
691 |
692 | p := e.Parent()
693 | p.replaceText(e.Index()+1, text, 0)
694 | }
695 |
696 | // replaceText is a helper function that replaces a series of chardata tokens
697 | // starting at index i with the requested text.
698 | func (e *Element) replaceText(i int, text string, flags charDataFlags) {
699 | end := e.findTermCharDataIndex(i)
700 |
701 | switch {
702 | case end == i:
703 | if text != "" {
704 | // insert a new chardata token at index i
705 | cd := newCharData(text, flags, nil)
706 | e.InsertChildAt(i, cd)
707 | }
708 |
709 | case end == i+1:
710 | if text == "" {
711 | // remove the chardata token at index i
712 | e.RemoveChildAt(i)
713 | } else {
714 | // replace the first and only character token at index i
715 | cd := e.Child[i].(*CharData)
716 | cd.Data, cd.flags = text, flags
717 | }
718 |
719 | default:
720 | if text == "" {
721 | // remove all chardata tokens starting from index i
722 | copy(e.Child[i:], e.Child[end:])
723 | removed := end - i
724 | e.Child = e.Child[:len(e.Child)-removed]
725 | for j := i; j < len(e.Child); j++ {
726 | e.Child[j].setIndex(j)
727 | }
728 | } else {
729 | // replace the first chardata token at index i and remove all
730 | // subsequent chardata tokens
731 | cd := e.Child[i].(*CharData)
732 | cd.Data, cd.flags = text, flags
733 | copy(e.Child[i+1:], e.Child[end:])
734 | removed := end - (i + 1)
735 | e.Child = e.Child[:len(e.Child)-removed]
736 | for j := i + 1; j < len(e.Child); j++ {
737 | e.Child[j].setIndex(j)
738 | }
739 | }
740 | }
741 | }
742 |
743 | // findTermCharDataIndex finds the index of the first child token that isn't
744 | // a CharData token. It starts from the requested start index.
745 | func (e *Element) findTermCharDataIndex(start int) int {
746 | for i := start; i < len(e.Child); i++ {
747 | if _, ok := e.Child[i].(*CharData); !ok {
748 | return i
749 | }
750 | }
751 | return len(e.Child)
752 | }
753 |
754 | // CreateElement creates a new element with the specified tag (i.e., name) and
755 | // adds it as the last child of element 'e'. The tag may include a prefix
756 | // followed by a colon.
757 | func (e *Element) CreateElement(tag string) *Element {
758 | space, stag := spaceDecompose(tag)
759 | return newElement(space, stag, e)
760 | }
761 |
762 | // CreateChild performs the same task as CreateElement but calls a
763 | // continuation function after the child element is created, allowing
764 | // additional actions to be performed on the child element before returning.
765 | //
766 | // This method of element creation is particularly useful when building nested
767 | // XML documents from code. For example:
768 | //
769 | // org := doc.CreateChild("organization", func(e *Element) {
770 | // e.CreateComment("Mary")
771 | // e.CreateChild("person", func(e *Element) {
772 | // e.CreateAttr("name", "Mary")
773 | // e.CreateAttr("age", "30")
774 | // e.CreateAttr("hair", "brown")
775 | // })
776 | // })
777 | func (e *Element) CreateChild(tag string, cont func(e *Element)) *Element {
778 | child := e.CreateElement(tag)
779 | cont(child)
780 | return child
781 | }
782 |
783 | // AddChild adds the token 't' as the last child of the element. If token 't'
784 | // was already the child of another element, it is first removed from its
785 | // parent element.
786 | func (e *Element) AddChild(t Token) {
787 | if t.Parent() != nil {
788 | t.Parent().RemoveChild(t)
789 | }
790 | e.addChild(t)
791 | }
792 |
793 | // InsertChild inserts the token 't' into this element's list of children just
794 | // before the element's existing child token 'ex'. If the existing element
795 | // 'ex' does not appear in this element's list of child tokens, then 't' is
796 | // added to the end of this element's list of child tokens. If token 't' is
797 | // already the child of another element, it is first removed from the other
798 | // element's list of child tokens.
799 | //
800 | // Deprecated: InsertChild is deprecated. Use InsertChildAt instead.
801 | func (e *Element) InsertChild(ex Token, t Token) {
802 | if ex == nil || ex.Parent() != e {
803 | e.AddChild(t)
804 | return
805 | }
806 |
807 | if t.Parent() != nil {
808 | t.Parent().RemoveChild(t)
809 | }
810 |
811 | t.setParent(e)
812 |
813 | i := ex.Index()
814 | e.Child = append(e.Child, nil)
815 | copy(e.Child[i+1:], e.Child[i:])
816 | e.Child[i] = t
817 |
818 | for j := i; j < len(e.Child); j++ {
819 | e.Child[j].setIndex(j)
820 | }
821 | }
822 |
823 | // InsertChildAt inserts the token 't' into this element's list of child
824 | // tokens just before the requested 'index'. If the index is greater than or
825 | // equal to the length of the list of child tokens, then the token 't' is
826 | // added to the end of the list of child tokens.
827 | func (e *Element) InsertChildAt(index int, t Token) {
828 | if index >= len(e.Child) {
829 | e.AddChild(t)
830 | return
831 | }
832 |
833 | if t.Parent() != nil {
834 | if t.Parent() == e && t.Index() < index {
835 | index--
836 | }
837 | t.Parent().RemoveChild(t)
838 | }
839 |
840 | t.setParent(e)
841 |
842 | e.Child = append(e.Child, nil)
843 | copy(e.Child[index+1:], e.Child[index:])
844 | e.Child[index] = t
845 |
846 | for j := index; j < len(e.Child); j++ {
847 | e.Child[j].setIndex(j)
848 | }
849 | }
850 |
851 | // RemoveChild attempts to remove the token 't' from this element's list of
852 | // child tokens. If the token 't' was a child of this element, then it is
853 | // removed and returned. Otherwise, nil is returned.
854 | func (e *Element) RemoveChild(t Token) Token {
855 | if t.Parent() != e {
856 | return nil
857 | }
858 | return e.RemoveChildAt(t.Index())
859 | }
860 |
861 | // RemoveChildAt removes the child token appearing in slot 'index' of this
862 | // element's list of child tokens. The removed child token is then returned.
863 | // If the index is out of bounds, no child is removed and nil is returned.
864 | func (e *Element) RemoveChildAt(index int) Token {
865 | if index >= len(e.Child) {
866 | return nil
867 | }
868 |
869 | t := e.Child[index]
870 | for j := index + 1; j < len(e.Child); j++ {
871 | e.Child[j].setIndex(j - 1)
872 | }
873 | e.Child = append(e.Child[:index], e.Child[index+1:]...)
874 | t.setIndex(-1)
875 | t.setParent(nil)
876 | return t
877 | }
878 |
879 | // autoClose analyzes the stack's top element and the current token to decide
880 | // whether the top element should be closed.
881 | func (e *Element) autoClose(stack *stack[*Element], t xml.Token, tags []string) {
882 | if stack.empty() {
883 | return
884 | }
885 |
886 | top := stack.peek()
887 |
888 | for _, tag := range tags {
889 | if strings.EqualFold(tag, top.FullTag()) {
890 | if e, ok := t.(xml.EndElement); !ok ||
891 | !strings.EqualFold(e.Name.Space, top.Space) ||
892 | !strings.EqualFold(e.Name.Local, top.Tag) {
893 | stack.pop()
894 | }
895 | break
896 | }
897 | }
898 | }
899 |
// readFrom reads XML from the reader 'ri' and stores the result as new
// child tokens of this element. It returns the byte count reported by the
// internal reader wrapper (r.Bytes()) and any error encountered.
//
// ErrXML is returned when elements are still open at end of input, when a
// token arrives after the element stack has been emptied, or when an end tag
// does not match the element on top of the stack. Any other decoder error is
// returned unchanged.
func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err error) {
	var r xmlReader
	var pr *xmlPeekReader
	if settings.PreserveCData {
		// The peek reader is used below to check whether a character data
		// token started with the CDATA prefix.
		pr = newXmlPeekReader(ri)
		r = pr
	} else {
		r = newXmlSimpleReader(ri)
	}

	// attrCheck maps an attribute name to its index in the element being
	// built. It is only used when duplicate attributes are collapsed, and it
	// is cleared after each element so the map can be reused.
	attrCheck := make(map[xml.Name]int)
	dec := newDecoder(r, settings)

	// The stack holds the chain of currently open elements; the receiver is
	// always its bottom entry.
	var stack stack[*Element]
	stack.push(e)
	for {
		if pr != nil {
			pr.PeekPrepare(dec.InputOffset(), len(cdataPrefix))
		}

		t, err := dec.RawToken()

		// Note that this runs before err is inspected, so autoClose also
		// sees the token returned alongside an error or EOF.
		if settings.Permissive && settings.AutoClose != nil {
			e.autoClose(&stack, t, settings.AutoClose)
		}

		switch {
		case err == io.EOF:
			// Only the receiver may remain on the stack; anything else means
			// an element was left unclosed.
			if len(stack.data) != 1 {
				return r.Bytes(), ErrXML
			}
			return r.Bytes(), nil
		case err != nil:
			return r.Bytes(), err
		case stack.empty():
			return r.Bytes(), ErrXML
		}

		top := stack.peek()

		switch t := t.(type) {
		case xml.StartElement:
			e := newElement(t.Name.Space, t.Name.Local, top)
			if settings.PreserveDuplicateAttrs || len(t.Attr) < 2 {
				// Fast path: duplicates are either wanted or impossible.
				for _, a := range t.Attr {
					e.addAttr(a.Name.Space, a.Name.Local, a.Value)
				}
			} else {
				// Collapse duplicates: a repeated attribute name overwrites
				// the value stored for its first occurrence.
				for _, a := range t.Attr {
					if i, contains := attrCheck[a.Name]; contains {
						e.Attr[i].Value = a.Value
					} else {
						attrCheck[a.Name] = e.addAttr(a.Name.Space, a.Name.Local, a.Value)
					}
				}
				clear(attrCheck)
			}
			stack.push(e)
		case xml.EndElement:
			// The end tag must match the innermost open element exactly.
			if top.Tag != t.Name.Local || top.Space != t.Name.Space {
				return r.Bytes(), ErrXML
			}
			stack.pop()
		case xml.CharData:
			data := string(t)
			var flags charDataFlags
			if pr != nil {
				// The CDATA check takes precedence over the whitespace
				// check, so a whitespace-only CDATA section is flagged as
				// CDATA.
				peekBuf := pr.PeekFinalize()
				if bytes.Equal(peekBuf, cdataPrefix) {
					flags = cdataFlag
				} else if isWhitespace(data) {
					flags = whitespaceFlag
				}
			} else {
				if isWhitespace(data) {
					flags = whitespaceFlag
				}
			}
			newCharData(data, flags, top)
		case xml.Comment:
			newComment(string(t), top)
		case xml.Directive:
			newDirective(string(t), top)
		case xml.ProcInst:
			newProcInst(t.Target, string(t.Inst), top)
		}
	}
}
990 |
991 | // SelectAttr finds an element attribute matching the requested 'key' and, if
992 | // found, returns a pointer to the matching attribute. The function returns
993 | // nil if no matching attribute is found. The key may include a namespace
994 | // prefix followed by a colon.
995 | func (e *Element) SelectAttr(key string) *Attr {
996 | space, skey := spaceDecompose(key)
997 | for i, a := range e.Attr {
998 | if spaceMatch(space, a.Space) && skey == a.Key {
999 | return &e.Attr[i]
1000 | }
1001 | }
1002 | return nil
1003 | }
1004 |
1005 | // SelectAttrValue finds an element attribute matching the requested 'key' and
1006 | // returns its value if found. If no matching attribute is found, the function
1007 | // returns the 'dflt' value instead. The key may include a namespace prefix
1008 | // followed by a colon.
1009 | func (e *Element) SelectAttrValue(key, dflt string) string {
1010 | space, skey := spaceDecompose(key)
1011 | for _, a := range e.Attr {
1012 | if spaceMatch(space, a.Space) && skey == a.Key {
1013 | return a.Value
1014 | }
1015 | }
1016 | return dflt
1017 | }
1018 |
1019 | // ChildElements returns all elements that are children of this element.
1020 | func (e *Element) ChildElements() []*Element {
1021 | var elements []*Element
1022 | for _, t := range e.Child {
1023 | if c, ok := t.(*Element); ok {
1024 | elements = append(elements, c)
1025 | }
1026 | }
1027 | return elements
1028 | }
1029 |
1030 | // SelectElement returns the first child element with the given 'tag' (i.e.,
1031 | // name). The function returns nil if no child element matching the tag is
1032 | // found. The tag may include a namespace prefix followed by a colon.
1033 | func (e *Element) SelectElement(tag string) *Element {
1034 | space, stag := spaceDecompose(tag)
1035 | for _, t := range e.Child {
1036 | if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag {
1037 | return c
1038 | }
1039 | }
1040 | return nil
1041 | }
1042 |
1043 | // SelectElements returns a slice of all child elements with the given 'tag'
1044 | // (i.e., name). The tag may include a namespace prefix followed by a colon.
1045 | func (e *Element) SelectElements(tag string) []*Element {
1046 | space, stag := spaceDecompose(tag)
1047 | var elements []*Element
1048 | for _, t := range e.Child {
1049 | if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag {
1050 | elements = append(elements, c)
1051 | }
1052 | }
1053 | return elements
1054 | }
1055 |
1056 | // FindElement returns the first element matched by the XPath-like 'path'
1057 | // string. The function returns nil if no child element is found using the
1058 | // path. It panics if an invalid path string is supplied.
1059 | func (e *Element) FindElement(path string) *Element {
1060 | return e.FindElementPath(MustCompilePath(path))
1061 | }
1062 |
1063 | // FindElementPath returns the first element matched by the 'path' object. The
1064 | // function returns nil if no element is found using the path.
1065 | func (e *Element) FindElementPath(path Path) *Element {
1066 | p := newPather()
1067 | elements := p.traverse(e, path)
1068 | if len(elements) > 0 {
1069 | return elements[0]
1070 | }
1071 | return nil
1072 | }
1073 |
1074 | // FindElements returns a slice of elements matched by the XPath-like 'path'
1075 | // string. The function returns nil if no child element is found using the
1076 | // path. It panics if an invalid path string is supplied.
1077 | func (e *Element) FindElements(path string) []*Element {
1078 | return e.FindElementsPath(MustCompilePath(path))
1079 | }
1080 |
1081 | // FindElementsPath returns a slice of elements matched by the 'path' object.
1082 | func (e *Element) FindElementsPath(path Path) []*Element {
1083 | p := newPather()
1084 | return p.traverse(e, path)
1085 | }
1086 |
1087 | // NotNil returns the receiver element if it isn't nil; otherwise, it returns
1088 | // an unparented element with an empty string tag. This function simplifies
1089 | // the task of writing code to ignore not-found results from element queries.
1090 | // For example, instead of writing this:
1091 | //
1092 | // if e := doc.SelectElement("enabled"); e != nil {
1093 | // e.SetText("true")
1094 | // }
1095 | //
1096 | // You could write this:
1097 | //
1098 | // doc.SelectElement("enabled").NotNil().SetText("true")
1099 | func (e *Element) NotNil() *Element {
1100 | if e == nil {
1101 | return NewElement("")
1102 | }
1103 | return e
1104 | }
1105 |
1106 | // GetPath returns the absolute path of the element. The absolute path is the
1107 | // full path from the document's root.
1108 | func (e *Element) GetPath() string {
1109 | path := []string{}
1110 | for seg := e; seg != nil; seg = seg.Parent() {
1111 | if seg.Tag != "" {
1112 | path = append(path, seg.Tag)
1113 | }
1114 | }
1115 |
1116 | // Reverse the path.
1117 | for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 {
1118 | path[i], path[j] = path[j], path[i]
1119 | }
1120 |
1121 | return "/" + strings.Join(path, "/")
1122 | }
1123 |
1124 | // GetRelativePath returns the path of this element relative to the 'source'
1125 | // element. If the two elements are not part of the same element tree, then
1126 | // the function returns the empty string.
1127 | func (e *Element) GetRelativePath(source *Element) string {
1128 | var path []*Element
1129 |
1130 | if source == nil {
1131 | return ""
1132 | }
1133 |
1134 | // Build a reverse path from the element toward the root. Stop if the
1135 | // source element is encountered.
1136 | var seg *Element
1137 | for seg = e; seg != nil && seg != source; seg = seg.Parent() {
1138 | path = append(path, seg)
1139 | }
1140 |
1141 | // If we found the source element, reverse the path and compose the
1142 | // string.
1143 | if seg == source {
1144 | if len(path) == 0 {
1145 | return "."
1146 | }
1147 | parts := []string{}
1148 | for i := len(path) - 1; i >= 0; i-- {
1149 | parts = append(parts, path[i].Tag)
1150 | }
1151 | return "./" + strings.Join(parts, "/")
1152 | }
1153 |
1154 | // The source wasn't encountered, so climb from the source element toward
1155 | // the root of the tree until an element in the reversed path is
1156 | // encountered.
1157 |
1158 | findPathIndex := func(e *Element, path []*Element) int {
1159 | for i, ee := range path {
1160 | if e == ee {
1161 | return i
1162 | }
1163 | }
1164 | return -1
1165 | }
1166 |
1167 | climb := 0
1168 | for seg = source; seg != nil; seg = seg.Parent() {
1169 | i := findPathIndex(seg, path)
1170 | if i >= 0 {
1171 | path = path[:i] // truncate at found segment
1172 | break
1173 | }
1174 | climb++
1175 | }
1176 |
1177 | // No element in the reversed path was encountered, so the two elements
1178 | // must not be part of the same tree.
1179 | if seg == nil {
1180 | return ""
1181 | }
1182 |
1183 | // Reverse the (possibly truncated) path and prepend ".." segments to
1184 | // climb.
1185 | parts := []string{}
1186 | for i := 0; i < climb; i++ {
1187 | parts = append(parts, "..")
1188 | }
1189 | for i := len(path) - 1; i >= 0; i-- {
1190 | parts = append(parts, path[i].Tag)
1191 | }
1192 | return strings.Join(parts, "/")
1193 | }
1194 |
1195 | // IndentWithSettings modifies the element and its child tree by inserting
1196 | // character data tokens containing newlines and indentation. The behavior of
1197 | // the indentation algorithm is configured by the indent settings. Because
1198 | // this function indents the element as if it were at the root of a document,
1199 | // it is most useful when called just before writing the element as an XML
1200 | // fragment using WriteTo.
1201 | func (e *Element) IndentWithSettings(s *IndentSettings) {
1202 | e.indent(1, getIndentFunc(s), s)
1203 | }
1204 |
1205 | // indent recursively inserts proper indentation between an XML element's
1206 | // child tokens.
1207 | func (e *Element) indent(depth int, indent indentFunc, s *IndentSettings) {
1208 | e.stripIndent(s)
1209 | n := len(e.Child)
1210 | if n == 0 {
1211 | return
1212 | }
1213 |
1214 | oldChild := e.Child
1215 | e.Child = make([]Token, 0, n*2+1)
1216 | isCharData, firstNonCharData := false, true
1217 | for _, c := range oldChild {
1218 | // Insert NL+indent before child if it's not character data.
1219 | // Exceptions: when it's the first non-character-data child, or when
1220 | // the child is at root depth.
1221 | _, isCharData = c.(*CharData)
1222 | if !isCharData {
1223 | if !firstNonCharData || depth > 0 {
1224 | s := indent(depth)
1225 | if s != "" {
1226 | newCharData(s, whitespaceFlag, e)
1227 | }
1228 | }
1229 | firstNonCharData = false
1230 | }
1231 |
1232 | e.addChild(c)
1233 |
1234 | // Recursively process child elements.
1235 | if ce, ok := c.(*Element); ok {
1236 | ce.indent(depth+1, indent, s)
1237 | }
1238 | }
1239 |
1240 | // Insert NL+indent before the last child.
1241 | if !isCharData {
1242 | if !firstNonCharData || depth > 0 {
1243 | s := indent(depth - 1)
1244 | if s != "" {
1245 | newCharData(s, whitespaceFlag, e)
1246 | }
1247 | }
1248 | }
1249 | }
1250 |
1251 | // stripIndent removes any previously inserted indentation.
1252 | func (e *Element) stripIndent(s *IndentSettings) {
1253 | // Count the number of non-indent child tokens
1254 | n := len(e.Child)
1255 | for _, c := range e.Child {
1256 | if cd, ok := c.(*CharData); ok && cd.IsWhitespace() {
1257 | n--
1258 | }
1259 | }
1260 | if n == len(e.Child) {
1261 | return
1262 | }
1263 | if n == 0 && len(e.Child) == 1 && s.PreserveLeafWhitespace {
1264 | return
1265 | }
1266 |
1267 | // Strip out indent CharData
1268 | newChild := make([]Token, n)
1269 | j := 0
1270 | for _, c := range e.Child {
1271 | if cd, ok := c.(*CharData); ok && cd.IsWhitespace() {
1272 | continue
1273 | }
1274 | newChild[j] = c
1275 | newChild[j].setIndex(j)
1276 | j++
1277 | }
1278 | e.Child = newChild
1279 | }
1280 |
1281 | // stripTrailingWhitespace removes any trailing whitespace CharData tokens
1282 | // from the element's children.
1283 | func (e *Element) stripTrailingWhitespace() {
1284 | for i := len(e.Child) - 1; i >= 0; i-- {
1285 | if cd, ok := e.Child[i].(*CharData); !ok || !cd.IsWhitespace() {
1286 | e.Child = e.Child[:i+1]
1287 | return
1288 | }
1289 | }
1290 | }
1291 |
1292 | // dup duplicates the element.
1293 | func (e *Element) dup(parent *Element) Token {
1294 | ne := &Element{
1295 | Space: e.Space,
1296 | Tag: e.Tag,
1297 | Attr: make([]Attr, len(e.Attr)),
1298 | Child: make([]Token, len(e.Child)),
1299 | parent: parent,
1300 | index: e.index,
1301 | }
1302 | for i, t := range e.Child {
1303 | ne.Child[i] = t.dup(ne)
1304 | }
1305 | copy(ne.Attr, e.Attr)
1306 | return ne
1307 | }
1308 |
1309 | // NextSibling returns this element's next sibling element. It returns nil if
1310 | // there is no next sibling element.
1311 | func (e *Element) NextSibling() *Element {
1312 | if e.parent == nil {
1313 | return nil
1314 | }
1315 | for i := e.index + 1; i < len(e.parent.Child); i++ {
1316 | if s, ok := e.parent.Child[i].(*Element); ok {
1317 | return s
1318 | }
1319 | }
1320 | return nil
1321 | }
1322 |
1323 | // PrevSibling returns this element's preceding sibling element. It returns
1324 | // nil if there is no preceding sibling element.
1325 | func (e *Element) PrevSibling() *Element {
1326 | if e.parent == nil {
1327 | return nil
1328 | }
1329 | for i := e.index - 1; i >= 0; i-- {
1330 | if s, ok := e.parent.Child[i].(*Element); ok {
1331 | return s
1332 | }
1333 | }
1334 | return nil
1335 | }
1336 |
1337 | // Parent returns this element's parent element. It returns nil if this
1338 | // element has no parent.
1339 | func (e *Element) Parent() *Element {
1340 | return e.parent
1341 | }
1342 |
1343 | // Index returns the index of this element within its parent element's
1344 | // list of child tokens. If this element has no parent, then the function
1345 | // returns -1.
1346 | func (e *Element) Index() int {
1347 | return e.index
1348 | }
1349 |
1350 | // WriteTo serializes the element to the writer w.
1351 | func (e *Element) WriteTo(w Writer, s *WriteSettings) {
1352 | w.WriteByte('<')
1353 | w.WriteString(e.FullTag())
1354 | for _, a := range e.Attr {
1355 | w.WriteByte(' ')
1356 | a.WriteTo(w, s)
1357 | }
1358 | if len(e.Child) > 0 {
1359 | w.WriteByte('>')
1360 | for _, c := range e.Child {
1361 | c.WriteTo(w, s)
1362 | }
1363 | w.Write([]byte{'<', '/'})
1364 | w.WriteString(e.FullTag())
1365 | w.WriteByte('>')
1366 | } else {
1367 | if s.CanonicalEndTags {
1368 | w.Write([]byte{'>', '<', '/'})
1369 | w.WriteString(e.FullTag())
1370 | w.WriteByte('>')
1371 | } else {
1372 | w.Write([]byte{'/', '>'})
1373 | }
1374 | }
1375 | }
1376 |
1377 | // setParent replaces this element token's parent.
1378 | func (e *Element) setParent(parent *Element) {
1379 | e.parent = parent
1380 | }
1381 |
1382 | // setIndex sets this element token's index within its parent's Child slice.
1383 | func (e *Element) setIndex(index int) {
1384 | e.index = index
1385 | }
1386 |
1387 | // addChild adds a child token to the element e.
1388 | func (e *Element) addChild(t Token) {
1389 | t.setParent(e)
1390 | t.setIndex(len(e.Child))
1391 | e.Child = append(e.Child, t)
1392 | }
1393 |
1394 | // CreateAttr creates an attribute with the specified 'key' and 'value' and
1395 | // adds it to this element. If an attribute with same key already exists on
1396 | // this element, then its value is replaced. The key may include a namespace
1397 | // prefix followed by a colon.
1398 | func (e *Element) CreateAttr(key, value string) *Attr {
1399 | space, skey := spaceDecompose(key)
1400 |
1401 | for i, a := range e.Attr {
1402 | if space == a.Space && skey == a.Key {
1403 | e.Attr[i].Value = value
1404 | return &e.Attr[i]
1405 | }
1406 | }
1407 |
1408 | i := e.addAttr(space, skey, value)
1409 | return &e.Attr[i]
1410 | }
1411 |
1412 | // addAttr is a helper function that adds an attribute to an element. Returns
1413 | // the index of the added attribute.
1414 | func (e *Element) addAttr(space, key, value string) int {
1415 | a := Attr{
1416 | Space: space,
1417 | Key: key,
1418 | Value: value,
1419 | element: e,
1420 | }
1421 | e.Attr = append(e.Attr, a)
1422 | return len(e.Attr) - 1
1423 | }
1424 |
1425 | // RemoveAttr removes the first attribute of this element whose key matches
1426 | // 'key'. It returns a copy of the removed attribute if a match is found. If
1427 | // no match is found, it returns nil. The key may include a namespace prefix
1428 | // followed by a colon.
1429 | func (e *Element) RemoveAttr(key string) *Attr {
1430 | space, skey := spaceDecompose(key)
1431 | for i, a := range e.Attr {
1432 | if space == a.Space && skey == a.Key {
1433 | e.Attr = append(e.Attr[0:i], e.Attr[i+1:]...)
1434 | return &Attr{
1435 | Space: a.Space,
1436 | Key: a.Key,
1437 | Value: a.Value,
1438 | element: nil,
1439 | }
1440 | }
1441 | }
1442 | return nil
1443 | }
1444 |
1445 | // SortAttrs sorts this element's attributes lexicographically by key.
1446 | func (e *Element) SortAttrs() {
1447 | slices.SortFunc(e.Attr, func(a, b Attr) int {
1448 | if v := strings.Compare(a.Space, b.Space); v != 0 {
1449 | return v
1450 | }
1451 | return strings.Compare(a.Key, b.Key)
1452 | })
1453 | }
1454 |
1455 | // FullKey returns this attribute's complete key, including namespace prefix
1456 | // if present.
1457 | func (a *Attr) FullKey() string {
1458 | if a.Space == "" {
1459 | return a.Key
1460 | }
1461 | return a.Space + ":" + a.Key
1462 | }
1463 |
1464 | // Element returns a pointer to the element containing this attribute.
1465 | func (a *Attr) Element() *Element {
1466 | return a.element
1467 | }
1468 |
1469 | // NamespaceURI returns the XML namespace URI associated with this attribute.
1470 | // The function returns the empty string if the attribute is unprefixed or
1471 | // if the attribute is part of the XML default namespace.
1472 | func (a *Attr) NamespaceURI() string {
1473 | if a.Space == "" {
1474 | return ""
1475 | }
1476 | return a.element.findLocalNamespaceURI(a.Space)
1477 | }
1478 |
1479 | // WriteTo serializes the attribute to the writer.
1480 | func (a *Attr) WriteTo(w Writer, s *WriteSettings) {
1481 | w.WriteString(a.FullKey())
1482 | if s.AttrSingleQuote {
1483 | w.WriteString(`='`)
1484 | } else {
1485 | w.WriteString(`="`)
1486 | }
1487 | var m escapeMode
1488 | if s.CanonicalAttrVal && !s.AttrSingleQuote {
1489 | m = escapeCanonicalAttr
1490 | } else {
1491 | m = escapeNormal
1492 | }
1493 | escapeString(w, a.Value, m)
1494 | if s.AttrSingleQuote {
1495 | w.WriteByte('\'')
1496 | } else {
1497 | w.WriteByte('"')
1498 | }
1499 | }
1500 |
1501 | // NewText creates an unparented CharData token containing simple text data.
1502 | func NewText(text string) *CharData {
1503 | return newCharData(text, 0, nil)
1504 | }
1505 |
1506 | // NewCData creates an unparented XML character CDATA section with 'data' as
1507 | // its content.
1508 | func NewCData(data string) *CharData {
1509 | return newCharData(data, cdataFlag, nil)
1510 | }
1511 |
1512 | // NewCharData creates an unparented CharData token containing simple text
1513 | // data.
1514 | //
1515 | // Deprecated: NewCharData is deprecated. Instead, use NewText, which does the
1516 | // same thing.
1517 | func NewCharData(data string) *CharData {
1518 | return newCharData(data, 0, nil)
1519 | }
1520 |
1521 | // newCharData creates a character data token and binds it to a parent
1522 | // element. If parent is nil, the CharData token remains unbound.
1523 | func newCharData(data string, flags charDataFlags, parent *Element) *CharData {
1524 | c := &CharData{
1525 | Data: data,
1526 | parent: nil,
1527 | index: -1,
1528 | flags: flags,
1529 | }
1530 | if parent != nil {
1531 | parent.addChild(c)
1532 | }
1533 | return c
1534 | }
1535 |
// CreateText creates a CharData token containing simple text data and adds it
// to the end of this element's list of child tokens. The token is created
// with no flags set, and the new token is returned.
func (e *Element) CreateText(text string) *CharData {
	return newCharData(text, 0, e)
}
1541 |
// CreateCData creates a CharData token containing a CDATA section with 'data'
// as its content and adds it to the end of this element's list of child
// tokens. The token is created with the CDATA flag set, and the new token is
// returned.
func (e *Element) CreateCData(data string) *CharData {
	return newCharData(data, cdataFlag, e)
}
1548 |
// CreateCharData creates a CharData token containing simple text data and
// adds it to the end of this element's list of child tokens. It simply
// forwards to CreateText.
//
// Deprecated: CreateCharData is deprecated. Instead, use CreateText, which
// does the same thing.
func (e *Element) CreateCharData(data string) *CharData {
	return e.CreateText(data)
}
1557 |
1558 | // SetData modifies the content of the CharData token. In the case of a
1559 | // CharData token containing simple text, the simple text is modified. In the
1560 | // case of a CharData token containing a CDATA section, the CDATA section's
1561 | // content is modified.
1562 | func (c *CharData) SetData(text string) {
1563 | c.Data = text
1564 | if isWhitespace(text) {
1565 | c.flags |= whitespaceFlag
1566 | } else {
1567 | c.flags &= ^whitespaceFlag
1568 | }
1569 | }
1570 |
// IsCData returns true if this CharData token contains a CDATA section. It
// returns false if the CharData token contains simple text. The answer is
// read from the token's CDATA flag.
func (c *CharData) IsCData() bool {
	return (c.flags & cdataFlag) != 0
}
1576 |
// IsWhitespace reports whether this CharData token is flagged as containing
// only whitespace. The answer is read from the token's whitespace flag rather
// than computed from Data on each call.
func (c *CharData) IsWhitespace() bool {
	return (c.flags & whitespaceFlag) != 0
}
1581 |
// Parent returns this CharData token's parent element, or nil if it has no
// parent.
func (c *CharData) Parent() *Element {
	return c.parent
}
1587 |
// Index returns the index of this CharData token within its parent element's
// list of child tokens. If this CharData token has no parent, then the
// function returns -1 (the value newly created tokens start with).
func (c *CharData) Index() int {
	return c.index
}
1594 |
1595 | // WriteTo serializes character data to the writer.
1596 | func (c *CharData) WriteTo(w Writer, s *WriteSettings) {
1597 | if c.IsCData() {
1598 | w.WriteString(``)
1601 | } else {
1602 | var m escapeMode
1603 | if s.CanonicalText {
1604 | m = escapeCanonicalText
1605 | } else {
1606 | m = escapeNormal
1607 | }
1608 | escapeString(w, c.Data, m)
1609 | }
1610 | }
1611 |
1612 | // dup duplicates the character data.
1613 | func (c *CharData) dup(parent *Element) Token {
1614 | return &CharData{
1615 | Data: c.Data,
1616 | flags: c.flags,
1617 | parent: parent,
1618 | index: c.index,
1619 | }
1620 | }
1621 |
// setParent replaces the character data token's parent. Only the stored
// parent pointer is overwritten; no child list is modified here.
func (c *CharData) setParent(parent *Element) {
	c.parent = parent
}
1626 |
// setIndex sets the CharData token's index within its parent element's Child
// slice. Only the stored index is overwritten; the token is not moved.
func (c *CharData) setIndex(index int) {
	c.index = index
}
1632 |
1633 | // NewComment creates an unparented comment token.
1634 | func NewComment(comment string) *Comment {
1635 | return newComment(comment, nil)
1636 | }
1637 |
1638 | // NewComment creates a comment token and sets its parent element to 'parent'.
1639 | func newComment(comment string, parent *Element) *Comment {
1640 | c := &Comment{
1641 | Data: comment,
1642 | parent: nil,
1643 | index: -1,
1644 | }
1645 | if parent != nil {
1646 | parent.addChild(c)
1647 | }
1648 | return c
1649 | }
1650 |
// CreateComment creates a comment token using the specified 'comment' string
// and adds it as the last child token of this element. The new token is
// returned.
func (e *Element) CreateComment(comment string) *Comment {
	return newComment(comment, e)
}
1656 |
1657 | // dup duplicates the comment.
1658 | func (c *Comment) dup(parent *Element) Token {
1659 | return &Comment{
1660 | Data: c.Data,
1661 | parent: parent,
1662 | index: c.index,
1663 | }
1664 | }
1665 |
// Parent returns the comment token's parent element, or nil if it has no
// parent.
func (c *Comment) Parent() *Element {
	return c.parent
}
1670 |
// Index returns the index of this Comment token within its parent element's
// list of child tokens. If this Comment token has no parent, then the
// function returns -1 (the value newly created tokens start with).
func (c *Comment) Index() int {
	return c.index
}
1677 |
1678 | // WriteTo serialies the comment to the writer.
1679 | func (c *Comment) WriteTo(w Writer, s *WriteSettings) {
1680 | w.WriteString("")
1683 | }
1684 |
// setParent replaces the comment token's parent. Only the stored parent
// pointer is overwritten; no child list is modified here.
func (c *Comment) setParent(parent *Element) {
	c.parent = parent
}
1689 |
// setIndex sets the Comment token's index within its parent element's Child
// slice. Only the stored index is overwritten; the token is not moved.
func (c *Comment) setIndex(index int) {
	c.index = index
}
1695 |
1696 | // NewDirective creates an unparented XML directive token.
1697 | func NewDirective(data string) *Directive {
1698 | return newDirective(data, nil)
1699 | }
1700 |
1701 | // newDirective creates an XML directive and binds it to a parent element. If
1702 | // parent is nil, the Directive remains unbound.
1703 | func newDirective(data string, parent *Element) *Directive {
1704 | d := &Directive{
1705 | Data: data,
1706 | parent: nil,
1707 | index: -1,
1708 | }
1709 | if parent != nil {
1710 | parent.addChild(d)
1711 | }
1712 | return d
1713 | }
1714 |
// CreateDirective creates an XML directive token with the specified 'data'
// value and adds it as the last child token of this element. The new token is
// returned.
func (e *Element) CreateDirective(data string) *Directive {
	return newDirective(data, e)
}
1720 |
1721 | // dup duplicates the directive.
1722 | func (d *Directive) dup(parent *Element) Token {
1723 | return &Directive{
1724 | Data: d.Data,
1725 | parent: parent,
1726 | index: d.index,
1727 | }
1728 | }
1729 |
// Parent returns the directive token's parent element, or nil if it has no
// parent.
func (d *Directive) Parent() *Element {
	return d.parent
}
1735 |
// Index returns the index of this Directive token within its parent element's
// list of child tokens. If this Directive token has no parent, then the
// function returns -1 (the value newly created tokens start with).
func (d *Directive) Index() int {
	return d.index
}
1742 |
1743 | // WriteTo serializes the XML directive to the writer.
1744 | func (d *Directive) WriteTo(w Writer, s *WriteSettings) {
1745 | w.WriteString("")
1748 | }
1749 |
// setParent replaces the directive token's parent. Only the stored parent
// pointer is overwritten; no child list is modified here.
func (d *Directive) setParent(parent *Element) {
	d.parent = parent
}
1754 |
// setIndex sets the Directive token's index within its parent element's Child
// slice. Only the stored index is overwritten; the token is not moved.
func (d *Directive) setIndex(index int) {
	d.index = index
}
1760 |
1761 | // NewProcInst creates an unparented XML processing instruction.
1762 | func NewProcInst(target, inst string) *ProcInst {
1763 | return newProcInst(target, inst, nil)
1764 | }
1765 |
1766 | // newProcInst creates an XML processing instruction and binds it to a parent
1767 | // element. If parent is nil, the ProcInst remains unbound.
1768 | func newProcInst(target, inst string, parent *Element) *ProcInst {
1769 | p := &ProcInst{
1770 | Target: target,
1771 | Inst: inst,
1772 | parent: nil,
1773 | index: -1,
1774 | }
1775 | if parent != nil {
1776 | parent.addChild(p)
1777 | }
1778 | return p
1779 | }
1780 |
// CreateProcInst creates an XML processing instruction token with the
// specified 'target' and instruction 'inst'. It is then added as the last
// child token of this element. The new token is returned.
func (e *Element) CreateProcInst(target, inst string) *ProcInst {
	return newProcInst(target, inst, e)
}
1787 |
1788 | // dup duplicates the procinst.
1789 | func (p *ProcInst) dup(parent *Element) Token {
1790 | return &ProcInst{
1791 | Target: p.Target,
1792 | Inst: p.Inst,
1793 | parent: parent,
1794 | index: p.index,
1795 | }
1796 | }
1797 |
// Parent returns the processing instruction token's parent element, or nil if
// it has no parent.
func (p *ProcInst) Parent() *Element {
	return p.parent
}
1803 |
// Index returns the index of this ProcInst token within its parent element's
// list of child tokens. If this ProcInst token has no parent, then the
// function returns -1 (the value newly created tokens start with).
func (p *ProcInst) Index() int {
	return p.index
}
1810 |
1811 | // WriteTo serializes the processing instruction to the writer.
1812 | func (p *ProcInst) WriteTo(w Writer, s *WriteSettings) {
1813 | w.WriteString("")
1814 | w.WriteString(p.Target)
1815 | if p.Inst != "" {
1816 | w.WriteByte(' ')
1817 | w.WriteString(p.Inst)
1818 | }
1819 | w.WriteString("?>")
1820 | }
1821 |
// setParent replaces the processing instruction token's parent. Only the
// stored parent pointer is overwritten; no child list is modified here.
func (p *ProcInst) setParent(parent *Element) {
	p.parent = parent
}
1826 |
// setIndex sets the processing instruction token's index within its parent
// element's Child slice. Only the stored index is overwritten; the token is
// not moved.
func (p *ProcInst) setIndex(index int) {
	p.index = index
}
1832 |
--------------------------------------------------------------------------------
/etree_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015-2019 Brett Vickers.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 |
5 | package etree
6 |
7 | import (
8 | "bytes"
9 | "encoding/xml"
10 | "errors"
11 | "io"
12 | "io/fs"
13 | "math/rand"
14 | "os"
15 | "path"
16 | "strings"
17 | "testing"
18 | )
19 |
20 | func newDocumentFromString(t *testing.T, s string) *Document {
21 | return newDocumentFromString2(t, s, ReadSettings{})
22 | }
23 |
24 | func newDocumentFromString2(t *testing.T, s string, settings ReadSettings) *Document {
25 | t.Helper()
26 | doc := NewDocument()
27 | doc.ReadSettings = settings
28 | err := doc.ReadFromString(s)
29 | if err != nil {
30 | t.Fatal("etree: failed to parse document")
31 | }
32 | return doc
33 | }
34 |
// checkStrEq reports a test error, showing both strings, when got differs
// from want.
func checkStrEq(t *testing.T, got, want string) {
	t.Helper()
	if got == want {
		return
	}
	t.Errorf("etree: unexpected result.\nGot:\n%s\nWanted:\n%s\n", got, want)
}
41 |
// checkStrBinaryEq reports a test error when got differs from want, printing
// both strings as byte slices so non-printable differences are visible.
func checkStrBinaryEq(t *testing.T, got, want string) {
	t.Helper()
	if got == want {
		return
	}
	gotBytes, wantBytes := []byte(got), []byte(want)
	t.Errorf("etree: unexpected result.\nGot:\n%v\nWanted:\n%v\n", gotBytes, wantBytes)
}
48 |
// checkIntEq reports a test error when the two integers differ.
func checkIntEq(t *testing.T, got, want int) {
	t.Helper()
	if got == want {
		return
	}
	t.Errorf("etree: unexpected integer. Got: %d. Wanted: %d\n", got, want)
}
55 |
// checkBoolEq reports a test error when the two booleans differ.
func checkBoolEq(t *testing.T, got, want bool) {
	t.Helper()
	if got == want {
		return
	}
	t.Errorf("etree: unexpected boolean. Got: %v. Wanted: %v\n", got, want)
}
62 |
63 | func checkElementEq(t *testing.T, got, want *Element) {
64 | t.Helper()
65 | if got != want {
66 | t.Errorf("etree: unexpected element. Got: %v. Wanted: %v.\n", got, want)
67 | }
68 | }
69 |
70 | func checkDocEq(t *testing.T, doc *Document, expected string) {
71 | t.Helper()
72 | doc.Indent(NoIndent)
73 | s, err := doc.WriteToString()
74 | if err != nil {
75 | t.Error("etree: failed to serialize document")
76 | }
77 | if s != expected {
78 | t.Errorf("etree: unexpected document.\nGot:\n%s\nWanted:\n%s\n", s, expected)
79 | }
80 | }
81 |
82 | func checkIndexes(t *testing.T, e *Element) {
83 | t.Helper()
84 | for i := 0; i < len(e.Child); i++ {
85 | c := e.Child[i]
86 | if c.Index() != i {
87 | t.Errorf("Child index mismatch. Got %d, expected %d.", c.Index(), i)
88 | }
89 | if ce, ok := c.(*Element); ok {
90 | checkIndexes(t, ce)
91 | }
92 | }
93 | }
94 |
95 | func TestDocument(t *testing.T) {
96 | // Create a document
97 | doc := NewDocument()
98 | doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
99 | doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
100 | store := doc.CreateElement("store")
101 | store.CreateAttr("xmlns:t", "urn:books-com:titles")
102 | store.CreateDirective("Directive")
103 | store.CreateComment("This is a comment")
104 | book := store.CreateElement("book")
105 | book.CreateAttr("lang", "fr")
106 | book.CreateAttr("lang", "en")
107 | title := book.CreateElement("t:title")
108 | title.SetText("Nicholas Nickleby")
109 | title.SetText("Great Expectations")
110 | author := book.CreateElement("author")
111 | author.CreateCharData("Charles Dickens")
112 | review := book.CreateElement("review")
113 | review.CreateCData("<<< Will be replaced")
114 | review.SetCData(">>> Excellent book")
115 | doc.IndentTabs()
116 |
117 | checkIndexes(t, &doc.Element)
118 |
119 | // Serialize the document to a string
120 | s, err := doc.WriteToString()
121 | if err != nil {
122 | t.Error("etree: failed to serialize document")
123 | }
124 |
125 | // Make sure the serialized XML matches expectation.
126 | expected := `
127 |
128 |
129 |
130 |
131 |
132 | Great Expectations
133 | Charles Dickens
134 | >> Excellent book]]>
135 |
136 |
137 | `
138 | checkStrEq(t, s, expected)
139 |
140 | // Test the structure of the XML
141 | if doc.Root() != store {
142 | t.Error("etree: root mismatch")
143 | }
144 | if len(store.ChildElements()) != 1 || len(store.Child) != 7 {
145 | t.Error("etree: incorrect tree structure")
146 | }
147 | if len(book.ChildElements()) != 3 || len(book.Attr) != 1 || len(book.Child) != 7 {
148 | t.Error("etree: incorrect tree structure")
149 | }
150 | if len(title.ChildElements()) != 0 || len(title.Child) != 1 || len(title.Attr) != 0 {
151 | t.Error("etree: incorrect tree structure")
152 | }
153 | if len(author.ChildElements()) != 0 || len(author.Child) != 1 || len(author.Attr) != 0 {
154 | t.Error("etree: incorrect tree structure")
155 | }
156 | if len(review.ChildElements()) != 0 || len(review.Child) != 1 || len(review.Attr) != 0 {
157 | t.Error("etree: incorrect tree structure")
158 | }
159 | if book.parent != store || store.parent != &doc.Element || doc.parent != nil {
160 | t.Error("etree: incorrect tree structure")
161 | }
162 | if title.parent != book || author.parent != book {
163 | t.Error("etree: incorrect tree structure")
164 | }
165 |
166 | // Perform some basic queries on the document
167 | elements := doc.SelectElements("store")
168 | if len(elements) != 1 || elements[0] != store {
169 | t.Error("etree: incorrect SelectElements result")
170 | }
171 | element := doc.SelectElement("store")
172 | if element != store {
173 | t.Error("etree: incorrect SelectElement result")
174 | }
175 | elements = store.SelectElements("book")
176 | if len(elements) != 1 || elements[0] != book {
177 | t.Error("etree: incorrect SelectElements result")
178 | }
179 | element = store.SelectElement("book")
180 | if element != book {
181 | t.Error("etree: incorrect SelectElement result")
182 | }
183 | attr := book.SelectAttr("lang")
184 | if attr == nil || attr.Key != "lang" || attr.Value != "en" {
185 | t.Error("etree: incorrect SelectAttr result")
186 | }
187 | if book.SelectAttrValue("lang", "unknown") != "en" {
188 | t.Error("etree: incorrect SelectAttrValue result")
189 | }
190 | if book.SelectAttrValue("t:missing", "unknown") != "unknown" {
191 | t.Error("etree: incorrect SelectAttrValue result")
192 | }
193 | attr = book.RemoveAttr("lang")
194 | if attr.Value != "en" {
195 | t.Error("etree: incorrect RemoveAttr result")
196 | }
197 | book.CreateAttr("lang", "de")
198 | attr = book.RemoveAttr("lang")
199 | if attr.Value != "de" {
200 | t.Error("etree: incorrect RemoveAttr result")
201 | }
202 | element = book.SelectElement("t:title")
203 | if element != title || element.Text() != "Great Expectations" || len(element.Attr) != 0 {
204 | t.Error("etree: incorrect SelectElement result")
205 | }
206 | element = book.SelectElement("title")
207 | if element != title {
208 | t.Error("etree: incorrect SelectElement result")
209 | }
210 | element = book.SelectElement("p:title")
211 | if element != nil {
212 | t.Error("etree: incorrect SelectElement result")
213 | }
214 | element = book.RemoveChildAt(title.Index()).(*Element)
215 | if element != title {
216 | t.Error("etree: incorrect RemoveElement result")
217 | }
218 | element = book.SelectElement("title")
219 | if element != nil {
220 | t.Error("etree: incorrect SelectElement result")
221 | }
222 | element = book.SelectElement("review")
223 | if element != review || element.Text() != ">>> Excellent book" || len(element.Attr) != 0 {
224 | t.Error("etree: incorrect SelectElement result")
225 | }
226 | }
227 |
// TestImbalancedXML checks that ReadFromString returns an error for every
// input in the case table.
//
// NOTE(review): nine of the cases below are empty strings and the other two
// are bare text, which looks like XML markup was stripped from the literals
// when this file was extracted. Confirm against the original source and
// restore the intended imbalanced documents.
func TestImbalancedXML(t *testing.T) {
	cases := []string{
		``,
		``,
		``,
		``,
		``,
		`malformed`,
		`malformed`,
		``,
		``,
		``,
		``,
	}
	for _, c := range cases {
		doc := NewDocument()
		err := doc.ReadFromString(c)
		if err == nil {
			t.Errorf("etree: imbalanced XML should have failed:\n%s", c)
		}
	}
}
250 |
251 | func TestDocumentCharsetReader(t *testing.T) {
252 | s := `
253 |
254 |
255 | Great Expectations
256 | Charles Dickens
257 |
258 | `
259 |
260 | doc := newDocumentFromString2(t, s, ReadSettings{
261 | CharsetReader: func(label string, input io.Reader) (io.Reader, error) {
262 | if label == "lowercase" {
263 | return &lowercaseCharsetReader{input}, nil
264 | }
265 | return nil, errors.New("unknown charset")
266 | },
267 | })
268 |
269 | cases := []struct {
270 | path string
271 | text string
272 | }{
273 | {"/store/book/title", "great expectations"},
274 | {"/store/book/author", "charles dickens"},
275 | }
276 | for _, c := range cases {
277 | e := doc.FindElement(c.path)
278 | if e == nil {
279 | t.Errorf("etree: failed to find element '%s'", c.path)
280 | } else if e.Text() != c.text {
281 | t.Errorf("etree: expected path '%s' to contain '%s', got '%s'", c.path, c.text, e.Text())
282 | }
283 | }
284 | }
285 |
// lowercaseCharsetReader is a test-only io.Reader that converts ASCII
// uppercase letters read from the wrapped reader to lowercase.
type lowercaseCharsetReader struct {
	r io.Reader
}

// Read reads from the wrapped reader into p and lowercases the ASCII letters
// among the n bytes that were read. The wrapped reader's byte count and error
// are returned unchanged.
func (c *lowercaseCharsetReader) Read(p []byte) (n int, err error) {
	n, err = c.r.Read(p)
	// A reader may return data together with an error (for example the final
	// chunk alongside io.EOF), so transform the bytes before looking at err.
	for i, b := range p[:n] {
		if 'A' <= b && b <= 'Z' {
			p[i] = b - 'A' + 'a'
		}
	}
	return n, err
}
302 |
303 | func TestDocumentReadPermissive(t *testing.T) {
304 | s := ""
305 |
306 | doc := NewDocument()
307 | err := doc.ReadFromString(s)
308 | if err == nil {
309 | t.Fatal("etree: incorrect ReadFromString result")
310 | }
311 |
312 | doc.ReadSettings.Permissive = true
313 | err = doc.ReadFromString(s)
314 | if err != nil {
315 | t.Fatal("etree: incorrect ReadFromString result")
316 | }
317 | }
318 |
319 | func TestEmbeddedComment(t *testing.T) {
320 | s := `123456`
321 |
322 | doc := NewDocument()
323 | err := doc.ReadFromString(s)
324 | if err != nil {
325 | t.Fatal("etree: incorrect ReadFromString result")
326 | }
327 |
328 | a := doc.SelectElement("a")
329 | checkStrEq(t, a.Text(), "123456")
330 | }
331 |
332 | func TestDocumentReadHTMLEntities(t *testing.T) {
333 | s := `
334 |
335 | → Great Expectations
336 | Charles Dickens
337 |
338 | `
339 |
340 | doc := NewDocument()
341 | err := doc.ReadFromString(s)
342 | if err == nil {
343 | t.Fatal("etree: incorrect ReadFromString result")
344 | }
345 |
346 | doc.ReadSettings.Entity = xml.HTMLEntity
347 | err = doc.ReadFromString(s)
348 | if err != nil {
349 | t.Fatal("etree: incorrect ReadFromString result")
350 | }
351 | }
352 |
// TestDocumentReadHTMLAutoClose reads each input with Permissive enabled and
// AutoClose set to xml.HTMLAutoClose, then checks that the serialized
// document equals the expected output. Each case runs as a named subtest.
//
// NOTE(review): the input/want literals below contain only newlines and bare
// text, which looks like HTML tags were stripped from them when this file was
// extracted. Confirm against the original source and restore the markup.
func TestDocumentReadHTMLAutoClose(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  string
	}{
		{"empty", ``, ``},
		{"oneSelfClosing", `
`, `
`},
		{"twoSelfClosingAndText", `
some text
`, `
some text
`},
		{
			name: "largerExample",
			input: `

Author: Charles Dickens
Book: Great Expectations
`,
			want: `

Author: Charles Dickens
Book: Great Expectations
`},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			doc := NewDocument()
			doc.ReadSettings.Permissive = true
			doc.ReadSettings.AutoClose = xml.HTMLAutoClose
			err := doc.ReadFromString(c.input)
			if err != nil {
				t.Fatal("etree: ReadFromString() error = ", err)
			}
			s, err := doc.WriteToString()
			if err != nil {
				t.Fatal("etree: WriteToString() error = ", err)
			}
			checkStrEq(t, s, c.want)
		})
	}
}
391 |
// TestEscapeCodes writes the same input as both element text and an
// attribute value, and compares the serialized document under three setting
// combinations: no canonical escaping, canonical attribute values only, and
// canonical text only.
//
// NOTE(review): the expected-output literals in the case table are damaged
// (unbalanced quotes, raw line breaks, replacement characters), apparently
// because markup and character references were stripped or decoded when this
// file was extracted. They must be restored from the original source before
// this function will compile.
func TestEscapeCodes(t *testing.T) {
	cases := []struct {
		input string
		normal string
		attrCanonical string
		textCanonical string
	}{
		{
			"&<>'\"\t\n\r",
			"&<>'"\t\n\r",
			"'"
\">&<>'"\t\n\r",
			"&<>'\"\t\n
",
		},
		{
			"\x00\x1f\x08\x09\x0a\x0d",
			"���\t\n\r",
			"���\t\n\r",
			"���\t\n
",
		},
	}
	for _, c := range cases {
		doc := NewDocument()

		e := doc.CreateElement("e")
		e.SetText(c.input)
		e.CreateAttr("a", c.input)

		// Neither text nor attribute values use canonical escaping.
		doc.WriteSettings.CanonicalText = false
		doc.WriteSettings.CanonicalAttrVal = false
		s, err := doc.WriteToString()
		if err != nil {
			t.Error("etree: Escape test produced inocrrect result.")
		}
		checkStrEq(t, s, c.normal)

		// Only attribute values use canonical escaping.
		doc.WriteSettings.CanonicalText = false
		doc.WriteSettings.CanonicalAttrVal = true
		s, err = doc.WriteToString()
		if err != nil {
			t.Error("etree: Escape test produced inocrrect result.")
		}
		checkStrEq(t, s, c.attrCanonical)

		// Only text uses canonical escaping.
		doc.WriteSettings.CanonicalText = true
		doc.WriteSettings.CanonicalAttrVal = false
		s, err = doc.WriteToString()
		if err != nil {
			t.Error("etree: Escape test produced inocrrect result.")
		}
		checkStrEq(t, s, c.textCanonical)
	}
}
444 |
// TestCanonical enables the canonical end-tag, text and attribute-value write
// settings, builds a small document that starts with a byte-order mark, and
// compares its serialization (indented by two spaces) with an expected
// string.
//
// NOTE(review): the expected literal below is almost entirely blank lines and
// the SetText argument ends in a replacement character, which looks like
// markup and character references were lost when this file was extracted.
// Confirm against the original source and restore them.
func TestCanonical(t *testing.T) {
	BOM := "\xef\xbb\xbf"

	doc := NewDocument()
	doc.WriteSettings.CanonicalEndTags = true
	doc.WriteSettings.CanonicalText = true
	doc.WriteSettings.CanonicalAttrVal = true
	doc.CreateCharData(BOM)
	doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)

	people := doc.CreateElement("People")
	people.CreateComment("These are all known people")

	jon := people.CreateElement("Person")
	jon.CreateAttr("name", "Jon O'Reilly")
	jon.SetText("\r<'\">&\u0004\u0005\u001f�")

	sally := people.CreateElement("Person")
	sally.CreateAttr("name", "Sally")
	sally.CreateAttr("escape", "\r\n\t<'\">&")

	doc.Indent(2)
	s, err := doc.WriteToString()
	if err != nil {
		t.Error("etree: WriteSettings WriteTo produced incorrect result.")
	}

	expected := BOM + `



<'">&����


`
	checkStrEq(t, s, expected)
}
481 |
482 | func TestCopy(t *testing.T) {
483 | s := `
484 |
485 | Great Expectations
486 | Charles Dickens
487 |
488 | `
489 |
490 | doc := newDocumentFromString(t, s)
491 |
492 | s1, err := doc.WriteToString()
493 | if err != nil {
494 | t.Error("etree: incorrect WriteToString result")
495 | }
496 |
497 | doc2 := doc.Copy()
498 | checkIndexes(t, &doc2.Element)
499 | s2, err := doc2.WriteToString()
500 | if err != nil {
501 | t.Error("etree: incorrect Copy result")
502 | }
503 |
504 | if s1 != s2 {
505 | t.Error("etree: mismatched Copy result")
506 | t.Error("wanted:\n" + s1)
507 | t.Error("got:\n" + s2)
508 | }
509 |
510 | e1 := doc.FindElement("./store/book/title")
511 | e2 := doc2.FindElement("./store/book/title")
512 | if e1 == nil || e2 == nil || e1.parent == nil || e1 == e2 {
513 | t.Error("etree: incorrect FindElement result")
514 | }
515 |
516 | e1.parent.RemoveChildAt(e1.Index())
517 | s1, _ = doc.WriteToString()
518 | s2, _ = doc2.WriteToString()
519 | if s1 == s2 {
520 | t.Error("etree: incorrect result after RemoveElement")
521 | }
522 | }
523 |
524 | func TestGetPath(t *testing.T) {
525 | s := `
526 |
527 |
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
537 | `
538 |
539 | doc := newDocumentFromString(t, s)
540 |
541 | cases := []struct {
542 | from string
543 | to string
544 | relpath string
545 | topath string
546 | }{
547 | {"a", ".", "..", "/"},
548 | {".", "a", "./a", "/a"},
549 | {"a/b1/c1/d1", ".", "../../../..", "/"},
550 | {".", "a/b1/c1/d1", "./a/b1/c1/d1", "/a/b1/c1/d1"},
551 | {"a", "a", ".", "/a"},
552 | {"a/b1", "a/b1/c1", "./c1", "/a/b1/c1"},
553 | {"a/b1/c1", "a/b1", "..", "/a/b1"},
554 | {"a/b1/c1", "a/b1/c1", ".", "/a/b1/c1"},
555 | {"a", "a/b1", "./b1", "/a/b1"},
556 | {"a/b1", "a", "..", "/a"},
557 | {"a", "a/b1/c1", "./b1/c1", "/a/b1/c1"},
558 | {"a/b1/c1", "a", "../..", "/a"},
559 | {"a/b1/c1/d1", "a", "../../..", "/a"},
560 | {"a", "a/b1/c1/d1", "./b1/c1/d1", "/a/b1/c1/d1"},
561 | {"a/b1", "a/b2", "../b2", "/a/b2"},
562 | {"a/b2", "a/b1", "../b1", "/a/b1"},
563 | {"a/b1/c1/d1", "a/b2/c2/d2", "../../../b2/c2/d2", "/a/b2/c2/d2"},
564 | {"a/b2/c2/d2", "a/b1/c1/d1", "../../../b1/c1/d1", "/a/b1/c1/d1"},
565 | {"a/b1/c1/d1", "a/b1/c1/d1a", "../d1a", "/a/b1/c1/d1a"},
566 | }
567 |
568 | for _, c := range cases {
569 | fe := doc.FindElement(c.from)
570 | te := doc.FindElement(c.to)
571 |
572 | rp := te.GetRelativePath(fe)
573 | if rp != c.relpath {
574 | t.Errorf("GetRelativePath from '%s' to '%s'. Expected '%s', got '%s'.\n", c.from, c.to, c.relpath, rp)
575 | }
576 |
577 | p := te.GetPath()
578 | if p != c.topath {
579 | t.Errorf("GetPath for '%s'. Expected '%s', got '%s'.\n", c.to, c.topath, p)
580 | }
581 | }
582 | }
583 |
584 | func TestInsertChild(t *testing.T) {
585 | s := `
586 | Great Expectations
587 | Charles Dickens
588 |
589 | `
590 |
591 | doc := newDocumentFromString(t, s)
592 |
593 | year := NewElement("year")
594 | year.SetText("1861")
595 |
596 | book := doc.FindElement("//book")
597 | book.InsertChildAt(book.SelectElement("t:title").Index(), year)
598 |
599 | expected1 := `
600 | 1861
601 | Great Expectations
602 | Charles Dickens
603 |
604 | `
605 | doc.Indent(2)
606 | s1, _ := doc.WriteToString()
607 | checkStrEq(t, s1, expected1)
608 |
609 | book.RemoveChildAt(year.Index())
610 | book.InsertChildAt(book.SelectElement("author").Index(), year)
611 |
612 | expected2 := `
613 | Great Expectations
614 | 1861
615 | Charles Dickens
616 |
617 | `
618 | doc.Indent(2)
619 | s2, _ := doc.WriteToString()
620 | checkStrEq(t, s2, expected2)
621 |
622 | book.RemoveChildAt(year.Index())
623 | book.InsertChildAt(len(book.Child), year)
624 |
625 | expected3 := `
626 | Great Expectations
627 | Charles Dickens
628 | 1861
629 |
630 | `
631 | doc.Indent(2)
632 | s3, _ := doc.WriteToString()
633 | checkStrEq(t, s3, expected3)
634 |
635 | book.RemoveChildAt(year.Index())
636 | book.InsertChildAt(999, year)
637 |
638 | expected4 := `
639 | Great Expectations
640 | Charles Dickens
641 | 1861
642 |
643 | `
644 | doc.Indent(2)
645 | s4, _ := doc.WriteToString()
646 | checkStrEq(t, s4, expected4)
647 |
648 | year = doc.FindElement("//book/year")
649 | book.InsertChildAt(0, year)
650 |
651 | expected5 := `
652 | 1861
653 | Great Expectations
654 | Charles Dickens
655 |
656 | `
657 |
658 | doc.Indent(2)
659 | s5, _ := doc.WriteToString()
660 | checkStrEq(t, s5, expected5)
661 |
662 | author := doc.FindElement("//book/author")
663 | year = doc.FindElement("//book/year")
664 | book.InsertChildAt(author.Index(), year)
665 |
666 | expected6 := `
667 | Great Expectations
668 | 1861
669 | Charles Dickens
670 |
671 | `
672 | doc.Indent(2)
673 | s6, _ := doc.WriteToString()
674 | checkStrEq(t, s6, expected6)
675 | }
676 |
677 | func TestCdata(t *testing.T) {
678 | var tests = []struct {
679 | in, out string
680 | }{
681 | {`1234567`, "1234567"},
682 | {``, "1234567"},
683 | {`1357`, "1234567"},
684 | {`13457`, "123"},
685 | {`1457`, "1"},
686 | {`457`, "1"},
687 | }
688 |
689 | for _, test := range tests {
690 | doc := NewDocument()
691 | err := doc.ReadFromString(test.in)
692 | if err != nil {
693 | t.Fatal("etree ReadFromString: " + err.Error())
694 | }
695 |
696 | tag := doc.FindElement("tag")
697 | if tag.Text() != test.out {
698 | t.Fatalf("etree invalid cdata. Expected: %v. Got: %v\n", test.out, tag.Text())
699 | }
700 | }
701 | }
702 |
703 | func TestAddChild(t *testing.T) {
704 | s := `
705 | Great Expectations
706 | Charles Dickens
707 |
708 | `
709 | doc1 := newDocumentFromString(t, s)
710 |
711 | doc2 := NewDocument()
712 | root := doc2.CreateElement("root")
713 |
714 | for _, e := range doc1.FindElements("//book/*") {
715 | root.AddChild(e)
716 | }
717 |
718 | expected1 := `
719 | `
720 | doc1.Indent(2)
721 | s1, _ := doc1.WriteToString()
722 | checkStrEq(t, s1, expected1)
723 |
724 | expected2 := `
725 | Great Expectations
726 | Charles Dickens
727 |
728 | `
729 | doc2.Indent(2)
730 | s2, _ := doc2.WriteToString()
731 | checkStrEq(t, s2, expected2)
732 | }
733 |
734 | func TestSetRoot(t *testing.T) {
735 | s := `
736 |
737 | Great Expectations
738 | Charles Dickens
739 |
740 | `
741 | doc := newDocumentFromString(t, s)
742 |
743 | origroot := doc.Root()
744 | if origroot.Parent() != &doc.Element {
745 | t.Error("Root incorrect")
746 | }
747 |
748 | newroot := NewElement("root")
749 | doc.SetRoot(newroot)
750 |
751 | if doc.Root() != newroot {
752 | t.Error("doc.Root() != newroot")
753 | }
754 | if origroot.Parent() != nil {
755 | t.Error("origroot.Parent() != nil")
756 | }
757 |
758 | expected1 := `
759 |
760 | `
761 | doc.Indent(2)
762 | s1, _ := doc.WriteToString()
763 | checkStrEq(t, s1, expected1)
764 |
765 | doc.SetRoot(origroot)
766 | doc.Indent(2)
767 | expected2 := s
768 | s2, _ := doc.WriteToString()
769 | checkStrEq(t, s2, expected2)
770 |
771 | doc2 := NewDocument()
772 | doc2.CreateProcInst("test", `a="wow"`)
773 | doc2.SetRoot(NewElement("root"))
774 | doc2.Indent(2)
775 | expected3 := expected1
776 | s3, _ := doc2.WriteToString()
777 | checkStrEq(t, s3, expected3)
778 |
779 | doc2.SetRoot(doc.Root())
780 | doc2.Indent(2)
781 | expected4 := s
782 | s4, _ := doc2.WriteToString()
783 | checkStrEq(t, s4, expected4)
784 |
785 | expected5 := `
786 | `
787 | doc.Indent(2)
788 | s5, _ := doc.WriteToString()
789 | checkStrEq(t, s5, expected5)
790 | }
791 |
792 | func TestSortAttrs(t *testing.T) {
793 | s := ``
794 | doc := newDocumentFromString(t, s)
795 | doc.Root().SortAttrs()
796 | doc.Indent(2)
797 | out, _ := doc.WriteToString()
798 | checkStrEq(t, out, ``+"\n")
799 | }
800 |
// TestCharsetReaderDefaultSetting checks that ReadFromBytes succeeds for each
// input in the case table when no CharsetReader has been configured.
//
// NOTE(review): every case below is an empty string, which looks like the XML
// declarations (and their encoding labels) were stripped when this file was
// extracted. Confirm against the original source and restore them; as
// written the test does not exercise any encoding.
func TestCharsetReaderDefaultSetting(t *testing.T) {
	// Test encodings where the default pass-through charset conversion
	// should work for common single-byte character encodings.
	cases := []string{
		``,
		``,
		``,
		``,
		``,
	}

	for _, c := range cases {
		doc := NewDocument()
		if err := doc.ReadFromBytes([]byte(c)); err != nil {
			t.Error(err)
		}
	}
}
819 |
820 | func TestCharData(t *testing.T) {
821 | doc := NewDocument()
822 | root := doc.CreateElement("root")
823 | root.CreateCharData("This ")
824 | root.CreateCData("is ")
825 | e1 := NewText("a ")
826 | e2 := NewCData("text ")
827 | root.AddChild(e1)
828 | root.AddChild(e2)
829 | root.CreateCharData("Element!!")
830 |
831 | s, err := doc.WriteToString()
832 | if err != nil {
833 | t.Error("etree: failed to serialize document")
834 | }
835 |
836 | checkStrEq(t, s, `This a Element!!`)
837 |
838 | // Check we can parse the output
839 | err = doc.ReadFromString(s)
840 | if err != nil {
841 | t.Fatal("etree: incorrect ReadFromString result")
842 | }
843 | if doc.Root().Text() != "This is a text Element!!" {
844 | t.Error("etree: invalid text")
845 | }
846 | }
847 |
848 | func TestIndentSimple(t *testing.T) {
849 | doc := NewDocument()
850 | root := doc.CreateElement("root")
851 | ch1 := root.CreateElement("child1")
852 | ch1.CreateElement("child2")
853 |
854 | // First test Unindent.
855 | doc.Unindent()
856 | s, err := doc.WriteToString()
857 | if err != nil {
858 | t.Error("etree: failed to serialize document")
859 | }
860 | expected := ""
861 | checkStrEq(t, s, expected)
862 |
863 | // Now test Indent with NoIndent (which should produce the same result
864 | // as Unindent).
865 | doc.Indent(NoIndent)
866 | s, err = doc.WriteToString()
867 | if err != nil {
868 | t.Error("etree: failed to serialize document")
869 | }
870 | checkStrEq(t, s, expected)
871 |
872 | // Run all indent test cases.
873 | tests := []struct {
874 | useTabs, useCRLF bool
875 | ws, nl string
876 | }{
877 | {false, false, " ", "\n"},
878 | {false, true, " ", "\r\n"},
879 | {true, false, "\t", "\n"},
880 | {true, true, "\t", "\r\n"},
881 | }
882 |
883 | for _, test := range tests {
884 | doc.WriteSettings.UseCRLF = test.useCRLF
885 | if test.useTabs {
886 | doc.IndentTabs()
887 | s, err := doc.WriteToString()
888 | if err != nil {
889 | t.Error("etree: failed to serialize document")
890 | }
891 | tab := test.ws
892 | expected := "" + test.nl + tab + "" + test.nl +
893 | tab + tab + "" + test.nl + tab +
894 | "" + test.nl + "" + test.nl
895 | checkStrEq(t, s, expected)
896 | } else {
897 | for i := 0; i < 256; i++ {
898 | doc.Indent(i)
899 | s, err := doc.WriteToString()
900 | if err != nil {
901 | t.Error("etree: failed to serialize document")
902 | }
903 | tab := strings.Repeat(test.ws, i)
904 | expected := "" + test.nl + tab + "" + test.nl +
905 | tab + tab + "" + test.nl + tab +
906 | "" + test.nl + "" + test.nl
907 | checkStrEq(t, s, expected)
908 | }
909 | }
910 | }
911 | }
912 |
913 | func TestIndentWithDefaultSettings(t *testing.T) {
914 | input := `
915 |
916 |
917 |
918 | `
919 |
920 | doc := NewDocument()
921 | err := doc.ReadFromString(input)
922 | if err != nil {
923 | t.Error("etree: failed to read string")
924 | }
925 |
926 | settings := NewIndentSettings()
927 | doc.IndentWithSettings(settings)
928 | s, err := doc.WriteToString()
929 | if err != nil {
930 | t.Error("etree: failed to serialize document")
931 | }
932 | expected := "\n \n \n \n\n"
933 | checkStrEq(t, s, expected)
934 | }
935 |
936 | func TestIndentWithSettings(t *testing.T) {
937 | doc := NewDocument()
938 | root := doc.CreateElement("root")
939 | ch1 := root.CreateElement("child1")
940 | ch1.CreateElement("child2")
941 |
942 | // First test with NoIndent.
943 | settings := NewIndentSettings()
944 | settings.UseCRLF = false
945 | settings.UseTabs = false
946 | settings.Spaces = NoIndent
947 | doc.IndentWithSettings(settings)
948 | s, err := doc.WriteToString()
949 | if err != nil {
950 | t.Error("etree: failed to serialize document")
951 | }
952 | expected := ""
953 | checkStrEq(t, s, expected)
954 |
955 | // Run all indent test cases.
956 | tests := []struct {
957 | useTabs, useCRLF bool
958 | ws, nl string
959 | }{
960 | {false, false, " ", "\n"},
961 | {false, true, " ", "\r\n"},
962 | {true, false, "\t", "\n"},
963 | {true, true, "\t", "\r\n"},
964 | }
965 |
966 | for _, test := range tests {
967 | if test.useTabs {
968 | settings := NewIndentSettings()
969 | settings.UseTabs = true
970 | settings.UseCRLF = test.useCRLF
971 | doc.IndentWithSettings(settings)
972 | s, err := doc.WriteToString()
973 | if err != nil {
974 | t.Error("etree: failed to serialize document")
975 | }
976 | tab := test.ws
977 | expected := "" + test.nl + tab + "" + test.nl +
978 | tab + tab + "" + test.nl + tab +
979 | "" + test.nl + "" + test.nl
980 | checkStrEq(t, s, expected)
981 | } else {
982 | for i := 0; i < 256; i++ {
983 | settings := NewIndentSettings()
984 | settings.Spaces = i
985 | settings.UseTabs = false
986 | settings.UseCRLF = test.useCRLF
987 | doc.IndentWithSettings(settings)
988 | s, err := doc.WriteToString()
989 | if err != nil {
990 | t.Error("etree: failed to serialize document")
991 | }
992 | tab := strings.Repeat(test.ws, i)
993 | expected := "" + test.nl + tab + "" + test.nl +
994 | tab + tab + "" + test.nl + tab +
995 | "" + test.nl + "" + test.nl
996 | checkStrEq(t, s, expected)
997 | }
998 | }
999 | }
1000 | }
1001 |
1002 | func TestIndentPreserveWhitespace(t *testing.T) {
1003 | tests := []struct {
1004 | input string
1005 | expected string
1006 | }{
1007 | {"", ""},
1008 | {" ", " "},
1009 | {"\t", "\t"},
1010 | {"\t\n \t", "\t\n \t"},
1011 | {"", " "},
1012 | {" ", ""},
1013 | {" ", "\n \n"},
1014 | }
1015 |
1016 | for _, test := range tests {
1017 | doc := NewDocument()
1018 | err := doc.ReadFromString(test.input)
1019 | if err != nil {
1020 | t.Error("etree: failed to read string")
1021 | }
1022 |
1023 | s := NewIndentSettings()
1024 | s.Spaces = 2
1025 | s.PreserveLeafWhitespace = true
1026 | s.SuppressTrailingWhitespace = true
1027 | doc.IndentWithSettings(s)
1028 |
1029 | output, err := doc.WriteToString()
1030 | if err != nil {
1031 | t.Error("etree: failed to read string")
1032 | }
1033 | checkStrEq(t, output, test.expected)
1034 | }
1035 | }
1036 |
1037 | func TestPreserveCData(t *testing.T) {
1038 | tests := []struct {
1039 | input string
1040 | expectedWithPreserve string
1041 | expectedWithoutPreserve string
1042 | }{
1043 | {
1044 | "",
1045 | "",
1046 | "x",
1047 | },
1048 | {
1049 | "foo]]>",
1050 | "foo]]>",
1051 | "x <b>foo</b>",
1052 | },
1053 | {
1054 | " name ",
1055 | " name ",
1056 | "My name is",
1057 | },
1058 | }
1059 |
1060 | for _, test := range tests {
1061 | doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: true})
1062 | output, _ := doc.WriteToString()
1063 | checkStrEq(t, output, test.expectedWithPreserve)
1064 | }
1065 |
1066 | for _, test := range tests {
1067 | doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: false})
1068 | output, _ := doc.WriteToString()
1069 | checkStrEq(t, output, test.expectedWithoutPreserve)
1070 | }
1071 | }
1072 |
1073 | func TestTokenIndexing(t *testing.T) {
1074 | s := `
1075 |
1076 |
1077 |
1078 |
1079 |
1080 | Great Expectations
1081 | Charles Dickens
1082 |
1083 |
1084 | `
1085 |
1086 | doc := newDocumentFromString(t, s)
1087 | review := doc.FindElement("/store/book/review")
1088 | review.SetText("Excellent")
1089 |
1090 | checkIndexes(t, &doc.Element)
1091 |
1092 | doc.Indent(4)
1093 | checkIndexes(t, &doc.Element)
1094 |
1095 | doc.Indent(NoIndent)
1096 | checkIndexes(t, &doc.Element)
1097 |
1098 | e := NewElement("foo")
1099 | store := doc.SelectElement("store")
1100 | store.InsertChildAt(0, e)
1101 | checkIndexes(t, &doc.Element)
1102 |
1103 | store.RemoveChildAt(0)
1104 | checkIndexes(t, &doc.Element)
1105 | }
1106 |
1107 | func TestSetText(t *testing.T) {
1108 | doc := NewDocument()
1109 | root := doc.CreateElement("root")
1110 |
1111 | checkDocEq(t, doc, ``)
1112 | checkStrEq(t, root.Text(), "")
1113 | checkIntEq(t, len(root.Child), 0)
1114 |
1115 | root.SetText("foo")
1116 | checkDocEq(t, doc, `foo`)
1117 | checkStrEq(t, root.Text(), "foo")
1118 | checkIntEq(t, len(root.Child), 1)
1119 |
1120 | root.SetText("bar")
1121 | checkDocEq(t, doc, `bar`)
1122 | checkStrEq(t, root.Text(), "bar")
1123 | checkIntEq(t, len(root.Child), 1)
1124 |
1125 | root.CreateCData("cdata")
1126 | checkDocEq(t, doc, `bar`)
1127 | checkStrEq(t, root.Text(), "barcdata")
1128 | checkIntEq(t, len(root.Child), 2)
1129 |
1130 | root.SetText("qux")
1131 | checkDocEq(t, doc, `qux`)
1132 | checkStrEq(t, root.Text(), "qux")
1133 | checkIntEq(t, len(root.Child), 1)
1134 |
1135 | root.CreateCData("cdata")
1136 | checkDocEq(t, doc, `qux`)
1137 | checkStrEq(t, root.Text(), "quxcdata")
1138 | checkIntEq(t, len(root.Child), 2)
1139 |
1140 | root.SetCData("baz")
1141 | checkDocEq(t, doc, ``)
1142 | checkStrEq(t, root.Text(), "baz")
1143 | checkIntEq(t, len(root.Child), 1)
1144 |
1145 | root.CreateText("corge")
1146 | root.CreateCData("grault")
1147 | root.CreateText("waldo")
1148 | root.CreateCData("fred")
1149 | root.CreateElement("child")
1150 | checkDocEq(t, doc, `corgewaldo`)
1151 | checkStrEq(t, root.Text(), "bazcorgegraultwaldofred")
1152 | checkIntEq(t, len(root.Child), 6)
1153 |
1154 | root.SetText("plugh")
1155 | checkDocEq(t, doc, `plugh`)
1156 | checkStrEq(t, root.Text(), "plugh")
1157 | checkIntEq(t, len(root.Child), 2)
1158 |
1159 | root.SetText("")
1160 | checkDocEq(t, doc, ``)
1161 | checkStrEq(t, root.Text(), "")
1162 | checkIntEq(t, len(root.Child), 1)
1163 |
1164 | root.SetText("")
1165 | checkDocEq(t, doc, ``)
1166 | checkStrEq(t, root.Text(), "")
1167 | checkIntEq(t, len(root.Child), 1)
1168 |
1169 | root.RemoveChildAt(0)
1170 | root.CreateText("corge")
1171 | root.CreateCData("grault")
1172 | root.CreateText("waldo")
1173 | root.CreateCData("fred")
1174 | root.CreateElement("child")
1175 | checkDocEq(t, doc, `corgewaldo`)
1176 | checkStrEq(t, root.Text(), "corgegraultwaldofred")
1177 | checkIntEq(t, len(root.Child), 5)
1178 |
1179 | root.SetText("")
1180 | checkDocEq(t, doc, ``)
1181 | checkStrEq(t, root.Text(), "")
1182 | checkIntEq(t, len(root.Child), 1)
1183 | }
1184 |
1185 | func TestSetTail(t *testing.T) {
1186 | doc := NewDocument()
1187 | root := doc.CreateElement("root")
1188 | child := root.CreateElement("child")
1189 | root.CreateText("\n\t")
1190 | child.SetText("foo")
1191 | checkDocEq(t, doc, "foo\n\t")
1192 | checkStrEq(t, child.Tail(), "\n\t")
1193 | checkIntEq(t, len(root.Child), 2)
1194 | checkIntEq(t, len(child.Child), 1)
1195 |
1196 | root.CreateCData(" ")
1197 | checkDocEq(t, doc, "foo\n\t")
1198 | checkStrEq(t, child.Tail(), "\n\t ")
1199 | checkIntEq(t, len(root.Child), 3)
1200 | checkIntEq(t, len(child.Child), 1)
1201 |
1202 | child.SetTail("")
1203 | checkDocEq(t, doc, "foo")
1204 | checkStrEq(t, child.Tail(), "")
1205 | checkIntEq(t, len(root.Child), 1)
1206 | checkIntEq(t, len(child.Child), 1)
1207 |
1208 | child.SetTail("\t\t\t")
1209 | checkDocEq(t, doc, "foo\t\t\t")
1210 | checkStrEq(t, child.Tail(), "\t\t\t")
1211 | checkIntEq(t, len(root.Child), 2)
1212 | checkIntEq(t, len(child.Child), 1)
1213 |
1214 | child.SetTail("\t\n\n\t")
1215 | checkDocEq(t, doc, "foo\t\n\n\t")
1216 | checkStrEq(t, child.Tail(), "\t\n\n\t")
1217 | checkIntEq(t, len(root.Child), 2)
1218 | checkIntEq(t, len(child.Child), 1)
1219 |
1220 | child.SetTail("")
1221 | checkDocEq(t, doc, "foo")
1222 | checkStrEq(t, child.Tail(), "")
1223 | checkIntEq(t, len(root.Child), 1)
1224 | checkIntEq(t, len(child.Child), 1)
1225 | }
1226 |
1227 | func TestAttrParent(t *testing.T) {
1228 | doc := NewDocument()
1229 | root := doc.CreateElement("root")
1230 | attr1 := root.CreateAttr("bar", "1")
1231 | attr2 := root.CreateAttr("qux", "2")
1232 |
1233 | checkIntEq(t, len(root.Attr), 2)
1234 | checkElementEq(t, attr1.Element(), root)
1235 | checkElementEq(t, attr2.Element(), root)
1236 |
1237 | attr1 = root.RemoveAttr("bar")
1238 | attr2 = root.RemoveAttr("qux")
1239 | checkElementEq(t, attr1.Element(), nil)
1240 | checkElementEq(t, attr2.Element(), nil)
1241 |
1242 | s := ``
1243 | err := doc.ReadFromString(s)
1244 | if err != nil {
1245 | t.Error("etree: failed to parse document")
1246 | }
1247 |
1248 | root = doc.SelectElement("root")
1249 | for i := range root.Attr {
1250 | checkElementEq(t, root.Attr[i].Element(), root)
1251 | }
1252 | }
1253 |
1254 | func TestDefaultNamespaceURI(t *testing.T) {
1255 | s := `
1256 |
1257 |
1258 |
1259 |
1260 |
1261 |
1262 |
1263 |
1264 |
1265 | `
1266 |
1267 | doc := newDocumentFromString(t, s)
1268 | root := doc.SelectElement("root")
1269 | child1 := root.SelectElement("child1")
1270 | child2 := root.SelectElement("child2")
1271 | grandchild1 := child1.SelectElement("grandchild1")
1272 | grandchild2 := child1.SelectElement("grandchild2")
1273 | greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
1274 |
1275 | checkStrEq(t, doc.NamespaceURI(), "")
1276 | checkStrEq(t, root.NamespaceURI(), "https://root.example.com")
1277 | checkStrEq(t, child1.NamespaceURI(), "https://child.example.com")
1278 | checkStrEq(t, child2.NamespaceURI(), "https://root.example.com")
1279 | checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com")
1280 | checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com")
1281 | checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://child.example.com")
1282 |
1283 | checkStrEq(t, root.Attr[0].NamespaceURI(), "")
1284 | checkStrEq(t, root.Attr[1].NamespaceURI(), "")
1285 | checkStrEq(t, root.Attr[2].NamespaceURI(), "https://attrib.example.com")
1286 | checkStrEq(t, root.Attr[3].NamespaceURI(), "")
1287 | checkStrEq(t, child1.Attr[0].NamespaceURI(), "")
1288 | checkStrEq(t, child1.Attr[1].NamespaceURI(), "https://attrib.example.com")
1289 | checkStrEq(t, child2.Attr[0].NamespaceURI(), "")
1290 | checkStrEq(t, grandchild1.Attr[0].NamespaceURI(), "")
1291 | checkStrEq(t, grandchild1.Attr[1].NamespaceURI(), "")
1292 | checkStrEq(t, grandchild2.Attr[0].NamespaceURI(), "")
1293 | checkStrEq(t, greatgrandchild1.Attr[0].NamespaceURI(), "https://attrib.example.com")
1294 |
1295 | f := doc.FindElements("//*[namespace-uri()='https://root.example.com']")
1296 | if len(f) != 2 || f[0] != root || f[1] != child2 {
1297 | t.Error("etree: failed namespace-uri test")
1298 | }
1299 |
1300 | f = doc.FindElements("//*[namespace-uri()='https://child.example.com']")
1301 | if len(f) != 3 || f[0] != child1 || f[1] != grandchild2 || f[2] != greatgrandchild1 {
1302 | t.Error("etree: failed namespace-uri test")
1303 | }
1304 |
1305 | f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']")
1306 | if len(f) != 1 || f[0] != grandchild1 {
1307 | t.Error("etree: failed namespace-uri test")
1308 | }
1309 |
1310 | f = doc.FindElements("//*[namespace-uri()='']")
1311 | if len(f) != 0 {
1312 | t.Error("etree: failed namespace-uri test")
1313 | }
1314 |
1315 | f = doc.FindElements("//*[namespace-uri()='foo']")
1316 | if len(f) != 0 {
1317 | t.Error("etree: failed namespace-uri test")
1318 | }
1319 | }
1320 |
1321 | func TestLocalNamespaceURI(t *testing.T) {
1322 | s := `
1323 |
1324 |
1325 |
1326 |
1327 |
1328 |
1329 |
1330 |
1331 |
1332 |
1333 |
1334 |
1335 |
1336 | `
1337 |
1338 | doc := newDocumentFromString(t, s)
1339 | root := doc.SelectElement("root")
1340 | child1 := root.SelectElement("child1")
1341 | child2 := root.SelectElement("child2")
1342 | child3 := root.SelectElement("child3")
1343 | grandchild1 := child1.SelectElement("grandchild1")
1344 | grandchild2 := child1.SelectElement("grandchild2")
1345 | grandchild3 := child1.SelectElement("grandchild3")
1346 | grandchild4 := child1.SelectElement("grandchild4")
1347 | greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
1348 |
1349 | checkStrEq(t, doc.NamespaceURI(), "")
1350 | checkStrEq(t, root.NamespaceURI(), "https://root.example.com")
1351 | checkStrEq(t, child1.NamespaceURI(), "https://child.example.com")
1352 | checkStrEq(t, child2.NamespaceURI(), "https://root.example.com")
1353 | checkStrEq(t, child3.NamespaceURI(), "")
1354 | checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com")
1355 | checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com")
1356 | checkStrEq(t, grandchild3.NamespaceURI(), "https://root.example.com")
1357 | checkStrEq(t, grandchild4.NamespaceURI(), "")
1358 | checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://root.example.com")
1359 |
1360 | f := doc.FindElements("//*[namespace-uri()='https://root.example.com']")
1361 | if len(f) != 4 || f[0] != root || f[1] != child2 || f[2] != grandchild3 || f[3] != greatgrandchild1 {
1362 | t.Error("etree: failed namespace-uri test")
1363 | }
1364 |
1365 | f = doc.FindElements("//*[namespace-uri()='https://child.example.com']")
1366 | if len(f) != 2 || f[0] != child1 || f[1] != grandchild2 {
1367 | t.Error("etree: failed namespace-uri test")
1368 | }
1369 |
1370 | f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']")
1371 | if len(f) != 1 || f[0] != grandchild1 {
1372 | t.Error("etree: failed namespace-uri test")
1373 | }
1374 |
1375 | f = doc.FindElements("//*[namespace-uri()='']")
1376 | if len(f) != 2 || f[0] != child3 || f[1] != grandchild4 {
1377 | t.Error("etree: failed namespace-uri test")
1378 | }
1379 |
1380 | f = doc.FindElements("//*[namespace-uri()='foo']")
1381 | if len(f) != 0 {
1382 | t.Error("etree: failed namespace-uri test")
1383 | }
1384 | }
1385 |
1386 | func TestWhitespace(t *testing.T) {
1387 | s := "\n\t\n\t\t x\n \n"
1388 |
1389 | doc := newDocumentFromString(t, s)
1390 | root := doc.Root()
1391 | checkIntEq(t, len(root.Child), 3)
1392 |
1393 | cd := root.Child[0].(*CharData)
1394 | checkBoolEq(t, cd.IsWhitespace(), true)
1395 | checkStrBinaryEq(t, cd.Data, "\n\t")
1396 |
1397 | cd = root.Child[2].(*CharData)
1398 | checkBoolEq(t, cd.IsWhitespace(), true)
1399 | checkStrBinaryEq(t, cd.Data, "\n")
1400 |
1401 | child := root.SelectElement("child")
1402 | checkIntEq(t, len(child.Child), 3)
1403 |
1404 | cd = child.Child[0].(*CharData)
1405 | checkBoolEq(t, cd.IsWhitespace(), true)
1406 | checkStrBinaryEq(t, cd.Data, "\n\t\t")
1407 |
1408 | cd = child.Child[2].(*CharData)
1409 | checkBoolEq(t, cd.IsWhitespace(), true)
1410 | checkStrBinaryEq(t, cd.Data, "\n ")
1411 |
1412 | grandchild := child.SelectElement("grandchild")
1413 | checkIntEq(t, len(grandchild.Child), 1)
1414 |
1415 | cd = grandchild.Child[0].(*CharData)
1416 | checkBoolEq(t, cd.IsWhitespace(), false)
1417 |
1418 | cd.SetData(" ")
1419 | checkBoolEq(t, cd.IsWhitespace(), true)
1420 |
1421 | cd.SetData(" x")
1422 | checkBoolEq(t, cd.IsWhitespace(), false)
1423 |
1424 | cd.SetData("\t\n\r ")
1425 | checkBoolEq(t, cd.IsWhitespace(), true)
1426 |
1427 | cd.SetData("\uFFFD")
1428 | checkBoolEq(t, cd.IsWhitespace(), false)
1429 |
1430 | cd.SetData("")
1431 | checkBoolEq(t, cd.IsWhitespace(), true)
1432 | }
1433 |
1434 | func TestTokenWriteTo(t *testing.T) {
1435 | s := `
1436 |
1437 |
1438 | Great Expectations
1439 |
1440 | `
1441 | doc := newDocumentFromString(t, s)
1442 |
1443 | writeSettings := WriteSettings{}
1444 | indentSettings := IndentSettings{UseTabs: true}
1445 |
1446 | tests := []struct {
1447 | path string
1448 | expected string
1449 | }{
1450 | {"//store", "\n\t\n\t\n\t\tGreat Expectations\n\t\n"},
1451 | {"//store/book", "\n\tGreat Expectations\n"},
1452 | {"//store/book/title", "Great Expectations"},
1453 | }
1454 | for _, test := range tests {
1455 | var buffer bytes.Buffer
1456 |
1457 | c := doc.FindElement(test.path)
1458 | c.IndentWithSettings(&indentSettings)
1459 | c.WriteTo(&buffer, &writeSettings)
1460 | checkStrEq(t, buffer.String(), test.expected)
1461 | }
1462 | }
1463 |
1464 | func TestReindexChildren(t *testing.T) {
1465 | s := `
1466 |
1467 |
1468 |
1469 |
1470 |
1471 | `
1472 | doc := newDocumentFromString(t, s)
1473 | doc.Unindent()
1474 |
1475 | root := doc.Root()
1476 | if root == nil || root.Tag != "root" || len(root.Child) != 5 {
1477 | t.Error("etree: expected root element not found")
1478 | }
1479 |
1480 | for i := 0; i < len(root.Child); i++ {
1481 | if root.Child[i].Index() != i {
1482 | t.Error("etree: incorrect child index found in root element child")
1483 | }
1484 | }
1485 |
1486 | rand.Shuffle(len(root.Child), func(i, j int) {
1487 | root.Child[i], root.Child[j] = root.Child[j], root.Child[i]
1488 | })
1489 |
1490 | root.ReindexChildren()
1491 |
1492 | for i := 0; i < len(root.Child); i++ {
1493 | if root.Child[i].Index() != i {
1494 | t.Error("etree: incorrect child index found in root element child")
1495 | }
1496 | }
1497 | }
1498 |
1499 | func TestPreserveDuplicateAttrs(t *testing.T) {
1500 | s := ``
1501 |
1502 | checkAttrCount := func(e *Element, n int) {
1503 | if len(e.Attr) != n {
1504 | t.Errorf("etree: expected %d attributes, got %d", n, len(e.Attr))
1505 | }
1506 | }
1507 | checkAttr := func(e *Element, i int, key, value string) {
1508 | if i >= len(e.Attr) {
1509 | t.Errorf("etree: attr[%d] out of bounds", i)
1510 | return
1511 | }
1512 | if e.Attr[i].Key != key {
1513 | t.Errorf("etree: attr[%d] expected key %s, got %s", i, key, e.Attr[i].Key)
1514 | }
1515 | if e.Attr[i].Value != value {
1516 | t.Errorf("etree: attr[%d] expected value %s, got %s", i, value, e.Attr[i].Value)
1517 | }
1518 | }
1519 |
1520 | t.Run("enabled", func(t *testing.T) {
1521 | doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: true})
1522 | e := doc.FindElement("element")
1523 | checkAttrCount(e, 5)
1524 | checkAttr(e, 0, "x", "value1")
1525 | checkAttr(e, 1, "y", "value2")
1526 | checkAttr(e, 2, "x", "value3")
1527 | checkAttr(e, 3, "x", "value4")
1528 | checkAttr(e, 4, "y", "value5")
1529 | })
1530 |
1531 | t.Run("disabled", func(t *testing.T) {
1532 | doc := newDocumentFromString2(t, s, ReadSettings{})
1533 | e := doc.FindElement("element")
1534 | checkAttrCount(e, 2)
1535 | checkAttr(e, 0, "x", "value4")
1536 | checkAttr(e, 1, "y", "value5")
1537 | })
1538 | }
1539 |
1540 | func TestNotNil(t *testing.T) {
1541 | s := `true`
1542 |
1543 | doc := newDocumentFromString(t, s)
1544 | doc.SelectElement("enabled").NotNil().SetText("false")
1545 | doc.SelectElement("visible").NotNil().SetText("true")
1546 |
1547 | want := `false`
1548 | got, err := doc.WriteToString()
1549 | if err != nil {
1550 | t.Fatal("etree: failed to write document to string")
1551 | }
1552 | if got != want {
1553 | t.Error("etree: unexpected NotNil result")
1554 | t.Error("wanted:\n" + want)
1555 | t.Error("got:\n" + got)
1556 | }
1557 | }
1558 |
1559 | func TestValidateInput(t *testing.T) {
1560 | tests := []struct {
1561 | s string
1562 | err string
1563 | }{
1564 | {`x`, ""},
1565 | {``, ""},
1566 | {`x`, `XML syntax error on line 1: unexpected EOF`},
1567 | {``, `XML syntax error on line 1: unexpected end element `},
1568 | {`<>`, `XML syntax error on line 1: expected element name after <`},
1569 | {`xtrailing`, "etree: invalid XML format"},
1570 | {`x<`, "etree: invalid XML format"},
1571 | {`x`, `XML syntax error on line 1: element closed by `},
1572 | }
1573 |
1574 | type readFunc func(doc *Document, s string) error
1575 | runTests := func(t *testing.T, read readFunc) {
1576 | for i, test := range tests {
1577 | doc := NewDocument()
1578 | doc.ReadSettings.ValidateInput = true
1579 | err := read(doc, test.s)
1580 | if err == nil {
1581 | if test.err != "" {
1582 | t.Errorf("etree: test #%d:\nExpected error:\n %s\nReceived error:\n nil", i, test.err)
1583 | }
1584 | root := doc.Root()
1585 | if root == nil || root.Tag != "root" {
1586 | t.Errorf("etree: test #%d: failed to read document after input validation", i)
1587 | }
1588 | } else {
1589 | te := err.Error()
1590 | if te != test.err {
1591 | t.Errorf("etree: test #%d:\nExpected error;\n %s\nReceived error:\n %s", i, test.err, te)
1592 | }
1593 | }
1594 | }
1595 | }
1596 |
1597 | readFromString := func(doc *Document, s string) error {
1598 | return doc.ReadFromString(s)
1599 | }
1600 | t.Run("ReadFromString", func(t *testing.T) { runTests(t, readFromString) })
1601 |
1602 | readFromBytes := func(doc *Document, s string) error {
1603 | return doc.ReadFromBytes([]byte(s))
1604 | }
1605 | t.Run("ReadFromBytes", func(t *testing.T) { runTests(t, readFromBytes) })
1606 |
1607 | readFromFile := func(doc *Document, s string) error {
1608 | pathtmp := path.Join(t.TempDir(), "etree-test")
1609 | err := os.WriteFile(pathtmp, []byte(s), fs.ModePerm)
1610 | if err != nil {
1611 | return errors.New("unable to write tmp file for input validation")
1612 | }
1613 | return doc.ReadFromFile(pathtmp)
1614 | }
1615 | t.Run("ReadFromFile", func(t *testing.T) { runTests(t, readFromFile) })
1616 | }
1617 |
1618 | func TestSiblingElement(t *testing.T) {
1619 | doc := newDocumentFromString(t, ` `)
1620 |
1621 | root := doc.SelectElement("root")
1622 | a := root.SelectElement("a")
1623 | b := root.SelectElement("b")
1624 | c := root.SelectElement("c")
1625 | b1 := b.SelectElement("b1")
1626 |
1627 | tests := []struct {
1628 | e *Element
1629 | next *Element
1630 | prev *Element
1631 | }{
1632 | {root, nil, nil},
1633 | {a, b, nil},
1634 | {b, c, a},
1635 | {c, nil, b},
1636 | {b1, nil, nil},
1637 | }
1638 |
1639 | toString := func(e *Element) string {
1640 | if e == nil {
1641 | return "nil"
1642 | }
1643 | return e.Tag
1644 | }
1645 |
1646 | for i, test := range tests {
1647 | next := test.e.NextSibling()
1648 | if next != test.next {
1649 | t.Errorf("etree: test #%d unexpected NextSibling result.\n Expected: %s\n Received: %s\n",
1650 | i, toString(next), toString(test.next))
1651 | }
1652 |
1653 | prev := test.e.PrevSibling()
1654 | if prev != test.prev {
1655 | t.Errorf("etree: test #%d unexpected PrevSibling result.\n Expected: %s\n Received: %s\n",
1656 | i, toString(prev), toString(test.prev))
1657 | }
1658 | }
1659 | }
1660 |
1661 | func TestContinuations(t *testing.T) {
1662 | doc := NewDocument()
1663 | root := doc.CreateChild("root", func(e *Element) {
1664 | e.CreateChild("child1", func(e *Element) {
1665 | e.CreateComment("Grandchildren of child #1")
1666 | e.CreateChild("grandchild1", func(e *Element) {
1667 | e.CreateAttr("attr1", "1")
1668 | e.CreateAttr("attr2", "2")
1669 | })
1670 | e.CreateChild("grandchild2", func(e *Element) {
1671 | e.CreateAttr("attr1", "3")
1672 | e.CreateAttr("attr2", "4")
1673 | })
1674 | })
1675 | e.CreateChild("child2", func(e *Element) {
1676 | e.CreateComment("Grandchildren of child #2")
1677 | e.CreateChild("grandchild1", func(e *Element) {
1678 | e.CreateAttr("attr1", "5")
1679 | e.CreateAttr("attr2", "6")
1680 | })
1681 | e.CreateChild("grandchild2", func(e *Element) {
1682 | e.CreateAttr("attr1", "7")
1683 | e.CreateAttr("attr2", "8")
1684 | })
1685 | })
1686 | })
1687 | checkStrEq(t, root.Tag, "root")
1688 |
1689 | // Serialize the document to a string
1690 | doc.IndentTabs()
1691 | s, err := doc.WriteToString()
1692 | if err != nil {
1693 | t.Error("etree: failed to serialize document")
1694 | }
1695 |
1696 | // Make sure the serialized XML matches expectation.
1697 | expected := `
1698 |
1699 |
1700 |
1701 |
1702 |
1703 |
1704 |
1705 |
1706 |
1707 |
1708 |
1709 | `
1710 |
1711 | checkStrEq(t, s, expected)
1712 | }
1713 |
--------------------------------------------------------------------------------
/example_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015-2019 Brett Vickers.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 |
5 | package etree
6 |
7 | import "os"
8 |
9 | // Create an etree Document, add XML entities to it, and serialize it
10 | // to stdout.
11 | func ExampleDocument_creating() {
12 | doc := NewDocument()
13 | doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
14 | doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
15 |
16 | people := doc.CreateElement("People")
17 | people.CreateComment("These are all known people")
18 |
19 | jon := people.CreateElement("Person")
20 | jon.CreateAttr("name", "Jon O'Reilly")
21 |
22 | sally := people.CreateElement("Person")
23 | sally.CreateAttr("name", "Sally")
24 |
25 | doc.Indent(2)
26 | doc.WriteTo(os.Stdout)
27 | // Output:
28 | //
29 | //
30 | //
31 | //
32 | //
33 | //
34 | //
35 | }
36 |
37 | func ExampleDocument_reading() {
38 | doc := NewDocument()
39 | if err := doc.ReadFromFile("document.xml"); err != nil {
40 | panic(err)
41 | }
42 | }
43 |
44 | func ExamplePath() {
45 | xml := `
46 |
47 |
48 | Great Expectations
49 | Charles Dickens
50 |
51 |
52 | Ulysses
53 | James Joyce
54 |
55 | `
56 |
57 | doc := NewDocument()
58 | doc.ReadFromString(xml)
59 | for _, e := range doc.FindElements(".//book[author='Charles Dickens']") {
60 | doc := NewDocumentWithRoot(e.Copy())
61 | doc.Indent(2)
62 | doc.WriteTo(os.Stdout)
63 | }
64 | // Output:
65 | //
66 | // Great Expectations
67 | // Charles Dickens
68 | //
69 | }
70 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/beevik/etree
2 |
3 | go 1.21.0
4 |
--------------------------------------------------------------------------------
/helpers.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015-2019 Brett Vickers.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 |
5 | package etree
6 |
7 | import (
8 | "io"
9 | "strings"
10 | "unicode/utf8"
11 | )
12 |
// stack is a generic last-in, first-out container backed by a slice.
type stack[E any] struct {
	data []E
}

// empty reports whether the stack holds no values.
func (s *stack[E]) empty() bool {
	return len(s.data) == 0
}

// push places value on top of the stack.
func (s *stack[E]) push(value E) {
	s.data = append(s.data, value)
}

// pop removes the top value and returns it. Calling pop on an empty
// stack panics with an out-of-range index.
func (s *stack[E]) pop() E {
	last := len(s.data) - 1
	top := s.data[last]

	// Overwrite the vacated slot with the zero value so the backing array
	// no longer holds the popped value.
	var zero E
	s.data[last] = zero

	s.data = s.data[:last]
	return top
}

// peek returns the top value without removing it. Calling peek on an
// empty stack panics with an out-of-range index.
func (s *stack[E]) peek() E {
	last := len(s.data) - 1
	return s.data[last]
}
36 |
// queue is a generic first-in, first-out container stored as a ring
// buffer. head is the index of the next value to remove and tail is the
// index of the next free slot; head == tail means the queue is empty.
type queue[E any] struct {
	data       []E
	head, tail int
}

// add appends value to the back of the queue, enlarging the buffer first
// when storing one more value would leave no free slot.
func (f *queue[E]) add(value E) {
	if f.len()+1 >= len(f.data) {
		f.grow()
	}
	f.data[f.tail] = value
	// Advance tail, wrapping back to the start of the buffer.
	f.tail = (f.tail + 1) % len(f.data)
}

// remove takes the value at the front of the queue and returns it. It
// does not check whether the queue is empty.
func (f *queue[E]) remove() E {
	front := f.data[f.head]

	// Overwrite the vacated slot with the zero value so the buffer no
	// longer holds the removed value.
	var zero E
	f.data[f.head] = zero

	// Advance head, wrapping back to the start of the buffer.
	f.head = (f.head + 1) % len(f.data)
	return front
}

// len returns the number of values currently stored in the queue.
func (f *queue[E]) len() int {
	n := f.tail - f.head
	if n < 0 {
		// The stored values wrap around the end of the buffer.
		n += len(f.data)
	}
	return n
}

// grow doubles the buffer (or allocates four slots when there is none)
// and copies the stored values to the start of the new buffer in order.
func (f *queue[E]) grow() {
	size := 4
	if len(f.data) > 0 {
		size = 2 * len(f.data)
	}

	n := f.len()
	next := make([]E, size)
	if f.head <= f.tail {
		// Values are contiguous.
		copy(next, f.data[f.head:f.tail])
	} else {
		// Values wrap: copy the part up to the end of the old buffer,
		// then the part at its start.
		k := copy(next, f.data[f.head:])
		copy(next[k:], f.data[:f.tail])
	}
	f.data, f.head, f.tail = next, 0, n
}
84 |
// xmlReader is the interface through which an XML byte stream is
// processed and decoded: a Read method plus a running count of the bytes
// read so far.
type xmlReader interface {
	Bytes() int64
	Read(p []byte) (n int, err error)
}

// xmlSimpleReader is a proxy reader that keeps a count of the bytes read
// from the reader it wraps.
type xmlSimpleReader struct {
	r     io.Reader
	bytes int64
}

// newXmlSimpleReader wraps r in a byte-counting reader whose count starts
// at zero.
func newXmlSimpleReader(r io.Reader) xmlReader {
	return &xmlSimpleReader{r: r}
}

// Bytes returns the total number of bytes read through this reader.
func (xr *xmlSimpleReader) Bytes() int64 {
	return xr.bytes
}

// Read forwards to the wrapped reader, adds the number of bytes it
// returned to the running total, and passes its result through unchanged.
func (xr *xmlSimpleReader) Read(p []byte) (int, error) {
	count, err := xr.r.Read(p)
	xr.bytes += int64(count)
	return count, err
}
112 |
// xmlPeekReader implements a proxy reader that counts the number of
// bytes read from its encapsulated reader. It also allows the caller to
// "peek" at the previous portions of the buffer after they have been
// parsed.
type xmlPeekReader struct {
	r          io.Reader
	bytes      int64  // total bytes read by the Read function
	buf        []byte // internal read buffer
	bufSize    int    // total bytes used in the read buffer
	bufOffset  int64  // total bytes read when buf was last filled
	window     []byte // current read buffer window
	peekBuf    []byte // buffer used to store data to be peeked at later
	peekOffset int64  // total read offset of the start of the peek buffer
	peekNext   int64  // total read offset of the next byte to capture into peekBuf
	pendingErr error  // error the wrapped reader returned together with data
}

// newXmlPeekReader wraps r in a peek reader with a 4096-byte internal
// buffer. Peeking is disabled until PeekPrepare is called.
func newXmlPeekReader(r io.Reader) *xmlPeekReader {
	buf := make([]byte, 4096)
	return &xmlPeekReader{
		r:          r,
		bytes:      0,
		buf:        buf,
		bufSize:    0,
		bufOffset:  0,
		window:     buf[0:0],
		peekBuf:    make([]byte, 0),
		peekOffset: -1,
	}
}

// Bytes returns the total number of bytes handed out by Read so far.
func (xr *xmlPeekReader) Bytes() int64 {
	return xr.bytes
}

// Read copies buffered bytes into p, refilling the internal buffer from
// the wrapped reader when it has been fully consumed. It returns the
// error from the wrapped reader only once all data delivered before that
// error has been handed out.
func (xr *xmlPeekReader) Read(p []byte) (n int, err error) {
	if len(xr.window) == 0 {
		if err = xr.fill(); err != nil {
			return 0, err
		}
		if len(xr.window) == 0 {
			return 0, nil
		}
	}

	// copy transfers min(len(p), len(xr.window)) bytes.
	n = copy(p, xr.window)
	xr.window = xr.window[n:]
	xr.bytes += int64(n)

	return n, nil
}

// PeekPrepare starts capturing up to maxLen bytes of the stream beginning
// at the absolute read offset given by offset. Any bytes of that range
// already present in the internal buffer are captured immediately.
func (xr *xmlPeekReader) PeekPrepare(offset int64, maxLen int) {
	if maxLen > cap(xr.peekBuf) {
		xr.peekBuf = make([]byte, 0, maxLen)
	}
	xr.peekBuf = xr.peekBuf[0:0]
	xr.peekOffset = offset
	xr.peekNext = offset
	xr.updatePeekBuf()
}

// PeekFinalize returns the bytes captured since the last PeekPrepare.
func (xr *xmlPeekReader) PeekFinalize() []byte {
	xr.updatePeekBuf()
	return xr.peekBuf
}

// fill replaces the contents of the internal buffer with the next chunk
// read from the wrapped reader.
//
// An io.Reader may return n > 0 bytes together with a non-nil error. In
// that case the data is kept and the error is held back until the next
// call, so no bytes are lost.
func (xr *xmlPeekReader) fill() error {
	xr.bufOffset = xr.bytes
	xr.bufSize = 0

	if err := xr.pendingErr; err != nil {
		xr.pendingErr = nil
		xr.window = xr.buf[0:0]
		return err
	}

	n, err := xr.r.Read(xr.buf)
	if n == 0 && err != nil {
		xr.window = xr.buf[0:0]
		return err
	}
	xr.window, xr.bufSize = xr.buf[:n], n
	xr.pendingErr = err
	xr.updatePeekBuf()
	return nil
}

// updatePeekBuf appends to the peek buffer whatever part of the requested
// peek range is held by the internal buffer and has not been captured
// yet. It does nothing when peeking is disabled or the peek buffer is
// full.
func (xr *xmlPeekReader) updatePeekBuf() {
	peekRemain := cap(xr.peekBuf) - len(xr.peekBuf)
	if xr.peekOffset < 0 || peekRemain <= 0 {
		return
	}

	// Absolute stream range still wanted by the peek buffer. Starting at
	// peekNext rather than peekOffset keeps bytes from being appended a
	// second time when the same buffer contents are examined again.
	rangeMin := xr.peekNext
	rangeMax := xr.peekOffset + int64(cap(xr.peekBuf))

	// Absolute stream range currently held by the internal buffer.
	bufMin := xr.bufOffset
	bufMax := xr.bufOffset + int64(xr.bufSize)

	if rangeMin < bufMin {
		rangeMin = bufMin
	}
	if rangeMax > bufMax {
		rangeMax = bufMax
	}
	if rangeMax <= rangeMin {
		return
	}
	if rangeMax-rangeMin > int64(peekRemain) {
		rangeMax = rangeMin + int64(peekRemain)
	}
	xr.peekBuf = append(xr.peekBuf, xr.buf[rangeMin-bufMin:rangeMax-bufMin]...)
	xr.peekNext = rangeMax
}
220 |
// xmlWriter is a proxy writer that keeps a running count of the bytes
// written through it to the writer it wraps.
type xmlWriter struct {
	w     io.Writer
	bytes int64
}

// newXmlWriter wraps w in a byte-counting writer whose count starts at
// zero.
func newXmlWriter(w io.Writer) *xmlWriter {
	xw := new(xmlWriter)
	xw.w = w
	return xw
}

// Write forwards p to the wrapped writer and adds the number of bytes it
// reported as written to the running total.
func (xw *xmlWriter) Write(p []byte) (n int, err error) {
	written, writeErr := xw.w.Write(p)
	xw.bytes += int64(written)
	return written, writeErr
}
237 |
// isWhitespace reports whether every byte of the string s is a space,
// tab, line feed or carriage return. It returns true for the empty
// string.
func isWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n', '\r':
			// whitespace; keep scanning
		default:
			return false
		}
	}
	return true
}
248 |
// spaceMatch reports whether namespace a matches namespace b. An empty a
// acts as a wildcard and matches any b; otherwise the two must be equal.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}
259 |
// spaceDecompose splits a "namespace:tag" identifier at its first ':'
// and returns the two halves. When str contains no ':', the namespace is
// empty and the key is the whole string.
func spaceDecompose(str string) (space, key string) {
	before, after, found := strings.Cut(str, ":")
	if !found {
		return "", str
	}
	return before, after
}
269 |
// Source strings sliced by indentCRLF and indentLF. Each one is a CRLF
// newline followed by copies of the indentation character. indentLF skips
// the leading '\r' so that it yields an LF-only newline.
const (
	indentSpaces = "\r\n "
	indentTabs   = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)
275 |
// indentCRLF returns a CRLF newline followed by n copies of the first
// non-CRLF character in the source string. A negative n yields the bare
// newline. The result is a slice of source when source is long enough;
// otherwise source is extended with repeated copies of its third byte.
func indentCRLF(n int, source string) string {
	if n < 0 {
		return source[:2]
	}
	// available is the number of indent characters source already holds.
	if available := len(source) - 2; n <= available {
		end := n + 2
		return source[:end]
	}
	missing := n - (len(source) - 2)
	return source + strings.Repeat(source[2:3], missing)
}
288 |
// indentLF returns a LF newline followed by n copies of the first non-LF
// character in the source string. The source string starts with "\r\n";
// its leading '\r' is always skipped. A negative n yields the bare
// newline.
func indentLF(n int, source string) string {
	if n < 0 {
		return source[1:2]
	}
	// available is the number of indent characters source already holds.
	if available := len(source) - 2; n <= available {
		end := n + 2
		return source[1:end]
	}
	missing := n - (len(source) - 2)
	return source[1:] + strings.Repeat(source[2:3], missing)
}
301 |
// nextIndex returns the index within s of the first occurrence of byte
// ch at or after offset, or -1 if there is none. The returned index is
// relative to the start of s, not to offset.
func nextIndex(s string, ch byte, offset int) int {
	rel := strings.IndexByte(s[offset:], ch)
	if rel < 0 {
		return -1
	}
	return offset + rel
}
312 |
// isInteger reports whether the string s is a decimal integer: an
// optional leading '-' followed by one or more ASCII digits. The empty
// string and a lone "-" are not integers.
func isInteger(s string) bool {
	start := 0
	if len(s) > 0 && s[0] == '-' {
		start = 1
	}
	// Require at least one digit after the optional sign.
	if start == len(s) {
		return false
	}
	for i := start; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}
322 |
// escapeMode selects which characters escapeString replaces with escape
// sequences.
type escapeMode byte

const (
	// escapeNormal escapes '&', '<', '>', '\'' and '"'; tab, line feed
	// and carriage return are written unchanged.
	escapeNormal escapeMode = iota
	// escapeCanonicalText escapes '&', '<', '>' and carriage return.
	escapeCanonicalText
	// escapeCanonicalAttr escapes '&', '<', '"', tab, line feed and
	// carriage return.
	escapeCanonicalAttr
)
330 |
331 | // escapeString writes an escaped version of a string to the writer.
332 | func escapeString(w Writer, s string, m escapeMode) {
333 | var esc []byte
334 | last := 0
335 | for i := 0; i < len(s); {
336 | r, width := utf8.DecodeRuneInString(s[i:])
337 | i += width
338 | switch r {
339 | case '&':
340 | esc = []byte("&")
341 | case '<':
342 | esc = []byte("<")
343 | case '>':
344 | if m == escapeCanonicalAttr {
345 | continue
346 | }
347 | esc = []byte(">")
348 | case '\'':
349 | if m != escapeNormal {
350 | continue
351 | }
352 | esc = []byte("'")
353 | case '"':
354 | if m == escapeCanonicalText {
355 | continue
356 | }
357 | esc = []byte(""")
358 | case '\t':
359 | if m != escapeCanonicalAttr {
360 | continue
361 | }
362 | esc = []byte(" ")
363 | case '\n':
364 | if m != escapeCanonicalAttr {
365 | continue
366 | }
367 | esc = []byte("
")
368 | case '\r':
369 | if m == escapeNormal {
370 | continue
371 | }
372 | esc = []byte("
")
373 | default:
374 | if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
375 | esc = []byte("\uFFFD")
376 | break
377 | }
378 | continue
379 | }
380 | w.WriteString(s[last : i-width])
381 | w.Write(esc)
382 | last = i
383 | }
384 | w.WriteString(s[last:])
385 | }
386 |
// isInCharacterRange reports whether the rune r is tab, line feed,
// carriage return, or lies in one of the ranges U+0020-U+D7FF,
// U+E000-U+FFFD or U+10000-U+10FFFF.
func isInCharacterRange(r rune) bool {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		return true
	case 0x20 <= r && r <= 0xD7FF:
		return true
	case 0xE000 <= r && r <= 0xFFFD:
		return true
	case 0x10000 <= r && r <= 0x10FFFF:
		return true
	}
	return false
}
395 |
--------------------------------------------------------------------------------
/path.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015-2019 Brett Vickers.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 |
5 | package etree
6 |
7 | import (
8 | "strconv"
9 | "strings"
10 | )
11 |
12 | /*
13 | A Path is a string that represents a search path through an etree starting
14 | from the document root or an arbitrary element. Paths are used with the
15 | Element object's Find* methods to locate and return desired elements.
16 |
17 | A Path consists of a series of slash-separated "selectors", each of which may
18 | be modified by one or more bracket-enclosed "filters". Selectors are used to
19 | traverse the etree from element to element, while filters are used to narrow
20 | the list of candidate elements at each node.
21 |
22 | Although etree Path strings are structurally and behaviorally similar to XPath
23 | strings (https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more
24 | limited set of selectors and filtering options.
25 |
26 | The following selectors are supported by etree paths:
27 |
28 | . Select the current element.
29 | .. Select the parent of the current element.
30 | * Select all child elements of the current element.
31 | / Select the root element when used at the start of a path.
32 | // Select all descendants of the current element.
33 | tag Select all child elements with a name matching the tag.
34 |
35 | The following basic filters are supported:
36 |
37 | [@attrib] Keep elements with an attribute named attrib.
38 | [@attrib='val'] Keep elements with an attribute named attrib and value matching val.
39 | [tag] Keep elements with a child element named tag.
40 | [tag='val'] Keep elements with a child element named tag and text matching val.
41 | [n] Keep the n-th element, where n is a numeric index starting from 1.
42 |
43 | The following function-based filters are supported:
44 |
45 | [text()] Keep elements with non-empty text.
46 | [text()='val'] Keep elements whose text matches val.
47 | [local-name()='val'] Keep elements whose un-prefixed tag matches val.
48 | [name()='val'] Keep elements whose full tag exactly matches val.
49 | [namespace-prefix()] Keep elements with non-empty namespace prefixes.
50 | [namespace-prefix()='val'] Keep elements whose namespace prefix matches val.
51 | [namespace-uri()] Keep elements with non-empty namespace URIs.
52 | [namespace-uri()='val'] Keep elements whose namespace URI matches val.
53 |
54 | Below are some examples of etree path strings.
55 |
56 | Select the bookstore child element of the root element:
57 |
58 | /bookstore
59 |
60 | Beginning from the root element, select the title elements of all descendant
61 | book elements having a 'category' attribute of 'WEB':
62 |
63 | //book[@category='WEB']/title
64 |
65 | Beginning from the current element, select the first descendant book element
66 | with a title child element containing the text 'Great Expectations':
67 |
68 | .//book[title='Great Expectations'][1]
69 |
70 | Beginning from the current element, select all child elements of book elements
71 | with an attribute 'language' set to 'english':
72 |
73 | ./book/*[@language='english']
74 |
75 | Beginning from the current element, select all child elements of book elements
76 | containing the text 'special':
77 |
78 | ./book/*[text()='special']
79 |
80 | Beginning from the current element, select all descendant book elements whose
81 | title child element has a 'language' attribute of 'french':
82 |
83 | .//book/title[@language='french']/..
84 |
85 | Beginning from the current element, select all descendant book elements
86 | belonging to the http://www.w3.org/TR/html4/ namespace:
87 |
88 | .//book[namespace-uri()='http://www.w3.org/TR/html4/']
89 | */
type Path struct {
	segments []segment // compiled path segments, applied in order
}
93 |
// ErrPath is the error type returned by path functions when they are
// given an invalid etree path. Its value is the error description.
type ErrPath string

// Error returns the error description prefixed with "etree: ".
func (err ErrPath) Error() string {
	const prefix = "etree: "
	return prefix + string(err)
}
101 |
102 | // CompilePath creates an optimized version of an XPath-like string that
103 | // can be used to query elements in an element tree.
104 | func CompilePath(path string) (Path, error) {
105 | var comp compiler
106 | segments := comp.parsePath(path)
107 | if comp.err != ErrPath("") {
108 | return Path{nil}, comp.err
109 | }
110 | return Path{segments}, nil
111 | }
112 |
113 | // MustCompilePath creates an optimized version of an XPath-like string that
114 | // can be used to query elements in an element tree. Panics if an error
115 | // occurs. Use this function to create Paths when you know the path is
116 | // valid (i.e., if it's hard-coded).
117 | func MustCompilePath(path string) Path {
118 | p, err := CompilePath(path)
119 | if err != nil {
120 | panic(err)
121 | }
122 | return p
123 | }
124 |
125 | // A segment is a portion of a path between "/" characters.
126 | // It contains one selector and zero or more [filters].
127 | type segment struct {
128 | sel selector
129 | filters []filter
130 | }
131 |
132 | func (seg *segment) apply(e *Element, p *pather) {
133 | seg.sel.apply(e, p)
134 | for _, f := range seg.filters {
135 | f.apply(p)
136 | }
137 | }
138 |
// A selector selects XML elements for consideration by the
// path traversal.
type selector interface {
	// apply appends the elements selected relative to e to the pather's
	// candidate list.
	apply(e *Element, p *pather)
}
144 |
// A filter pares down a list of candidate XML elements based
// on a path filter in [brackets].
type filter interface {
	// apply replaces the pather's candidate list with the subset of
	// candidates that pass the filter.
	apply(p *pather)
}
150 |
// A pather is a helper object that traverses an element tree using
// a Path object. It collects and deduplicates all elements matching
// the path query.
type pather struct {
	queue      queue[node]       // nodes still waiting to be evaluated
	results    []*Element        // matching elements, in the order found
	inResults  map[*Element]bool // elements already present in results
	candidates []*Element        // elements produced by the segment being evaluated
	scratch    []*Element        // used by filters
}
161 |
// A node represents an element and the remaining path segments that
// should be applied against it by the pather.
type node struct {
	e        *Element  // element the segments are evaluated against
	segments []segment // segments not yet applied; the first is applied next
}
168 |
169 | func newPather() *pather {
170 | return &pather{
171 | results: make([]*Element, 0),
172 | inResults: make(map[*Element]bool),
173 | candidates: make([]*Element, 0),
174 | scratch: make([]*Element, 0),
175 | }
176 | }
177 |
178 | // traverse follows the path from the element e, collecting
179 | // and then returning all elements that match the path's selectors
180 | // and filters.
181 | func (p *pather) traverse(e *Element, path Path) []*Element {
182 | for p.queue.add(node{e, path.segments}); p.queue.len() > 0; {
183 | p.eval(p.queue.remove())
184 | }
185 | return p.results
186 | }
187 |
188 | // eval evaluates the current path node by applying the remaining
189 | // path's selector rules against the node's element.
190 | func (p *pather) eval(n node) {
191 | p.candidates = p.candidates[0:0]
192 | seg, remain := n.segments[0], n.segments[1:]
193 | seg.apply(n.e, p)
194 |
195 | if len(remain) == 0 {
196 | for _, c := range p.candidates {
197 | if in := p.inResults[c]; !in {
198 | p.inResults[c] = true
199 | p.results = append(p.results, c)
200 | }
201 | }
202 | } else {
203 | for _, c := range p.candidates {
204 | p.queue.add(node{c, remain})
205 | }
206 | }
207 | }
208 |
// A compiler generates a compiled path from a path string.
type compiler struct {
	err ErrPath // first error found while parsing; empty if none
}
213 |
214 | // parsePath parses an XPath-like string describing a path
215 | // through an element tree and returns a slice of segment
216 | // descriptors.
217 | func (c *compiler) parsePath(path string) []segment {
218 | // If path ends with //, fix it
219 | if strings.HasSuffix(path, "//") {
220 | path += "*"
221 | }
222 |
223 | var segments []segment
224 |
225 | // Check for an absolute path
226 | if strings.HasPrefix(path, "/") {
227 | segments = append(segments, segment{new(selectRoot), []filter{}})
228 | path = path[1:]
229 | }
230 |
231 | // Split path into segments
232 | for _, s := range splitPath(path) {
233 | segments = append(segments, c.parseSegment(s))
234 | if c.err != ErrPath("") {
235 | break
236 | }
237 | }
238 | return segments
239 | }
240 |
// splitPath splits path at each '/' that is not inside a quoted string.
// A quoted string starts at a single or double quote and runs to the
// next occurrence of the same quote character. The result always holds
// at least one piece; an empty path yields one empty piece.
func splitPath(path string) []string {
	var (
		pieces []string
		start  int
		quote  byte // active quote character, or 0 when outside quotes
	)
	for i := 0; i < len(path); i++ {
		ch := path[i]
		switch {
		case quote != 0:
			if ch == quote {
				quote = 0
			}
		case ch == '\'' || ch == '"':
			quote = ch
		case ch == '/':
			pieces = append(pieces, path[start:i])
			start = i + 1
		}
	}
	return append(pieces, path[start:])
}
260 |
261 | // parseSegment parses a path segment between / characters.
262 | func (c *compiler) parseSegment(path string) segment {
263 | pieces := strings.Split(path, "[")
264 | seg := segment{
265 | sel: c.parseSelector(pieces[0]),
266 | filters: []filter{},
267 | }
268 | for i := 1; i < len(pieces); i++ {
269 | fpath := pieces[i]
270 | if len(fpath) == 0 || fpath[len(fpath)-1] != ']' {
271 | c.err = ErrPath("path has invalid filter [brackets].")
272 | break
273 | }
274 | seg.filters = append(seg.filters, c.parseFilter(fpath[:len(fpath)-1]))
275 | }
276 | return seg
277 | }
278 |
279 | // parseSelector parses a selector at the start of a path segment.
280 | func (c *compiler) parseSelector(path string) selector {
281 | switch path {
282 | case ".":
283 | return new(selectSelf)
284 | case "..":
285 | return new(selectParent)
286 | case "*":
287 | return new(selectChildren)
288 | case "":
289 | return new(selectDescendants)
290 | default:
291 | return newSelectChildrenByTag(path)
292 | }
293 | }
294 |
// fnTable maps the name of each function usable in a path filter (the
// text before "()") to the Element method that produces its string
// value.
var fnTable = map[string]func(e *Element) string{
	"local-name":       (*Element).name,
	"name":             (*Element).FullTag,
	"namespace-prefix": (*Element).namespacePrefix,
	"namespace-uri":    (*Element).NamespaceURI,
	"text":             (*Element).Text,
}
302 |
303 | // parseFilter parses a path filter contained within [brackets].
304 | func (c *compiler) parseFilter(path string) filter {
305 | if len(path) == 0 {
306 | c.err = ErrPath("path contains an empty filter expression.")
307 | return nil
308 | }
309 |
310 | // Filter contains [@attr='val'], [@attr="val"], [fn()='val'],
311 | // [fn()="val"], [tag='val'] or [tag="val"]?
312 | eqindex := strings.IndexByte(path, '=')
313 | if eqindex >= 0 && eqindex+1 < len(path) {
314 | quote := path[eqindex+1]
315 | if quote == '\'' || quote == '"' {
316 | rindex := nextIndex(path, quote, eqindex+2)
317 | if rindex != len(path)-1 {
318 | c.err = ErrPath("path has mismatched filter quotes.")
319 | return nil
320 | }
321 |
322 | key := path[:eqindex]
323 | value := path[eqindex+2 : rindex]
324 |
325 | switch {
326 | case key[0] == '@':
327 | return newFilterAttrVal(key[1:], value)
328 | case strings.HasSuffix(key, "()"):
329 | name := key[:len(key)-2]
330 | if fn, ok := fnTable[name]; ok {
331 | return newFilterFuncVal(fn, value)
332 | }
333 | c.err = ErrPath("path has unknown function " + name)
334 | return nil
335 | default:
336 | return newFilterChildText(key, value)
337 | }
338 | }
339 | }
340 |
341 | // Filter contains [@attr], [N], [tag] or [fn()]
342 | switch {
343 | case path[0] == '@':
344 | return newFilterAttr(path[1:])
345 | case strings.HasSuffix(path, "()"):
346 | name := path[:len(path)-2]
347 | if fn, ok := fnTable[name]; ok {
348 | return newFilterFunc(fn)
349 | }
350 | c.err = ErrPath("path has unknown function " + name)
351 | return nil
352 | case isInteger(path):
353 | pos, _ := strconv.Atoi(path)
354 | switch {
355 | case pos > 0:
356 | return newFilterPos(pos - 1)
357 | default:
358 | return newFilterPos(pos)
359 | }
360 | default:
361 | return newFilterChild(path)
362 | }
363 | }
364 |
365 | // selectSelf selects the current element into the candidate list.
366 | type selectSelf struct{}
367 |
368 | func (s *selectSelf) apply(e *Element, p *pather) {
369 | p.candidates = append(p.candidates, e)
370 | }
371 |
372 | // selectRoot selects the element's root node.
373 | type selectRoot struct{}
374 |
375 | func (s *selectRoot) apply(e *Element, p *pather) {
376 | root := e
377 | for root.parent != nil {
378 | root = root.parent
379 | }
380 | p.candidates = append(p.candidates, root)
381 | }
382 |
383 | // selectParent selects the element's parent into the candidate list.
384 | type selectParent struct{}
385 |
386 | func (s *selectParent) apply(e *Element, p *pather) {
387 | if e.parent != nil {
388 | p.candidates = append(p.candidates, e.parent)
389 | }
390 | }
391 |
392 | // selectChildren selects the element's child elements into the
393 | // candidate list.
394 | type selectChildren struct{}
395 |
396 | func (s *selectChildren) apply(e *Element, p *pather) {
397 | for _, c := range e.Child {
398 | if c, ok := c.(*Element); ok {
399 | p.candidates = append(p.candidates, c)
400 | }
401 | }
402 | }
403 |
404 | // selectDescendants selects all descendant child elements
405 | // of the element into the candidate list.
406 | type selectDescendants struct{}
407 |
408 | func (s *selectDescendants) apply(e *Element, p *pather) {
409 | var queue queue[*Element]
410 | for queue.add(e); queue.len() > 0; {
411 | e := queue.remove()
412 | p.candidates = append(p.candidates, e)
413 | for _, c := range e.Child {
414 | if c, ok := c.(*Element); ok {
415 | queue.add(c)
416 | }
417 | }
418 | }
419 | }
420 |
421 | // selectChildrenByTag selects into the candidate list all child
422 | // elements of the element having the specified tag.
423 | type selectChildrenByTag struct {
424 | space, tag string
425 | }
426 |
427 | func newSelectChildrenByTag(path string) *selectChildrenByTag {
428 | s, l := spaceDecompose(path)
429 | return &selectChildrenByTag{s, l}
430 | }
431 |
432 | func (s *selectChildrenByTag) apply(e *Element, p *pather) {
433 | for _, c := range e.Child {
434 | if c, ok := c.(*Element); ok && spaceMatch(s.space, c.Space) && s.tag == c.Tag {
435 | p.candidates = append(p.candidates, c)
436 | }
437 | }
438 | }
439 |
440 | // filterPos filters the candidate list, keeping only the
441 | // candidate at the specified index.
442 | type filterPos struct {
443 | index int
444 | }
445 |
446 | func newFilterPos(pos int) *filterPos {
447 | return &filterPos{pos}
448 | }
449 |
450 | func (f *filterPos) apply(p *pather) {
451 | if f.index >= 0 {
452 | if f.index < len(p.candidates) {
453 | p.scratch = append(p.scratch, p.candidates[f.index])
454 | }
455 | } else {
456 | if -f.index <= len(p.candidates) {
457 | p.scratch = append(p.scratch, p.candidates[len(p.candidates)+f.index])
458 | }
459 | }
460 | p.candidates, p.scratch = p.scratch, p.candidates[0:0]
461 | }
462 |
463 | // filterAttr filters the candidate list for elements having
464 | // the specified attribute.
465 | type filterAttr struct {
466 | space, key string
467 | }
468 |
469 | func newFilterAttr(str string) *filterAttr {
470 | s, l := spaceDecompose(str)
471 | return &filterAttr{s, l}
472 | }
473 |
474 | func (f *filterAttr) apply(p *pather) {
475 | for _, c := range p.candidates {
476 | for _, a := range c.Attr {
477 | if spaceMatch(f.space, a.Space) && f.key == a.Key {
478 | p.scratch = append(p.scratch, c)
479 | break
480 | }
481 | }
482 | }
483 | p.candidates, p.scratch = p.scratch, p.candidates[0:0]
484 | }
485 |
486 | // filterAttrVal filters the candidate list for elements having
487 | // the specified attribute with the specified value.
488 | type filterAttrVal struct {
489 | space, key, val string
490 | }
491 |
492 | func newFilterAttrVal(str, value string) *filterAttrVal {
493 | s, l := spaceDecompose(str)
494 | return &filterAttrVal{s, l, value}
495 | }
496 |
497 | func (f *filterAttrVal) apply(p *pather) {
498 | for _, c := range p.candidates {
499 | for _, a := range c.Attr {
500 | if spaceMatch(f.space, a.Space) && f.key == a.Key && f.val == a.Value {
501 | p.scratch = append(p.scratch, c)
502 | break
503 | }
504 | }
505 | }
506 | p.candidates, p.scratch = p.scratch, p.candidates[0:0]
507 | }
508 |
509 | // filterFunc filters the candidate list for elements satisfying a custom
510 | // boolean function.
511 | type filterFunc struct {
512 | fn func(e *Element) string
513 | }
514 |
515 | func newFilterFunc(fn func(e *Element) string) *filterFunc {
516 | return &filterFunc{fn}
517 | }
518 |
519 | func (f *filterFunc) apply(p *pather) {
520 | for _, c := range p.candidates {
521 | if f.fn(c) != "" {
522 | p.scratch = append(p.scratch, c)
523 | }
524 | }
525 | p.candidates, p.scratch = p.scratch, p.candidates[0:0]
526 | }
527 |
528 | // filterFuncVal filters the candidate list for elements containing a value
529 | // matching the result of a custom function.
530 | type filterFuncVal struct {
531 | fn func(e *Element) string
532 | val string
533 | }
534 |
535 | func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal {
536 | return &filterFuncVal{fn, value}
537 | }
538 |
539 | func (f *filterFuncVal) apply(p *pather) {
540 | for _, c := range p.candidates {
541 | if f.fn(c) == f.val {
542 | p.scratch = append(p.scratch, c)
543 | }
544 | }
545 | p.candidates, p.scratch = p.scratch, p.candidates[0:0]
546 | }
547 |
548 | // filterChild filters the candidate list for elements having
549 | // a child element with the specified tag.
550 | type filterChild struct {
551 | space, tag string
552 | }
553 |
554 | func newFilterChild(str string) *filterChild {
555 | s, l := spaceDecompose(str)
556 | return &filterChild{s, l}
557 | }
558 |
559 | func (f *filterChild) apply(p *pather) {
560 | for _, c := range p.candidates {
561 | for _, cc := range c.Child {
562 | if cc, ok := cc.(*Element); ok &&
563 | spaceMatch(f.space, cc.Space) &&
564 | f.tag == cc.Tag {
565 | p.scratch = append(p.scratch, c)
566 | }
567 | }
568 | }
569 | p.candidates, p.scratch = p.scratch, p.candidates[0:0]
570 | }
571 |
572 | // filterChildText filters the candidate list for elements having
573 | // a child element with the specified tag and text.
574 | type filterChildText struct {
575 | space, tag, text string
576 | }
577 |
578 | func newFilterChildText(str, text string) *filterChildText {
579 | s, l := spaceDecompose(str)
580 | return &filterChildText{s, l, text}
581 | }
582 |
583 | func (f *filterChildText) apply(p *pather) {
584 | for _, c := range p.candidates {
585 | for _, cc := range c.Child {
586 | if cc, ok := cc.(*Element); ok &&
587 | spaceMatch(f.space, cc.Space) &&
588 | f.tag == cc.Tag &&
589 | f.text == cc.Text() {
590 | p.scratch = append(p.scratch, c)
591 | }
592 | }
593 | }
594 | p.candidates, p.scratch = p.scratch, p.candidates[0:0]
595 | }
596 |
--------------------------------------------------------------------------------
/path_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015-2019 Brett Vickers.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 |
5 | package etree
6 |
7 | import "testing"
8 |
9 | var testXML = `
10 |
11 |
12 |
13 |
14 |
15 |
16 | Everyday Italian
17 | Giada De Laurentiis
18 | 2005
19 | 30.00
20 | Clarkson Potter
21 |
22 |
23 |
24 | Harry Potter
25 | J K. Rowling
26 | 2005
27 | 29.99
28 |
29 |
30 |
31 |
32 |
33 | XQuery Kick Start
34 | James McGovern
35 | Per Bothner
36 | Kurt Cagle
37 | James Linn
38 | Vaidyanathan Nagarajan
39 | 2003
40 | 49.99
41 |
42 |
43 |
44 |
45 |
46 |
47 | Learning XML
48 | Erik T. Ray
49 | 2003
50 | 39.95
51 |
52 |
53 |
54 | `
55 |
// test pairs a path string with the result TestPath expects for it. The
// result is nil when nothing should match, a string when exactly one
// element with that text should match, a []string holding the texts of
// all matching elements in order, or an errorResult when the path should
// fail to compile.
type test struct {
	path   string
	result interface{}
}

// errorResult is the error message expected from compiling an invalid
// path.
type errorResult string
62 |
// tests lists the path queries that TestPath evaluates against the
// document parsed from testXML, together with their expected results.
var tests = []test{
	// basic queries
	{"./bookstore/book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{"./bookstore/book/author", []string{"Giada De Laurentiis", "J K. Rowling", "James McGovern", "Per Bothner", "Kurt Cagle", "James Linn", "Vaidyanathan Nagarajan", "Erik T. Ray"}},
	{"./bookstore/book/year", []string{"2005", "2005", "2003", "2003"}},
	{"./bookstore/book/p:price", []string{"30.00", "29.99", "39.95"}},
	{"./bookstore/book/isbn", nil},

	// descendant queries
	{"//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{"//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{".//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{".//bookstore//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{".//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{".//p:price/.", []string{"30.00", "29.99", "39.95"}},
	{".//price", []string{"30.00", "29.99", "49.99", "39.95"}},

	// positional queries
	{"./bookstore/book[1]/title", "Everyday Italian"},
	{"./bookstore/book[4]/title", "Learning XML"},
	{"./bookstore/book[5]/title", nil},
	{"./bookstore/book[3]/author[0]", "James McGovern"},
	{"./bookstore/book[3]/author[1]", "James McGovern"},
	{"./bookstore/book[3]/author[3]/./.", "Kurt Cagle"},
	{"./bookstore/book[3]/author[6]", nil},
	{"./bookstore/book[-1]/title", "Learning XML"},
	{"./bookstore/book[-4]/title", "Everyday Italian"},
	{"./bookstore/book[-5]/title", nil},

	// text function queries
	{"./bookstore/book[author='James McGovern']/title", "XQuery Kick Start"},
	{"./bookstore/book[author='Per Bothner']/title", "XQuery Kick Start"},
	{"./bookstore/book[author='Kurt Cagle']/title", "XQuery Kick Start"},
	{"./bookstore/book[author='James Linn']/title", "XQuery Kick Start"},
	{"./bookstore/book[author='Vaidyanathan Nagarajan']/title", "XQuery Kick Start"},
	{"//book[p:price='29.99']/title", "Harry Potter"},
	{"//book[price='29.99']/title", "Harry Potter"},
	{"//book/price[text()='29.99']", "29.99"},
	{"//book/author[text()='Kurt Cagle']", "Kurt Cagle"},
	{"//book/editor[text()]", []string{"Clarkson Potter", "\n\t\t"}},

	// namespace function queries
	{"//*[namespace-uri()]", []string{"30.00", "29.99", "39.95"}},
	{"//*[namespace-uri()='urn:books-com:prices']", []string{"30.00", "29.99", "39.95"}},
	{"//*[namespace-uri()='foo']", nil},
	{"//*[namespace-prefix()]", []string{"30.00", "29.99", "39.95"}},
	{"//*[namespace-prefix()='p']", []string{"30.00", "29.99", "39.95"}},
	{"//*[name()='p:price']", []string{"30.00", "29.99", "39.95"}},
	{"//*[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}},
	{"//price[namespace-uri()='']", []string{"49.99"}},
	{"//price[namespace-prefix()='']", []string{"49.99"}},
	{"//price[name()='price']", []string{"49.99"}},
	{"//price[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}},

	// attribute queries
	{"./bookstore/book[@category='WEB']/title", []string{"XQuery Kick Start", "Learning XML"}},
	{"./bookstore/book[@path='/books/xml']/title", []string{"Learning XML"}},
	{"./bookstore/book[@category='COOKING']/title[@lang='en']", "Everyday Italian"},
	{`./bookstore/book[@category="COOKING"]/title[@lang="en"]`, "Everyday Italian"},
	{"./bookstore/book/title[@lang='en'][@sku='150']", "Harry Potter"},
	{"./bookstore/book/title[@lang='fr']", nil},
	{"//p:price[@p:tax='1.99']", []string{"29.99"}},
	{"//p:price[@tax='1.99']", []string{"29.99"}},
	{"//p:price[@p:tax]", []string{"29.99"}},
	{"//p:price[@tax]", []string{"29.99"}},

	// parent queries
	{"./bookstore/book[@category='COOKING']/title/../../book[4]/title", "Learning XML"},

	// root queries
	{"/bookstore/book[1]/title", "Everyday Italian"},
	{"/bookstore/book[4]/title", "Learning XML"},
	{"/bookstore/book[5]/title", nil},
	{"/bookstore/book[3]/author[0]", "James McGovern"},
	{"/bookstore/book[3]/author[1]", "James McGovern"},
	{"/bookstore/book[3]/author[3]/./.", "Kurt Cagle"},
	{"/bookstore/book[3]/author[6]", nil},
	{"/bookstore/book[-1]/title", "Learning XML"},
	{"/bookstore/book[-4]/title", "Everyday Italian"},
	{"/bookstore/book[-5]/title", nil},

	// bad paths
	{"./bookstore/book[]", errorResult("etree: path contains an empty filter expression.")},
	{"./bookstore/book[@category='WEB'", errorResult("etree: path has invalid filter [brackets].")},
	{"./bookstore/book[@category='WEB]", errorResult("etree: path has mismatched filter quotes.")},
	{`./bookstore/book[@category='WEB"]`, errorResult("etree: path has mismatched filter quotes.")},
	{`./bookstore/book[@category="WEB']`, errorResult("etree: path has mismatched filter quotes.")},
	{"./bookstore/book[author]a", errorResult("etree: path has invalid filter [brackets].")},
	{"/][", errorResult("etree: path has invalid filter [brackets].")},
}
153 |
154 | func TestPath(t *testing.T) {
155 | doc := NewDocument()
156 | err := doc.ReadFromString(testXML)
157 | if err != nil {
158 | t.Error(err)
159 | }
160 |
161 | for _, test := range tests {
162 | path, err := CompilePath(test.path)
163 | if err != nil {
164 | if r, ok := test.result.(errorResult); !ok || err.Error() != string(r) {
165 | fail(t, test)
166 | }
167 | continue
168 | }
169 |
170 | // Test both FindElementsPath and FindElementPath
171 | element := doc.FindElementPath(path)
172 | elements := doc.FindElementsPath(path)
173 |
174 | switch s := test.result.(type) {
175 | case errorResult:
176 | fail(t, test)
177 | case nil:
178 | if element != nil || len(elements) != 0 {
179 | fail(t, test)
180 | }
181 | case string:
182 | if element == nil || element.Text() != s ||
183 | len(elements) != 1 || elements[0].Text() != s {
184 | fail(t, test)
185 | }
186 | case []string:
187 | if element == nil || element.Text() != s[0] || len(elements) != len(s) {
188 | fail(t, test)
189 | continue
190 | }
191 | for i := 0; i < len(elements); i++ {
192 | if elements[i].Text() != s[i] {
193 | fail(t, test)
194 | break
195 | }
196 | }
197 | }
198 |
199 | }
200 | }
201 |
202 | func fail(t *testing.T, test test) {
203 | t.Helper()
204 | t.Errorf("etree: failed test '%s'\n", test.path)
205 | }
206 |
207 | func TestAbsolutePath(t *testing.T) {
208 | doc := NewDocument()
209 | err := doc.ReadFromString(testXML)
210 | if err != nil {
211 | t.Error(err)
212 | }
213 |
214 | elements := doc.FindElements("//book/author")
215 | for _, e := range elements {
216 | title := e.FindElement("/bookstore/book[1]/title")
217 | if title == nil || title.Text() != "Everyday Italian" {
218 | t.Errorf("etree: absolute path test failed")
219 | }
220 |
221 | title = e.FindElement("//book[p:price='29.99']/title")
222 | if title == nil || title.Text() != "Harry Potter" {
223 | t.Errorf("etree: absolute path test failed")
224 | }
225 | }
226 | }
227 |
--------------------------------------------------------------------------------