├── .github
    └── workflows
    │   └── go.yml
├── CONTRIBUTORS
├── LICENSE
├── README.md
├── RELEASE_NOTES.md
├── etree.go
├── etree_test.go
├── example_test.go
├── go.mod
├── helpers.go
├── path.go
└── path_test.go


/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | name: Go
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | permissions:
 6 |   contents: read
 7 | 
 8 | jobs:
 9 | 
10 |   analyze:
11 |     name: Analyze
12 |     runs-on: ubuntu-latest
13 | 
14 |     permissions:
15 |       actions: read
16 |       contents: read
17 |       security-events: write
18 | 
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         language: ["go"]
23 | 
24 |     steps:
25 |       - name: Checkout repository
26 |         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
27 | 
28 |       - name: Initialize CodeQL
29 |         uses: github/codeql-action/init@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12
30 |         with:
31 |           languages: ${{ matrix.language }}
32 | 
33 |       - name: Autobuild
34 |         uses: github/codeql-action/autobuild@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12
35 | 
36 |       - name: Perform CodeQL Analysis
37 |         uses: github/codeql-action/analyze@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12
38 |         with:
39 |           category: "/language:${{matrix.language}}"
40 | 
41 |   build:
42 |     name: Build
43 |     runs-on: ubuntu-latest
44 | 
45 |     strategy:
46 |       matrix:
47 |         go-version: [ '1.21', '1.22.x' ]
48 | 
49 |     steps:
50 |       - name: Checkout repository
51 |         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
52 | 
53 |       - name: Setup Go ${{ matrix.go-version }}
54 |         uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
55 |         with:
56 |           go-version: ${{ matrix.go-version }}
57 | 
58 |       - name: Build
59 |         run: go build -v ./...
60 | 
61 |       - name: Test
62 |         run: go test -v ./...
63 | 


--------------------------------------------------------------------------------
/CONTRIBUTORS:
--------------------------------------------------------------------------------
 1 | Brett Vickers (beevik)
 2 | Felix Geisendörfer (felixge)
 3 | Kamil Kisiel (kisielk)
 4 | Graham King (grahamking)
 5 | Matt Smith (ma314smith)
 6 | Michal Jemala (michaljemala)
 7 | Nicolas Piganeau (npiganeau)
 8 | Chris Brown (ccbrown)
 9 | Earncef Sequeira (earncef)
10 | Gabriel de Labachelerie (wuzuf)
11 | Martin Dosch (mdosch)
12 | Hugo Wetterberg (hugowetterberg)
13 | Tobias Theel (nerzal)
14 | Daniel Potapov (dpotapov)
15 | Mikhail Ferapontow (MikhailFerapontow)
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2015-2024 Brett Vickers. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions
 5 | are met:
 6 | 
 7 |    1. Redistributions of source code must retain the above copyright
 8 |       notice, this list of conditions and the following disclaimer.
 9 | 
10 |    2. Redistributions in binary form must reproduce the above copyright
11 |       notice, this list of conditions and the following disclaimer in the
12 |       documentation and/or other materials provided with the distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY
15 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR
18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![GoDoc](https://godoc.org/github.com/beevik/etree?status.svg)](https://godoc.org/github.com/beevik/etree)
  2 | [![Go](https://github.com/beevik/etree/actions/workflows/go.yml/badge.svg)](https://github.com/beevik/etree/actions/workflows/go.yml)
  3 | 
  4 | etree
  5 | =====
  6 | 
  7 | The etree package is a lightweight, pure go package that expresses XML in
  8 | the form of an element tree.  Its design was inspired by the Python
  9 | [ElementTree](http://docs.python.org/2/library/xml.etree.elementtree.html)
 10 | module.
 11 | 
 12 | Some of the package's capabilities and features:
 13 | 
 14 | * Represents XML documents as trees of elements for easy traversal.
 15 | * Imports, serializes, modifies or creates XML documents from scratch.
 16 | * Writes and reads XML to/from files, byte slices, strings and io interfaces.
 17 | * Performs simple or complex searches with lightweight XPath-like query APIs.
 18 | * Auto-indents XML using spaces or tabs for better readability.
 19 | * Implemented in pure go; depends only on standard go libraries.
 20 | * Built on top of the go [encoding/xml](http://golang.org/pkg/encoding/xml)
 21 |   package.
 22 | 
 23 | ### Creating an XML document
 24 | 
 25 | The following example creates an XML document from scratch using the etree
 26 | package and outputs its indented contents to stdout.
 27 | ```go
 28 | doc := etree.NewDocument()
 29 | doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
 30 | doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
 31 | 
 32 | people := doc.CreateElement("People")
 33 | people.CreateComment("These are all known people")
 34 | 
 35 | jon := people.CreateElement("Person")
 36 | jon.CreateAttr("name", "Jon")
 37 | 
 38 | sally := people.CreateElement("Person")
 39 | sally.CreateAttr("name", "Sally")
 40 | 
 41 | doc.Indent(2)
 42 | doc.WriteTo(os.Stdout)
 43 | ```
 44 | 
 45 | Output:
 46 | ```xml
 47 | <?xml version="1.0" encoding="UTF-8"?>
 48 | <?xml-stylesheet type="text/xsl" href="style.xsl"?>
 49 | <People>
 50 |   <!--These are all known people-->
 51 |   <Person name="Jon"/>
 52 |   <Person name="Sally"/>
 53 | </People>
 54 | ```
 55 | 
 56 | ### Reading an XML file
 57 | 
 58 | Suppose you have a file on disk called `bookstore.xml` containing the
 59 | following data:
 60 | 
 61 | ```xml
 62 | <bookstore xmlns:p="urn:schemas-books-com:prices">
 63 | 
 64 |   <book category="COOKING">
 65 |     <title lang="en">Everyday Italian</title>
 66 |     <author>Giada De Laurentiis</author>
 67 |     <year>2005</year>
 68 |     <p:price>30.00</p:price>
 69 |   </book>
 70 | 
 71 |   <book category="CHILDREN">
 72 |     <title lang="en">Harry Potter</title>
 73 |     <author>J K. Rowling</author>
 74 |     <year>2005</year>
 75 |     <p:price>29.99</p:price>
 76 |   </book>
 77 | 
 78 |   <book category="WEB">
 79 |     <title lang="en">XQuery Kick Start</title>
 80 |     <author>James McGovern</author>
 81 |     <author>Per Bothner</author>
 82 |     <author>Kurt Cagle</author>
 83 |     <author>James Linn</author>
 84 |     <author>Vaidyanathan Nagarajan</author>
 85 |     <year>2003</year>
 86 |     <p:price>49.99</p:price>
 87 |   </book>
 88 | 
 89 |   <book category="WEB">
 90 |     <title lang="en">Learning XML</title>
 91 |     <author>Erik T. Ray</author>
 92 |     <year>2003</year>
 93 |     <p:price>39.95</p:price>
 94 |   </book>
 95 | 
 96 | </bookstore>
 97 | ```
 98 | 
 99 | This code reads the file's contents into an etree document.
100 | ```go
101 | doc := etree.NewDocument()
102 | if err := doc.ReadFromFile("bookstore.xml"); err != nil {
103 |     panic(err)
104 | }
105 | ```
106 | 
107 | You can also read XML from a string, a byte slice, or an `io.Reader`.
108 | 
109 | ### Processing elements and attributes
110 | 
111 | This example illustrates several ways to access elements and attributes using
112 | etree selection queries.
113 | ```go
114 | root := doc.SelectElement("bookstore")
115 | fmt.Println("ROOT element:", root.Tag)
116 | 
117 | for _, book := range root.SelectElements("book") {
118 |     fmt.Println("CHILD element:", book.Tag)
119 |     if title := book.SelectElement("title"); title != nil {
120 |         lang := title.SelectAttrValue("lang", "unknown")
121 |         fmt.Printf("  TITLE: %s (%s)\n", title.Text(), lang)
122 |     }
123 |     for _, attr := range book.Attr {
124 |         fmt.Printf("  ATTR: %s=%s\n", attr.Key, attr.Value)
125 |     }
126 | }
127 | ```
128 | Output:
129 | ```
130 | ROOT element: bookstore
131 | CHILD element: book
132 |   TITLE: Everyday Italian (en)
133 |   ATTR: category=COOKING
134 | CHILD element: book
135 |   TITLE: Harry Potter (en)
136 |   ATTR: category=CHILDREN
137 | CHILD element: book
138 |   TITLE: XQuery Kick Start (en)
139 |   ATTR: category=WEB
140 | CHILD element: book
141 |   TITLE: Learning XML (en)
142 |   ATTR: category=WEB
143 | ```
144 | 
145 | ### Path queries
146 | 
147 | This example uses etree's path functions to select all book titles that fall
148 | into the category of 'WEB'.  The double-slash prefix in the path causes the
149 | search for book elements to occur recursively; book elements may appear at any
150 | level of the XML hierarchy.
151 | ```go
152 | for _, t := range doc.FindElements("//book[@category='WEB']/title") {
153 |     fmt.Println("Title:", t.Text())
154 | }
155 | ```
156 | 
157 | Output:
158 | ```
159 | Title: XQuery Kick Start
160 | Title: Learning XML
161 | ```
162 | 
163 | This example finds the first book element under the root bookstore element and
164 | outputs the tag and text of each of its child elements.
165 | ```go
166 | for _, e := range doc.FindElements("./bookstore/book[1]/*") {
167 |     fmt.Printf("%s: %s\n", e.Tag, e.Text())
168 | }
169 | ```
170 | 
171 | Output:
172 | ```
173 | title: Everyday Italian
174 | author: Giada De Laurentiis
175 | year: 2005
176 | price: 30.00
177 | ```
178 | 
179 | This example finds all books with a price of 49.99 and outputs their titles.
180 | ```go
181 | path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title")
182 | for _, e := range doc.FindElementsPath(path) {
183 |     fmt.Println(e.Text())
184 | }
185 | ```
186 | 
187 | Output:
188 | ```
189 | XQuery Kick Start
190 | ```
191 | 
192 | Note that this example uses the FindElementsPath function, which takes as an
193 | argument a pre-compiled path object. Use precompiled paths when you plan to
194 | search with the same path more than once.
195 | 
196 | ### Other features
197 | 
198 | These are just a few examples of the things the etree package can do. See the
199 | [documentation](http://godoc.org/github.com/beevik/etree) for a complete
200 | description of its capabilities.
201 | 
202 | ### Contributing
203 | 
204 | This project accepts contributions. Just fork the repo and submit a pull
205 | request!
206 | 


--------------------------------------------------------------------------------
/RELEASE_NOTES.md:
--------------------------------------------------------------------------------
  1 | Release 1.5.1
  2 | =============
  3 | 
  4 | **Fixes**
  5 | 
  6 | * Fixed a bug in `InsertChildAt`.
  7 | 
  8 | Release 1.5.0
  9 | =============
 10 | 
 11 | **Changes**
 12 | 
 13 | * Added `Element` function `CreateChild`, which calls a continuation function
 14 |   after creating and adding a child element.
 15 | 
 16 | **Fixes**
 17 | 
 18 | * Removed a potential conflict between two `ReadSettings` values. When
 19 |   `AttrSingleQuote` is true, `CanonicalAttrVal` is forced to be false.
 20 | 
 21 | Release 1.4.1
 22 | =============
 23 | 
 24 | **Changes**
 25 | 
 26 | * Minimal go version updated to 1.21.
 27 | * Default-initialized CharsetReader causes same result as NewDocument().
 28 | * When reading an XML document, attributes are parsed more efficiently.
 29 | 
 30 | Release v1.4.0
 31 | ==============
 32 | 
 33 | **New Features**
 34 | 
 35 | * Add `AutoClose` option to `ReadSettings`.
 36 | * Add `ValidateInput` to `ReadSettings`.
 37 | * Add `NotNil` function to `Element`.
 38 | * Add `NextSibling` and `PrevSibling` functions to `Element`.
 39 | 
 40 | Release v1.3.0
 41 | ==============
 42 | 
 43 | **New Features**
 44 | 
 45 | * Add support for double-quotes in filter path queries.
 46 | * Add `PreserveDuplicateAttrs` to `ReadSettings`.
 47 | * Add `ReindexChildren` to `Element`.
 48 | 
 49 | Release v1.2.0
 50 | ==============
 51 | 
 52 | **New Features**
 53 | 
 54 | * Add the ability to write XML fragments using Token WriteTo functions.
 55 | * Add the ability to re-indent an XML element as though it were the root of
 56 |   the document.
 57 | * Add a ReadSettings option to preserve CDATA blocks when reading an XML
 58 |   document.
 59 | 
 60 | Release v1.1.4
 61 | ==============
 62 | 
 63 | **New Features**
 64 | 
 65 | * Add the ability to preserve whitespace in leaf elements during indent.
 66 | * Add the ability to suppress a document-trailing newline during indent.
 67 | * Add choice of XML attribute quoting style (single-quote or double-quote).
 68 | 
 69 | **Removed Features**
 70 | 
 71 | * Removed the CDATA preservation change introduced in v1.1.3. It was
 72 |   implemented in a way that broke the ability to process XML documents
 73 |   encoded using non-UTF8 character sets.
 74 | 
 75 | Release v1.1.3
 76 | ==============
 77 | 
 78 | * XML reads now preserve CDATA sections instead of converting them to
 79 |   standard character data.
 80 | 
 81 | Release v1.1.2
 82 | ==============
 83 | 
 84 | * Fixed a path parsing bug.
 85 | * The `Element.Text` function now handles comments embedded between
 86 |   character data spans.
 87 | 
 88 | Release v1.1.1
 89 | ==============
 90 | 
 91 | * Updated go version in `go.mod` to 1.20
 92 | 
 93 | Release v1.1.0
 94 | ==============
 95 | 
 96 | **New Features**
 97 | 
 98 | * New attribute helpers.
 99 |   * Added the `Element.SortAttrs` method, which lexicographically sorts an
100 |     element's attributes by key.
101 | * New `ReadSettings` properties.
102 |   * Added `Entity` for the support of custom entity maps.
103 | * New `WriteSettings` properties.
104 |   * Added `UseCRLF` to allow the output of CR-LF newlines instead of the
105 |     default LF newlines. This is useful on Windows systems.
106 | * Additional support for text and CDATA sections.
107 |   * The `Element.Text` method now returns the concatenation of all consecutive
108 |     character data tokens immediately following an element's opening tag.
109 |   * Added `Element.SetCData` to replace the character data immediately
110 |     following an element's opening tag with a CDATA section.
111 |   * Added `Element.CreateCData` to create and add a CDATA section child
112 |     `CharData` token to an element.
113 |   * Added `Element.CreateText` to create and add a child text `CharData` token
114 |     to an element.
115 |   * Added `NewCData` to create a parentless CDATA section `CharData` token.
116 |   * Added `NewText` to create a parentless text `CharData`
117 |     token.
118 |   * Added `CharData.IsCData` to detect if the token contains a CDATA section.
119 |   * Added `CharData.IsWhitespace` to detect if the token contains whitespace
120 |     inserted by one of the document Indent functions.
121 |   * Modified `Element.SetText` so that it replaces a run of consecutive
122 |     character data tokens following the element's opening tag (instead of just
123 |     the first one).
124 | * New "tail text" support.
125 |   * Added the `Element.Tail` method, which returns the text immediately
126 |     following an element's closing tag.
127 |   * Added the `Element.SetTail` method, which modifies the text immediately
128 |     following an element's closing tag.
129 | * New element child insertion and removal methods.
130 |   * Added the `Element.InsertChildAt` method, which inserts a new child token
131 |     before the specified child token index.
132 |   * Added the `Element.RemoveChildAt` method, which removes the child token at
133 |     the specified child token index.
134 | * New element and attribute queries.
135 |   * Added the `Element.Index` method, which returns the element's index within
136 |     its parent element's child token list.
137 |   * Added the `Element.NamespaceURI` method to return the namespace URI
138 |     associated with an element.
139 |   * Added the `Attr.NamespaceURI` method to return the namespace URI
140 |     associated with an attribute.
141 |   * Added the `Attr.Element` method to return the element that an attribute
142 |     belongs to.
143 | * New Path filter functions.
144 |   * Added `[local-name()='val']` to keep elements whose unprefixed tag matches
145 |     the desired value.
146 |   * Added `[name()='val']` to keep elements whose full tag matches the desired
147 |     value.
148 |   * Added `[namespace-prefix()='val']` to keep elements whose namespace prefix
149 |     matches the desired value.
150 |   * Added `[namespace-uri()='val']` to keep elements whose namespace URI
151 |     matches the desired value.
152 | 
153 | **Bug Fixes**
154 | 
155 | * A default XML `CharSetReader` is now used to prevent failed parsing of XML
156 |   documents using certain encodings.
157 |   ([Issue](https://github.com/beevik/etree/issues/53)).
158 | * All characters are now properly escaped according to XML parsing rules.
159 |   ([Issue](https://github.com/beevik/etree/issues/55)).
160 | * The `Document.Indent` and `Document.IndentTabs` functions no longer insert
161 |   empty string `CharData` tokens.
162 | 
163 | **Deprecated**
164 | 
165 | * `Element`
166 |     * The `InsertChild` method is deprecated. Use `InsertChildAt` instead.
167 |     * The `CreateCharData` method is deprecated. Use `CreateText` instead.
168 | * `CharData`
169 |     * The `NewCharData` method is deprecated. Use `NewText` instead.
170 | 
171 | 
172 | Release v1.0.1
173 | ==============
174 | 
175 | **Changes**
176 | 
177 | * Added support for absolute etree Path queries. An absolute path begins with
178 |   `/` or `//` and begins its search from the element's document root.
179 | * Added [`GetPath`](https://godoc.org/github.com/beevik/etree#Element.GetPath)
180 |   and [`GetRelativePath`](https://godoc.org/github.com/beevik/etree#Element.GetRelativePath)
181 |   functions to the [`Element`](https://godoc.org/github.com/beevik/etree#Element)
182 |   type.
183 | 
184 | **Breaking changes**
185 | 
186 | * A path starting with `//` is now interpreted as an absolute path.
187 |   Previously, it was interpreted as a relative path starting from the element
188 |   whose
189 |   [`FindElement`](https://godoc.org/github.com/beevik/etree#Element.FindElement)
190 |   method was called.  To remain compatible with this release, all paths
191 |   prefixed with `//` should be prefixed with `.//` when called from any
192 |   element other than the document's root.
193 | * [**edit 2/1/2019**]: Minor releases should not contain breaking changes.
194 |   Even though this breaking change was very minor, it was a mistake to include
195 |   it in this minor release. In the future, all breaking changes will be
196 |   limited to major releases (e.g., version 2.0.0).
197 | 
198 | Release v1.0.0
199 | ==============
200 | 
201 | Initial release.
202 | 


--------------------------------------------------------------------------------
/etree.go:
--------------------------------------------------------------------------------
   1 | // Copyright 2015-2019 Brett Vickers.
   2 | // Use of this source code is governed by a BSD-style
   3 | // license that can be found in the LICENSE file.
   4 | 
   5 | // Package etree provides XML services through an Element Tree
   6 | // abstraction.
   7 | package etree
   8 | 
   9 | import (
  10 | 	"bufio"
  11 | 	"bytes"
  12 | 	"encoding/xml"
  13 | 	"errors"
  14 | 	"io"
  15 | 	"os"
  16 | 	"slices"
  17 | 	"strings"
  18 | )
  19 | 
  20 | const (
  21 | 	// NoIndent is used with the IndentSettings record to remove all
  22 | 	// indenting.
  23 | 	NoIndent = -1
  24 | )
  25 | 
  26 | // ErrXML is returned when XML parsing fails due to incorrect formatting.
  27 | var ErrXML = errors.New("etree: invalid XML format")
  28 | 
  29 | // cdataPrefix is used to detect CDATA text when ReadSettings.PreserveCData is
  30 | // true.
  31 | var cdataPrefix = []byte("<![CDATA[")
  32 | 
  33 | // ReadSettings determine the default behavior of the Document's ReadFrom*
  34 | // functions.
  35 | type ReadSettings struct {
  36 | 	// CharsetReader, if non-nil, defines a function to generate
  37 | 	// charset-conversion readers, converting from the provided non-UTF-8
  38 | 	// charset into UTF-8. If nil, the ReadFrom* functions will use a
  39 | 	// "pass-through" CharsetReader that performs no conversion on the reader's
  40 | 	// data regardless of the value of the "charset" encoding string. Default:
  41 | 	// nil.
  42 | 	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
  43 | 
  44 | 	// Permissive allows input containing common mistakes such as missing tags
  45 | 	// or attribute values. Default: false.
  46 | 	Permissive bool
  47 | 
  48 | 	// Preserve CDATA character data blocks when decoding XML (instead of
  49 | 	// converting it to normal character text). This entails additional
  50 | 	// processing and memory usage during ReadFrom* operations. Default:
  51 | 	// false.
  52 | 	PreserveCData bool
  53 | 
  54 | 	// When an element has two or more attributes with the same name,
  55 | 	// preserve them instead of keeping only one. Default: false.
  56 | 	PreserveDuplicateAttrs bool
  57 | 
  58 | 	// ValidateInput forces all ReadFrom* functions to validate that the
  59 | 	// provided input is composed of "well-formed"(*) XML before processing it.
  60 | 	// If invalid XML is detected, the ReadFrom* functions return an error.
  61 | 	// Because this option requires the input to be processed twice, it incurs a
  62 | 	// significant performance penalty. Default: false.
  63 | 	//
  64 | 	// (*) Note that this definition of "well-formed" is in the context of the
  65 | 	// go standard library's encoding/xml package. Go's encoding/xml package
  66 | 	// does not, in fact, guarantee well-formed XML as specified by the W3C XML
  67 | 	// recommendation. See: https://github.com/golang/go/issues/68299
  68 | 	ValidateInput bool
  69 | 
  70 | 	// Entity to be passed to standard xml.Decoder. Default: nil.
  71 | 	Entity map[string]string
  72 | 
  73 | 	// When Permissive is true, AutoClose indicates a set of elements to
  74 | 	// consider closed immediately after they are opened, regardless of
  75 | 	// whether an end element is present. Commonly set to xml.HTMLAutoClose.
  76 | 	// Default: nil.
  77 | 	AutoClose []string
  78 | }
  79 | 
  80 | // defaultCharsetReader is used by the xml decoder when the ReadSettings
  81 | // CharsetReader value is nil. It behaves as a "pass-through", ignoring
  82 | // the requested charset parameter and skipping conversion altogether.
  83 | func defaultCharsetReader(charset string, input io.Reader) (io.Reader, error) {
  84 | 	return input, nil
  85 | }
  86 | 
  87 | // dup creates a duplicate of the ReadSettings object.
  88 | func (s *ReadSettings) dup() ReadSettings {
  89 | 	var entityCopy map[string]string
  90 | 	if s.Entity != nil {
  91 | 		entityCopy = make(map[string]string)
  92 | 		for k, v := range s.Entity {
  93 | 			entityCopy[k] = v
  94 | 		}
  95 | 	}
  96 | 	return ReadSettings{
  97 | 		CharsetReader: s.CharsetReader,
  98 | 		Permissive:    s.Permissive,
  99 | 		Entity:        entityCopy,
 100 | 	}
 101 | }
 102 | 
 103 | // WriteSettings determine the behavior of the Document's WriteTo* functions.
 104 | type WriteSettings struct {
 105 | 	// CanonicalEndTags forces the production of XML end tags, even for
 106 | 	// elements that have no child elements. Default: false.
 107 | 	CanonicalEndTags bool
 108 | 
 109 | 	// CanonicalText forces the production of XML character references for
 110 | 	// text data characters &, <, and >. If false, XML character references
 111 | 	// are also produced for " and '. Default: false.
 112 | 	CanonicalText bool
 113 | 
 114 | 	// CanonicalAttrVal forces the production of XML character references for
 115 | 	// attribute value characters &, < and ". If false, XML character
 116 | 	// references are also produced for > and '. Ignored when AttrSingleQuote
 117 | 	// is true. Default: false.
 118 | 	CanonicalAttrVal bool
 119 | 
 120 | 	// AttrSingleQuote causes attributes to use single quotes (attr='example')
 121 | 	// instead of double quotes (attr = "example") when set to true. Default:
 122 | 	// false.
 123 | 	AttrSingleQuote bool
 124 | 
 125 | 	// UseCRLF causes the document's Indent* functions to use a carriage return
 126 | 	// followed by a linefeed ("\r\n") when outputting a newline. If false,
 127 | 	// only a linefeed is used ("\n"). Default: false.
 128 | 	//
 129 | 	// Deprecated: UseCRLF is deprecated. Use IndentSettings.UseCRLF instead.
 130 | 	UseCRLF bool
 131 | }
 132 | 
 133 | // dup creates a duplicate of the WriteSettings object.
 134 | func (s *WriteSettings) dup() WriteSettings {
 135 | 	return *s
 136 | }
 137 | 
 138 | // IndentSettings determine the behavior of the Document's Indent* functions.
 139 | type IndentSettings struct {
 140 | 	// Spaces indicates the number of spaces to insert for each level of
 141 | 	// indentation. Set to etree.NoIndent to remove all indentation. Ignored
 142 | 	// when UseTabs is true. Default: 4.
 143 | 	Spaces int
 144 | 
 145 | 	// UseTabs causes tabs to be used instead of spaces when indenting.
 146 | 	// Default: false.
 147 | 	UseTabs bool
 148 | 
 149 | 	// UseCRLF causes newlines to be written as a carriage return followed by
 150 | 	// a linefeed ("\r\n"). If false, only a linefeed character is output
 151 | 	// for a newline ("\n"). Default: false.
 152 | 	UseCRLF bool
 153 | 
 154 | 	// PreserveLeafWhitespace causes indent functions to preserve whitespace
 155 | 	// within XML elements containing only non-CDATA character data. Default:
 156 | 	// false.
 157 | 	PreserveLeafWhitespace bool
 158 | 
 159 | 	// SuppressTrailingWhitespace suppresses the generation of a trailing
 160 | 	// whitespace characters (such as newlines) at the end of the indented
 161 | 	// document. Default: false.
 162 | 	SuppressTrailingWhitespace bool
 163 | }
 164 | 
 165 | // NewIndentSettings creates a default IndentSettings record.
 166 | func NewIndentSettings() *IndentSettings {
 167 | 	return &IndentSettings{
 168 | 		Spaces:                     4,
 169 | 		UseTabs:                    false,
 170 | 		UseCRLF:                    false,
 171 | 		PreserveLeafWhitespace:     false,
 172 | 		SuppressTrailingWhitespace: false,
 173 | 	}
 174 | }
 175 | 
 176 | type indentFunc func(depth int) string
 177 | 
 178 | func getIndentFunc(s *IndentSettings) indentFunc {
 179 | 	if s.UseTabs {
 180 | 		if s.UseCRLF {
 181 | 			return func(depth int) string { return indentCRLF(depth, indentTabs) }
 182 | 		} else {
 183 | 			return func(depth int) string { return indentLF(depth, indentTabs) }
 184 | 		}
 185 | 	} else {
 186 | 		if s.Spaces < 0 {
 187 | 			return func(depth int) string { return "" }
 188 | 		} else if s.UseCRLF {
 189 | 			return func(depth int) string { return indentCRLF(depth*s.Spaces, indentSpaces) }
 190 | 		} else {
 191 | 			return func(depth int) string { return indentLF(depth*s.Spaces, indentSpaces) }
 192 | 		}
 193 | 	}
 194 | }
 195 | 
 196 | // Writer is the interface that wraps the Write* functions called by each token
 197 | // type's WriteTo function.
 198 | type Writer interface {
 199 | 	io.StringWriter
 200 | 	io.ByteWriter
 201 | 	io.Writer
 202 | }
 203 | 
 204 | // A Token is an interface type used to represent XML elements, character
 205 | // data, CDATA sections, XML comments, XML directives, and XML processing
 206 | // instructions.
 207 | type Token interface {
 208 | 	Parent() *Element
 209 | 	Index() int
 210 | 	WriteTo(w Writer, s *WriteSettings)
 211 | 	dup(parent *Element) Token
 212 | 	setParent(parent *Element)
 213 | 	setIndex(index int)
 214 | }
 215 | 
 216 | // A Document is a container holding a complete XML tree.
 217 | //
 218 | // A document has a single embedded element, which contains zero or more child
 219 | // tokens, one of which is usually the root element. The embedded element may
 220 | // include other children such as processing instruction tokens or character
 221 | // data tokens. The document's embedded element is never directly serialized;
 222 | // only its children are.
 223 | //
 224 | // A document also contains read and write settings, which influence the way
 225 | // the document is deserialized, serialized, and indented.
 226 | type Document struct {
 227 | 	Element
 228 | 	ReadSettings  ReadSettings
 229 | 	WriteSettings WriteSettings
 230 | }
 231 | 
 232 | // An Element represents an XML element, its attributes, and its child tokens.
 233 | type Element struct {
 234 | 	Space, Tag string   // namespace prefix and tag
 235 | 	Attr       []Attr   // key-value attribute pairs
 236 | 	Child      []Token  // child tokens (elements, comments, etc.)
 237 | 	parent     *Element // parent element
 238 | 	index      int      // token index in parent's children
 239 | }
 240 | 
 241 | // An Attr represents a key-value attribute within an XML element.
 242 | type Attr struct {
 243 | 	Space, Key string   // The attribute's namespace prefix and key
 244 | 	Value      string   // The attribute value string
 245 | 	element    *Element // element containing the attribute
 246 | }
 247 | 
 248 | // charDataFlags are used with CharData tokens to store additional settings.
 249 | type charDataFlags uint8
 250 | 
 251 | const (
 252 | 	// The CharData contains only whitespace.
 253 | 	whitespaceFlag charDataFlags = 1 << iota
 254 | 
 255 | 	// The CharData contains a CDATA section.
 256 | 	cdataFlag
 257 | )
 258 | 
 259 | // CharData may be used to represent simple text data or a CDATA section
 260 | // within an XML document. The Data property should never be modified
 261 | // directly; use the SetData function instead.
 262 | type CharData struct {
 263 | 	Data   string // the simple text or CDATA section content
 264 | 	parent *Element
 265 | 	index  int
 266 | 	flags  charDataFlags
 267 | }
 268 | 
 269 | // A Comment represents an XML comment.
 270 | type Comment struct {
 271 | 	Data   string // the comment's text
 272 | 	parent *Element
 273 | 	index  int
 274 | }
 275 | 
 276 | // A Directive represents an XML directive.
 277 | type Directive struct {
 278 | 	Data   string // the directive string
 279 | 	parent *Element
 280 | 	index  int
 281 | }
 282 | 
 283 | // A ProcInst represents an XML processing instruction.
 284 | type ProcInst struct {
 285 | 	Target string // the processing instruction target
 286 | 	Inst   string // the processing instruction value
 287 | 	parent *Element
 288 | 	index  int
 289 | }
 290 | 
 291 | // NewDocument creates an XML document without a root element.
 292 | func NewDocument() *Document {
 293 | 	return &Document{
 294 | 		Element: Element{Child: make([]Token, 0)},
 295 | 	}
 296 | }
 297 | 
 298 | // NewDocumentWithRoot creates an XML document and sets the element 'e' as its
 299 | // root element. If the element 'e' is already part of another document, it is
 300 | // first removed from its existing document.
 301 | func NewDocumentWithRoot(e *Element) *Document {
 302 | 	d := NewDocument()
 303 | 	d.SetRoot(e)
 304 | 	return d
 305 | }
 306 | 
 307 | // Copy returns a recursive, deep copy of the document.
 308 | func (d *Document) Copy() *Document {
 309 | 	return &Document{
 310 | 		Element:       *(d.Element.dup(nil).(*Element)),
 311 | 		ReadSettings:  d.ReadSettings.dup(),
 312 | 		WriteSettings: d.WriteSettings.dup(),
 313 | 	}
 314 | }
 315 | 
 316 | // Root returns the root element of the document. It returns nil if there is
 317 | // no root element.
 318 | func (d *Document) Root() *Element {
 319 | 	for _, t := range d.Child {
 320 | 		if c, ok := t.(*Element); ok {
 321 | 			return c
 322 | 		}
 323 | 	}
 324 | 	return nil
 325 | }
 326 | 
 327 | // SetRoot replaces the document's root element with the element 'e'. If the
 328 | // document already has a root element when this function is called, then the
 329 | // existing root element is unbound from the document. If the element 'e' is
 330 | // part of another document, then it is unbound from the other document.
 331 | func (d *Document) SetRoot(e *Element) {
 332 | 	if e.parent != nil {
 333 | 		e.parent.RemoveChild(e)
 334 | 	}
 335 | 
 336 | 	// If there is already a root element, replace it.
 337 | 	p := &d.Element
 338 | 	for i, t := range p.Child {
 339 | 		if _, ok := t.(*Element); ok {
 340 | 			t.setParent(nil)
 341 | 			t.setIndex(-1)
 342 | 			p.Child[i] = e
 343 | 			e.setParent(p)
 344 | 			e.setIndex(i)
 345 | 			return
 346 | 		}
 347 | 	}
 348 | 
 349 | 	// No existing root element, so add it.
 350 | 	p.addChild(e)
 351 | }
 352 | 
 353 | // ReadFrom reads XML from the reader 'r' into this document. The function
 354 | // returns the number of bytes read and any error encountered.
 355 | func (d *Document) ReadFrom(r io.Reader) (n int64, err error) {
 356 | 	if d.ReadSettings.ValidateInput {
 357 | 		b, err := io.ReadAll(r)
 358 | 		if err != nil {
 359 | 			return 0, err
 360 | 		}
 361 | 		if err := validateXML(bytes.NewReader(b), d.ReadSettings); err != nil {
 362 | 			return 0, err
 363 | 		}
 364 | 		r = bytes.NewReader(b)
 365 | 	}
 366 | 	return d.Element.readFrom(r, d.ReadSettings)
 367 | }
 368 | 
 369 | // ReadFromFile reads XML from a local file at path 'filepath' into this
 370 | // document.
 371 | func (d *Document) ReadFromFile(filepath string) error {
 372 | 	f, err := os.Open(filepath)
 373 | 	if err != nil {
 374 | 		return err
 375 | 	}
 376 | 	defer f.Close()
 377 | 
 378 | 	_, err = d.ReadFrom(f)
 379 | 	return err
 380 | }
 381 | 
 382 | // ReadFromBytes reads XML from the byte slice 'b' into the this document.
 383 | func (d *Document) ReadFromBytes(b []byte) error {
 384 | 	if d.ReadSettings.ValidateInput {
 385 | 		if err := validateXML(bytes.NewReader(b), d.ReadSettings); err != nil {
 386 | 			return err
 387 | 		}
 388 | 	}
 389 | 	_, err := d.Element.readFrom(bytes.NewReader(b), d.ReadSettings)
 390 | 	return err
 391 | }
 392 | 
 393 | // ReadFromString reads XML from the string 's' into this document.
 394 | func (d *Document) ReadFromString(s string) error {
 395 | 	if d.ReadSettings.ValidateInput {
 396 | 		if err := validateXML(strings.NewReader(s), d.ReadSettings); err != nil {
 397 | 			return err
 398 | 		}
 399 | 	}
 400 | 	_, err := d.Element.readFrom(strings.NewReader(s), d.ReadSettings)
 401 | 	return err
 402 | }
 403 | 
 404 | // validateXML determines if the data read from the reader 'r' contains
 405 | // well-formed XML according to the rules set by the go xml package.
 406 | func validateXML(r io.Reader, settings ReadSettings) error {
 407 | 	dec := newDecoder(r, settings)
 408 | 	err := dec.Decode(new(interface{}))
 409 | 	if err != nil {
 410 | 		return err
 411 | 	}
 412 | 
 413 | 	// If there are any trailing tokens after unmarshalling with Decode(),
 414 | 	// then the XML input didn't terminate properly.
 415 | 	_, err = dec.Token()
 416 | 	if err == io.EOF {
 417 | 		return nil
 418 | 	}
 419 | 	return ErrXML
 420 | }
 421 | 
 422 | // newDecoder creates an XML decoder for the reader 'r' configured using
 423 | // the provided read settings.
 424 | func newDecoder(r io.Reader, settings ReadSettings) *xml.Decoder {
 425 | 	d := xml.NewDecoder(r)
 426 | 	d.CharsetReader = settings.CharsetReader
 427 | 	if d.CharsetReader == nil {
 428 | 		d.CharsetReader = defaultCharsetReader
 429 | 	}
 430 | 	d.Strict = !settings.Permissive
 431 | 	d.Entity = settings.Entity
 432 | 	d.AutoClose = settings.AutoClose
 433 | 	return d
 434 | }
 435 | 
 436 | // WriteTo serializes the document out to the writer 'w'. The function returns
 437 | // the number of bytes written and any error encountered.
 438 | func (d *Document) WriteTo(w io.Writer) (n int64, err error) {
 439 | 	xw := newXmlWriter(w)
 440 | 	b := bufio.NewWriter(xw)
 441 | 	for _, c := range d.Child {
 442 | 		c.WriteTo(b, &d.WriteSettings)
 443 | 	}
 444 | 	err, n = b.Flush(), xw.bytes
 445 | 	return
 446 | }
 447 | 
 448 | // WriteToFile serializes the document out to the file at path 'filepath'.
 449 | func (d *Document) WriteToFile(filepath string) error {
 450 | 	f, err := os.Create(filepath)
 451 | 	if err != nil {
 452 | 		return err
 453 | 	}
 454 | 	defer f.Close()
 455 | 	_, err = d.WriteTo(f)
 456 | 	return err
 457 | }
 458 | 
 459 | // WriteToBytes serializes this document into a slice of bytes.
 460 | func (d *Document) WriteToBytes() (b []byte, err error) {
 461 | 	var buf bytes.Buffer
 462 | 	if _, err = d.WriteTo(&buf); err != nil {
 463 | 		return
 464 | 	}
 465 | 	return buf.Bytes(), nil
 466 | }
 467 | 
 468 | // WriteToString serializes this document into a string.
 469 | func (d *Document) WriteToString() (s string, err error) {
 470 | 	var b []byte
 471 | 	if b, err = d.WriteToBytes(); err != nil {
 472 | 		return
 473 | 	}
 474 | 	return string(b), nil
 475 | }
 476 | 
 477 | // Indent modifies the document's element tree by inserting character data
 478 | // tokens containing newlines and spaces for indentation. The amount of
 479 | // indentation per depth level is given by the 'spaces' parameter. Other than
 480 | // the number of spaces, default IndentSettings are used.
 481 | func (d *Document) Indent(spaces int) {
 482 | 	s := NewIndentSettings()
 483 | 	s.Spaces = spaces
 484 | 	d.IndentWithSettings(s)
 485 | }
 486 | 
 487 | // IndentTabs modifies the document's element tree by inserting CharData
 488 | // tokens containing newlines and tabs for indentation. One tab is used per
 489 | // indentation level. Other than the use of tabs, default IndentSettings
 490 | // are used.
 491 | func (d *Document) IndentTabs() {
 492 | 	s := NewIndentSettings()
 493 | 	s.UseTabs = true
 494 | 	d.IndentWithSettings(s)
 495 | }
 496 | 
 497 | // IndentWithSettings modifies the document's element tree by inserting
 498 | // character data tokens containing newlines and indentation. The behavior
 499 | // of the indentation algorithm is configured by the indent settings.
 500 | func (d *Document) IndentWithSettings(s *IndentSettings) {
 501 | 	// WriteSettings.UseCRLF is deprecated. Until removed from the package, it
 502 | 	// overrides IndentSettings.UseCRLF when true.
 503 | 	if d.WriteSettings.UseCRLF {
 504 | 		s.UseCRLF = true
 505 | 	}
 506 | 
 507 | 	d.Element.indent(0, getIndentFunc(s), s)
 508 | 
 509 | 	if s.SuppressTrailingWhitespace {
 510 | 		d.Element.stripTrailingWhitespace()
 511 | 	}
 512 | }
 513 | 
 514 | // Unindent modifies the document's element tree by removing character data
 515 | // tokens containing only whitespace. Other than the removal of indentation,
 516 | // default IndentSettings are used.
 517 | func (d *Document) Unindent() {
 518 | 	s := NewIndentSettings()
 519 | 	s.Spaces = NoIndent
 520 | 	d.IndentWithSettings(s)
 521 | }
 522 | 
 523 | // NewElement creates an unparented element with the specified tag (i.e.,
 524 | // name). The tag may include a namespace prefix followed by a colon.
 525 | func NewElement(tag string) *Element {
 526 | 	space, stag := spaceDecompose(tag)
 527 | 	return newElement(space, stag, nil)
 528 | }
 529 | 
 530 | // newElement is a helper function that creates an element and binds it to
 531 | // a parent element if possible.
 532 | func newElement(space, tag string, parent *Element) *Element {
 533 | 	e := &Element{
 534 | 		Space:  space,
 535 | 		Tag:    tag,
 536 | 		Attr:   make([]Attr, 0),
 537 | 		Child:  make([]Token, 0),
 538 | 		parent: parent,
 539 | 		index:  -1,
 540 | 	}
 541 | 	if parent != nil {
 542 | 		parent.addChild(e)
 543 | 	}
 544 | 	return e
 545 | }
 546 | 
 547 | // Copy creates a recursive, deep copy of the element and all its attributes
 548 | // and children. The returned element has no parent but can be parented to a
 549 | // another element using AddChild, or added to a document with SetRoot or
 550 | // NewDocumentWithRoot.
 551 | func (e *Element) Copy() *Element {
 552 | 	return e.dup(nil).(*Element)
 553 | }
 554 | 
 555 | // FullTag returns the element e's complete tag, including namespace prefix if
 556 | // present.
 557 | func (e *Element) FullTag() string {
 558 | 	if e.Space == "" {
 559 | 		return e.Tag
 560 | 	}
 561 | 	return e.Space + ":" + e.Tag
 562 | }
 563 | 
 564 | // NamespaceURI returns the XML namespace URI associated with the element. If
 565 | // the element is part of the XML default namespace, NamespaceURI returns the
 566 | // empty string.
 567 | func (e *Element) NamespaceURI() string {
 568 | 	if e.Space == "" {
 569 | 		return e.findDefaultNamespaceURI()
 570 | 	}
 571 | 	return e.findLocalNamespaceURI(e.Space)
 572 | }
 573 | 
 574 | // findLocalNamespaceURI finds the namespace URI corresponding to the
 575 | // requested prefix.
 576 | func (e *Element) findLocalNamespaceURI(prefix string) string {
 577 | 	for _, a := range e.Attr {
 578 | 		if a.Space == "xmlns" && a.Key == prefix {
 579 | 			return a.Value
 580 | 		}
 581 | 	}
 582 | 
 583 | 	if e.parent == nil {
 584 | 		return ""
 585 | 	}
 586 | 
 587 | 	return e.parent.findLocalNamespaceURI(prefix)
 588 | }
 589 | 
 590 | // findDefaultNamespaceURI finds the default namespace URI of the element.
 591 | func (e *Element) findDefaultNamespaceURI() string {
 592 | 	for _, a := range e.Attr {
 593 | 		if a.Space == "" && a.Key == "xmlns" {
 594 | 			return a.Value
 595 | 		}
 596 | 	}
 597 | 
 598 | 	if e.parent == nil {
 599 | 		return ""
 600 | 	}
 601 | 
 602 | 	return e.parent.findDefaultNamespaceURI()
 603 | }
 604 | 
 605 | // namespacePrefix returns the namespace prefix associated with the element.
 606 | func (e *Element) namespacePrefix() string {
 607 | 	return e.Space
 608 | }
 609 | 
 610 | // name returns the tag associated with the element.
 611 | func (e *Element) name() string {
 612 | 	return e.Tag
 613 | }
 614 | 
 615 | // ReindexChildren recalculates the index values of the element's child
 616 | // tokens. This is necessary only if you have manually manipulated the
 617 | // element's `Child` array.
 618 | func (e *Element) ReindexChildren() {
 619 | 	for i := 0; i < len(e.Child); i++ {
 620 | 		e.Child[i].setIndex(i)
 621 | 	}
 622 | }
 623 | 
 624 | // Text returns all character data immediately following the element's opening
 625 | // tag.
 626 | func (e *Element) Text() string {
 627 | 	if len(e.Child) == 0 {
 628 | 		return ""
 629 | 	}
 630 | 
 631 | 	text := ""
 632 | 	for _, ch := range e.Child {
 633 | 		if cd, ok := ch.(*CharData); ok {
 634 | 			if text == "" {
 635 | 				text = cd.Data
 636 | 			} else {
 637 | 				text += cd.Data
 638 | 			}
 639 | 		} else if _, ok := ch.(*Comment); ok {
 640 | 			// ignore
 641 | 		} else {
 642 | 			break
 643 | 		}
 644 | 	}
 645 | 	return text
 646 | }
 647 | 
 648 | // SetText replaces all character data immediately following an element's
 649 | // opening tag with the requested string.
 650 | func (e *Element) SetText(text string) {
 651 | 	e.replaceText(0, text, 0)
 652 | }
 653 | 
 654 | // SetCData replaces all character data immediately following an element's
 655 | // opening tag with a CDATA section.
 656 | func (e *Element) SetCData(text string) {
 657 | 	e.replaceText(0, text, cdataFlag)
 658 | }
 659 | 
 660 | // Tail returns all character data immediately following the element's end
 661 | // tag.
 662 | func (e *Element) Tail() string {
 663 | 	if e.Parent() == nil {
 664 | 		return ""
 665 | 	}
 666 | 
 667 | 	p := e.Parent()
 668 | 	i := e.Index()
 669 | 
 670 | 	text := ""
 671 | 	for _, ch := range p.Child[i+1:] {
 672 | 		if cd, ok := ch.(*CharData); ok {
 673 | 			if text == "" {
 674 | 				text = cd.Data
 675 | 			} else {
 676 | 				text += cd.Data
 677 | 			}
 678 | 		} else {
 679 | 			break
 680 | 		}
 681 | 	}
 682 | 	return text
 683 | }
 684 | 
 685 | // SetTail replaces all character data immediately following the element's end
 686 | // tag with the requested string.
 687 | func (e *Element) SetTail(text string) {
 688 | 	if e.Parent() == nil {
 689 | 		return
 690 | 	}
 691 | 
 692 | 	p := e.Parent()
 693 | 	p.replaceText(e.Index()+1, text, 0)
 694 | }
 695 | 
 696 | // replaceText is a helper function that replaces a series of chardata tokens
 697 | // starting at index i with the requested text.
 698 | func (e *Element) replaceText(i int, text string, flags charDataFlags) {
 699 | 	end := e.findTermCharDataIndex(i)
 700 | 
 701 | 	switch {
 702 | 	case end == i:
 703 | 		if text != "" {
 704 | 			// insert a new chardata token at index i
 705 | 			cd := newCharData(text, flags, nil)
 706 | 			e.InsertChildAt(i, cd)
 707 | 		}
 708 | 
 709 | 	case end == i+1:
 710 | 		if text == "" {
 711 | 			// remove the chardata token at index i
 712 | 			e.RemoveChildAt(i)
 713 | 		} else {
 714 | 			// replace the first and only character token at index i
 715 | 			cd := e.Child[i].(*CharData)
 716 | 			cd.Data, cd.flags = text, flags
 717 | 		}
 718 | 
 719 | 	default:
 720 | 		if text == "" {
 721 | 			// remove all chardata tokens starting from index i
 722 | 			copy(e.Child[i:], e.Child[end:])
 723 | 			removed := end - i
 724 | 			e.Child = e.Child[:len(e.Child)-removed]
 725 | 			for j := i; j < len(e.Child); j++ {
 726 | 				e.Child[j].setIndex(j)
 727 | 			}
 728 | 		} else {
 729 | 			// replace the first chardata token at index i and remove all
 730 | 			// subsequent chardata tokens
 731 | 			cd := e.Child[i].(*CharData)
 732 | 			cd.Data, cd.flags = text, flags
 733 | 			copy(e.Child[i+1:], e.Child[end:])
 734 | 			removed := end - (i + 1)
 735 | 			e.Child = e.Child[:len(e.Child)-removed]
 736 | 			for j := i + 1; j < len(e.Child); j++ {
 737 | 				e.Child[j].setIndex(j)
 738 | 			}
 739 | 		}
 740 | 	}
 741 | }
 742 | 
 743 | // findTermCharDataIndex finds the index of the first child token that isn't
 744 | // a CharData token. It starts from the requested start index.
 745 | func (e *Element) findTermCharDataIndex(start int) int {
 746 | 	for i := start; i < len(e.Child); i++ {
 747 | 		if _, ok := e.Child[i].(*CharData); !ok {
 748 | 			return i
 749 | 		}
 750 | 	}
 751 | 	return len(e.Child)
 752 | }
 753 | 
 754 | // CreateElement creates a new element with the specified tag (i.e., name) and
 755 | // adds it as the last child of element 'e'. The tag may include a prefix
 756 | // followed by a colon.
 757 | func (e *Element) CreateElement(tag string) *Element {
 758 | 	space, stag := spaceDecompose(tag)
 759 | 	return newElement(space, stag, e)
 760 | }
 761 | 
 762 | // CreateChild performs the same task as CreateElement but calls a
 763 | // continuation function after the child element is created, allowing
 764 | // additional actions to be performed on the child element before returning.
 765 | //
 766 | // This method of element creation is particularly useful when building nested
 767 | // XML documents from code. For example:
 768 | //
 769 | //	org := doc.CreateChild("organization", func(e *Element) {
 770 | //		e.CreateComment("Mary")
 771 | //		e.CreateChild("person", func(e *Element) {
 772 | //			e.CreateAttr("name", "Mary")
 773 | //			e.CreateAttr("age", "30")
 774 | //			e.CreateAttr("hair", "brown")
 775 | //		})
 776 | //	})
 777 | func (e *Element) CreateChild(tag string, cont func(e *Element)) *Element {
 778 | 	child := e.CreateElement(tag)
 779 | 	cont(child)
 780 | 	return child
 781 | }
 782 | 
 783 | // AddChild adds the token 't' as the last child of the element. If token 't'
 784 | // was already the child of another element, it is first removed from its
 785 | // parent element.
 786 | func (e *Element) AddChild(t Token) {
 787 | 	if t.Parent() != nil {
 788 | 		t.Parent().RemoveChild(t)
 789 | 	}
 790 | 	e.addChild(t)
 791 | }
 792 | 
 793 | // InsertChild inserts the token 't' into this element's list of children just
 794 | // before the element's existing child token 'ex'. If the existing element
 795 | // 'ex' does not appear in this element's list of child tokens, then 't' is
 796 | // added to the end of this element's list of child tokens. If token 't' is
 797 | // already the child of another element, it is first removed from the other
 798 | // element's list of child tokens.
 799 | //
 800 | // Deprecated: InsertChild is deprecated. Use InsertChildAt instead.
 801 | func (e *Element) InsertChild(ex Token, t Token) {
 802 | 	if ex == nil || ex.Parent() != e {
 803 | 		e.AddChild(t)
 804 | 		return
 805 | 	}
 806 | 
 807 | 	if t.Parent() != nil {
 808 | 		t.Parent().RemoveChild(t)
 809 | 	}
 810 | 
 811 | 	t.setParent(e)
 812 | 
 813 | 	i := ex.Index()
 814 | 	e.Child = append(e.Child, nil)
 815 | 	copy(e.Child[i+1:], e.Child[i:])
 816 | 	e.Child[i] = t
 817 | 
 818 | 	for j := i; j < len(e.Child); j++ {
 819 | 		e.Child[j].setIndex(j)
 820 | 	}
 821 | }
 822 | 
 823 | // InsertChildAt inserts the token 't' into this element's list of child
 824 | // tokens just before the requested 'index'. If the index is greater than or
 825 | // equal to the length of the list of child tokens, then the token 't' is
 826 | // added to the end of the list of child tokens.
 827 | func (e *Element) InsertChildAt(index int, t Token) {
 828 | 	if index >= len(e.Child) {
 829 | 		e.AddChild(t)
 830 | 		return
 831 | 	}
 832 | 
 833 | 	if t.Parent() != nil {
 834 | 		if t.Parent() == e && t.Index() < index {
 835 | 			index--
 836 | 		}
 837 | 		t.Parent().RemoveChild(t)
 838 | 	}
 839 | 
 840 | 	t.setParent(e)
 841 | 
 842 | 	e.Child = append(e.Child, nil)
 843 | 	copy(e.Child[index+1:], e.Child[index:])
 844 | 	e.Child[index] = t
 845 | 
 846 | 	for j := index; j < len(e.Child); j++ {
 847 | 		e.Child[j].setIndex(j)
 848 | 	}
 849 | }
 850 | 
 851 | // RemoveChild attempts to remove the token 't' from this element's list of
 852 | // child tokens. If the token 't' was a child of this element, then it is
 853 | // removed and returned. Otherwise, nil is returned.
 854 | func (e *Element) RemoveChild(t Token) Token {
 855 | 	if t.Parent() != e {
 856 | 		return nil
 857 | 	}
 858 | 	return e.RemoveChildAt(t.Index())
 859 | }
 860 | 
 861 | // RemoveChildAt removes the child token appearing in slot 'index' of this
 862 | // element's list of child tokens. The removed child token is then returned.
 863 | // If the index is out of bounds, no child is removed and nil is returned.
 864 | func (e *Element) RemoveChildAt(index int) Token {
 865 | 	if index >= len(e.Child) {
 866 | 		return nil
 867 | 	}
 868 | 
 869 | 	t := e.Child[index]
 870 | 	for j := index + 1; j < len(e.Child); j++ {
 871 | 		e.Child[j].setIndex(j - 1)
 872 | 	}
 873 | 	e.Child = append(e.Child[:index], e.Child[index+1:]...)
 874 | 	t.setIndex(-1)
 875 | 	t.setParent(nil)
 876 | 	return t
 877 | }
 878 | 
 879 | // autoClose analyzes the stack's top element and the current token to decide
 880 | // whether the top element should be closed.
 881 | func (e *Element) autoClose(stack *stack[*Element], t xml.Token, tags []string) {
 882 | 	if stack.empty() {
 883 | 		return
 884 | 	}
 885 | 
 886 | 	top := stack.peek()
 887 | 
 888 | 	for _, tag := range tags {
 889 | 		if strings.EqualFold(tag, top.FullTag()) {
 890 | 			if e, ok := t.(xml.EndElement); !ok ||
 891 | 				!strings.EqualFold(e.Name.Space, top.Space) ||
 892 | 				!strings.EqualFold(e.Name.Local, top.Tag) {
 893 | 				stack.pop()
 894 | 			}
 895 | 			break
 896 | 		}
 897 | 	}
 898 | }
 899 | 
// readFrom reads XML from the reader 'ri' and adds the parsed tokens as new
// children of this element. It returns the number of bytes reported by the
// wrapping reader and any error encountered. ErrXML is returned when the
// element nesting of the input is inconsistent (unbalanced or mismatched
// tags).
func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err error) {
	// When CDATA sections must be preserved, a peeking reader is used so the
	// raw bytes at the start of each token can be compared against the CDATA
	// prefix; otherwise a simple reader suffices.
	var r xmlReader
	var pr *xmlPeekReader
	if settings.PreserveCData {
		pr = newXmlPeekReader(ri)
		r = pr
	} else {
		r = newXmlSimpleReader(ri)
	}

	// attrCheck maps an attribute name to its index in the element's
	// attribute list. It is reused for every start element and cleared after
	// each use.
	attrCheck := make(map[xml.Name]int)
	dec := newDecoder(r, settings)

	// The stack holds the currently open elements; the receiver sits at the
	// bottom and receives the top-level tokens.
	var stack stack[*Element]
	stack.push(e)
	for {
		if pr != nil {
			// Arrange to capture the first len(cdataPrefix) bytes of the
			// next token.
			pr.PeekPrepare(dec.InputOffset(), len(cdataPrefix))
		}

		t, err := dec.RawToken()

		// autoClose runs before the error is examined, so it also gets a
		// chance to pop an auto-closable element when the token could not be
		// read (e.g. at end of input).
		if settings.Permissive && settings.AutoClose != nil {
			e.autoClose(&stack, t, settings.AutoClose)
		}

		switch {
		case err == io.EOF:
			// At end of input only the receiver may remain on the stack;
			// anything else means an element was left unclosed.
			if len(stack.data) != 1 {
				return r.Bytes(), ErrXML
			}
			return r.Bytes(), nil
		case err != nil:
			return r.Bytes(), err
		case stack.empty():
			return r.Bytes(), ErrXML
		}

		top := stack.peek()

		switch t := t.(type) {
		case xml.StartElement:
			// The new element is created as a child of the current top of
			// the stack and becomes the new top.
			e := newElement(t.Name.Space, t.Name.Local, top)
			if settings.PreserveDuplicateAttrs || len(t.Attr) < 2 {
				// Duplicates are kept (or impossible), so add everything.
				for _, a := range t.Attr {
					e.addAttr(a.Name.Space, a.Name.Local, a.Value)
				}
			} else {
				// Collapse duplicate attributes: a later occurrence
				// overwrites the value of the earlier one.
				for _, a := range t.Attr {
					if i, contains := attrCheck[a.Name]; contains {
						e.Attr[i].Value = a.Value
					} else {
						attrCheck[a.Name] = e.addAttr(a.Name.Space, a.Name.Local, a.Value)
					}
				}
				clear(attrCheck)
			}
			stack.push(e)
		case xml.EndElement:
			// An end tag must match the innermost open element exactly.
			if top.Tag != t.Name.Local || top.Space != t.Name.Space {
				return r.Bytes(), ErrXML
			}
			stack.pop()
		case xml.CharData:
			data := string(t)
			var flags charDataFlags
			if pr != nil {
				// The CDATA flag takes precedence over the whitespace flag.
				peekBuf := pr.PeekFinalize()
				if bytes.Equal(peekBuf, cdataPrefix) {
					flags = cdataFlag
				} else if isWhitespace(data) {
					flags = whitespaceFlag
				}
			} else {
				if isWhitespace(data) {
					flags = whitespaceFlag
				}
			}
			newCharData(data, flags, top)
		case xml.Comment:
			newComment(string(t), top)
		case xml.Directive:
			newDirective(string(t), top)
		case xml.ProcInst:
			newProcInst(t.Target, string(t.Inst), top)
		}
	}
}
 990 | 
 991 | // SelectAttr finds an element attribute matching the requested 'key' and, if
 992 | // found, returns a pointer to the matching attribute. The function returns
 993 | // nil if no matching attribute is found. The key may include a namespace
 994 | // prefix followed by a colon.
 995 | func (e *Element) SelectAttr(key string) *Attr {
 996 | 	space, skey := spaceDecompose(key)
 997 | 	for i, a := range e.Attr {
 998 | 		if spaceMatch(space, a.Space) && skey == a.Key {
 999 | 			return &e.Attr[i]
1000 | 		}
1001 | 	}
1002 | 	return nil
1003 | }
1004 | 
1005 | // SelectAttrValue finds an element attribute matching the requested 'key' and
1006 | // returns its value if found. If no matching attribute is found, the function
1007 | // returns the 'dflt' value instead. The key may include a namespace prefix
1008 | // followed by a colon.
1009 | func (e *Element) SelectAttrValue(key, dflt string) string {
1010 | 	space, skey := spaceDecompose(key)
1011 | 	for _, a := range e.Attr {
1012 | 		if spaceMatch(space, a.Space) && skey == a.Key {
1013 | 			return a.Value
1014 | 		}
1015 | 	}
1016 | 	return dflt
1017 | }
1018 | 
1019 | // ChildElements returns all elements that are children of this element.
1020 | func (e *Element) ChildElements() []*Element {
1021 | 	var elements []*Element
1022 | 	for _, t := range e.Child {
1023 | 		if c, ok := t.(*Element); ok {
1024 | 			elements = append(elements, c)
1025 | 		}
1026 | 	}
1027 | 	return elements
1028 | }
1029 | 
1030 | // SelectElement returns the first child element with the given 'tag' (i.e.,
1031 | // name). The function returns nil if no child element matching the tag is
1032 | // found. The tag may include a namespace prefix followed by a colon.
1033 | func (e *Element) SelectElement(tag string) *Element {
1034 | 	space, stag := spaceDecompose(tag)
1035 | 	for _, t := range e.Child {
1036 | 		if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag {
1037 | 			return c
1038 | 		}
1039 | 	}
1040 | 	return nil
1041 | }
1042 | 
1043 | // SelectElements returns a slice of all child elements with the given 'tag'
1044 | // (i.e., name). The tag may include a namespace prefix followed by a colon.
1045 | func (e *Element) SelectElements(tag string) []*Element {
1046 | 	space, stag := spaceDecompose(tag)
1047 | 	var elements []*Element
1048 | 	for _, t := range e.Child {
1049 | 		if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag {
1050 | 			elements = append(elements, c)
1051 | 		}
1052 | 	}
1053 | 	return elements
1054 | }
1055 | 
1056 | // FindElement returns the first element matched by the XPath-like 'path'
1057 | // string. The function returns nil if no child element is found using the
1058 | // path. It panics if an invalid path string is supplied.
1059 | func (e *Element) FindElement(path string) *Element {
1060 | 	return e.FindElementPath(MustCompilePath(path))
1061 | }
1062 | 
1063 | // FindElementPath returns the first element matched by the 'path' object. The
1064 | // function returns nil if no element is found using the path.
1065 | func (e *Element) FindElementPath(path Path) *Element {
1066 | 	p := newPather()
1067 | 	elements := p.traverse(e, path)
1068 | 	if len(elements) > 0 {
1069 | 		return elements[0]
1070 | 	}
1071 | 	return nil
1072 | }
1073 | 
1074 | // FindElements returns a slice of elements matched by the XPath-like 'path'
1075 | // string. The function returns nil if no child element is found using the
1076 | // path. It panics if an invalid path string is supplied.
1077 | func (e *Element) FindElements(path string) []*Element {
1078 | 	return e.FindElementsPath(MustCompilePath(path))
1079 | }
1080 | 
1081 | // FindElementsPath returns a slice of elements matched by the 'path' object.
1082 | func (e *Element) FindElementsPath(path Path) []*Element {
1083 | 	p := newPather()
1084 | 	return p.traverse(e, path)
1085 | }
1086 | 
1087 | // NotNil returns the receiver element if it isn't nil; otherwise, it returns
1088 | // an unparented element with an empty string tag. This function simplifies
1089 | // the task of writing code to ignore not-found results from element queries.
1090 | // For example, instead of writing this:
1091 | //
1092 | //	if e := doc.SelectElement("enabled"); e != nil {
1093 | //		e.SetText("true")
1094 | //	}
1095 | //
1096 | // You could write this:
1097 | //
1098 | //	doc.SelectElement("enabled").NotNil().SetText("true")
1099 | func (e *Element) NotNil() *Element {
1100 | 	if e == nil {
1101 | 		return NewElement("")
1102 | 	}
1103 | 	return e
1104 | }
1105 | 
1106 | // GetPath returns the absolute path of the element. The absolute path is the
1107 | // full path from the document's root.
1108 | func (e *Element) GetPath() string {
1109 | 	path := []string{}
1110 | 	for seg := e; seg != nil; seg = seg.Parent() {
1111 | 		if seg.Tag != "" {
1112 | 			path = append(path, seg.Tag)
1113 | 		}
1114 | 	}
1115 | 
1116 | 	// Reverse the path.
1117 | 	for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 {
1118 | 		path[i], path[j] = path[j], path[i]
1119 | 	}
1120 | 
1121 | 	return "/" + strings.Join(path, "/")
1122 | }
1123 | 
1124 | // GetRelativePath returns the path of this element relative to the 'source'
1125 | // element. If the two elements are not part of the same element tree, then
1126 | // the function returns the empty string.
1127 | func (e *Element) GetRelativePath(source *Element) string {
1128 | 	var path []*Element
1129 | 
1130 | 	if source == nil {
1131 | 		return ""
1132 | 	}
1133 | 
1134 | 	// Build a reverse path from the element toward the root. Stop if the
1135 | 	// source element is encountered.
1136 | 	var seg *Element
1137 | 	for seg = e; seg != nil && seg != source; seg = seg.Parent() {
1138 | 		path = append(path, seg)
1139 | 	}
1140 | 
1141 | 	// If we found the source element, reverse the path and compose the
1142 | 	// string.
1143 | 	if seg == source {
1144 | 		if len(path) == 0 {
1145 | 			return "."
1146 | 		}
1147 | 		parts := []string{}
1148 | 		for i := len(path) - 1; i >= 0; i-- {
1149 | 			parts = append(parts, path[i].Tag)
1150 | 		}
1151 | 		return "./" + strings.Join(parts, "/")
1152 | 	}
1153 | 
1154 | 	// The source wasn't encountered, so climb from the source element toward
1155 | 	// the root of the tree until an element in the reversed path is
1156 | 	// encountered.
1157 | 
1158 | 	findPathIndex := func(e *Element, path []*Element) int {
1159 | 		for i, ee := range path {
1160 | 			if e == ee {
1161 | 				return i
1162 | 			}
1163 | 		}
1164 | 		return -1
1165 | 	}
1166 | 
1167 | 	climb := 0
1168 | 	for seg = source; seg != nil; seg = seg.Parent() {
1169 | 		i := findPathIndex(seg, path)
1170 | 		if i >= 0 {
1171 | 			path = path[:i] // truncate at found segment
1172 | 			break
1173 | 		}
1174 | 		climb++
1175 | 	}
1176 | 
1177 | 	// No element in the reversed path was encountered, so the two elements
1178 | 	// must not be part of the same tree.
1179 | 	if seg == nil {
1180 | 		return ""
1181 | 	}
1182 | 
1183 | 	// Reverse the (possibly truncated) path and prepend ".." segments to
1184 | 	// climb.
1185 | 	parts := []string{}
1186 | 	for i := 0; i < climb; i++ {
1187 | 		parts = append(parts, "..")
1188 | 	}
1189 | 	for i := len(path) - 1; i >= 0; i-- {
1190 | 		parts = append(parts, path[i].Tag)
1191 | 	}
1192 | 	return strings.Join(parts, "/")
1193 | }
1194 | 
1195 | // IndentWithSettings modifies the element and its child tree by inserting
1196 | // character data tokens containing newlines and indentation. The behavior of
1197 | // the indentation algorithm is configured by the indent settings. Because
1198 | // this function indents the element as if it were at the root of a document,
1199 | // it is most useful when called just before writing the element as an XML
1200 | // fragment using WriteTo.
1201 | func (e *Element) IndentWithSettings(s *IndentSettings) {
1202 | 	e.indent(1, getIndentFunc(s), s)
1203 | }
1204 | 
// indent recursively inserts proper indentation between an XML element's
// child tokens. Existing indentation is stripped first; the child list is
// then rebuilt with a whitespace CharData token (as produced by the 'indent'
// function for the given depth) placed before each non-character-data
// child, and a final one placed after the last child when that child isn't
// character data. 'depth' is the nesting level of the element's children.
func (e *Element) indent(depth int, indent indentFunc, s *IndentSettings) {
	e.stripIndent(s)
	n := len(e.Child)
	if n == 0 {
		return
	}

	// Rebuild the child list from scratch; in the worst case every child
	// gets an indent token before it, plus one closing indent token.
	oldChild := e.Child
	e.Child = make([]Token, 0, n*2+1)
	isCharData, firstNonCharData := false, true
	for _, c := range oldChild {
		// Insert NL+indent before child if it's not character data. The
		// only exception: the first non-character-data child at root depth
		// (depth 0) gets no indent before it.
		_, isCharData = c.(*CharData)
		if !isCharData {
			if !firstNonCharData || depth > 0 {
				// NOTE: this 's' is the indent string and shadows the
				// settings parameter within this block only.
				s := indent(depth)
				if s != "" {
					// presumably newCharData appends the new token to e's
					// children when given a non-nil parent — confirm
					// against its definition
					newCharData(s, whitespaceFlag, e)
				}
			}
			firstNonCharData = false
		}

		e.addChild(c)

		// Recursively process child elements.
		if ce, ok := c.(*Element); ok {
			ce.indent(depth+1, indent, s)
		}
	}

	// Insert NL+indent after the last child (one level shallower, so it
	// lines up what follows the children), unless the last child is
	// character data. At root depth this only happens when a
	// non-character-data child was seen.
	if !isCharData {
		if !firstNonCharData || depth > 0 {
			s := indent(depth - 1)
			if s != "" {
				newCharData(s, whitespaceFlag, e)
			}
		}
	}
}
1250 | 
1251 | // stripIndent removes any previously inserted indentation.
1252 | func (e *Element) stripIndent(s *IndentSettings) {
1253 | 	// Count the number of non-indent child tokens
1254 | 	n := len(e.Child)
1255 | 	for _, c := range e.Child {
1256 | 		if cd, ok := c.(*CharData); ok && cd.IsWhitespace() {
1257 | 			n--
1258 | 		}
1259 | 	}
1260 | 	if n == len(e.Child) {
1261 | 		return
1262 | 	}
1263 | 	if n == 0 && len(e.Child) == 1 && s.PreserveLeafWhitespace {
1264 | 		return
1265 | 	}
1266 | 
1267 | 	// Strip out indent CharData
1268 | 	newChild := make([]Token, n)
1269 | 	j := 0
1270 | 	for _, c := range e.Child {
1271 | 		if cd, ok := c.(*CharData); ok && cd.IsWhitespace() {
1272 | 			continue
1273 | 		}
1274 | 		newChild[j] = c
1275 | 		newChild[j].setIndex(j)
1276 | 		j++
1277 | 	}
1278 | 	e.Child = newChild
1279 | }
1280 | 
1281 | // stripTrailingWhitespace removes any trailing whitespace CharData tokens
1282 | // from the element's children.
1283 | func (e *Element) stripTrailingWhitespace() {
1284 | 	for i := len(e.Child) - 1; i >= 0; i-- {
1285 | 		if cd, ok := e.Child[i].(*CharData); !ok || !cd.IsWhitespace() {
1286 | 			e.Child = e.Child[:i+1]
1287 | 			return
1288 | 		}
1289 | 	}
1290 | }
1291 | 
1292 | // dup duplicates the element.
1293 | func (e *Element) dup(parent *Element) Token {
1294 | 	ne := &Element{
1295 | 		Space:  e.Space,
1296 | 		Tag:    e.Tag,
1297 | 		Attr:   make([]Attr, len(e.Attr)),
1298 | 		Child:  make([]Token, len(e.Child)),
1299 | 		parent: parent,
1300 | 		index:  e.index,
1301 | 	}
1302 | 	for i, t := range e.Child {
1303 | 		ne.Child[i] = t.dup(ne)
1304 | 	}
1305 | 	copy(ne.Attr, e.Attr)
1306 | 	return ne
1307 | }
1308 | 
1309 | // NextSibling returns this element's next sibling element. It returns nil if
1310 | // there is no next sibling element.
1311 | func (e *Element) NextSibling() *Element {
1312 | 	if e.parent == nil {
1313 | 		return nil
1314 | 	}
1315 | 	for i := e.index + 1; i < len(e.parent.Child); i++ {
1316 | 		if s, ok := e.parent.Child[i].(*Element); ok {
1317 | 			return s
1318 | 		}
1319 | 	}
1320 | 	return nil
1321 | }
1322 | 
1323 | // PrevSibling returns this element's preceding sibling element. It returns
1324 | // nil if there is no preceding sibling element.
1325 | func (e *Element) PrevSibling() *Element {
1326 | 	if e.parent == nil {
1327 | 		return nil
1328 | 	}
1329 | 	for i := e.index - 1; i >= 0; i-- {
1330 | 		if s, ok := e.parent.Child[i].(*Element); ok {
1331 | 			return s
1332 | 		}
1333 | 	}
1334 | 	return nil
1335 | }
1336 | 
// Parent returns this element's parent element. It returns nil if this
// element has no parent.
func (e *Element) Parent() *Element {
	return e.parent
}
1342 | 
// Index returns the index of this element within its parent element's
// list of child tokens. If this element has no parent, then the function
// returns -1.
//
// The value is the element's stored index field; it is not recomputed from
// the parent's Child slice on each call.
func (e *Element) Index() int {
	return e.index
}
1349 | 
1350 | // WriteTo serializes the element to the writer w.
1351 | func (e *Element) WriteTo(w Writer, s *WriteSettings) {
1352 | 	w.WriteByte('<')
1353 | 	w.WriteString(e.FullTag())
1354 | 	for _, a := range e.Attr {
1355 | 		w.WriteByte(' ')
1356 | 		a.WriteTo(w, s)
1357 | 	}
1358 | 	if len(e.Child) > 0 {
1359 | 		w.WriteByte('>')
1360 | 		for _, c := range e.Child {
1361 | 			c.WriteTo(w, s)
1362 | 		}
1363 | 		w.Write([]byte{'<', '/'})
1364 | 		w.WriteString(e.FullTag())
1365 | 		w.WriteByte('>')
1366 | 	} else {
1367 | 		if s.CanonicalEndTags {
1368 | 			w.Write([]byte{'>', '<', '/'})
1369 | 			w.WriteString(e.FullTag())
1370 | 			w.WriteByte('>')
1371 | 		} else {
1372 | 			w.Write([]byte{'/', '>'})
1373 | 		}
1374 | 	}
1375 | }
1376 | 
// setParent replaces this element token's parent. Only the element's own
// parent pointer is updated; the parent's Child slice is not touched.
func (e *Element) setParent(parent *Element) {
	e.parent = parent
}
1381 | 
// setIndex sets this element token's index within its parent's Child slice.
// It only records the value; it does not move the element.
func (e *Element) setIndex(index int) {
	e.index = index
}
1386 | 
1387 | // addChild adds a child token to the element e.
1388 | func (e *Element) addChild(t Token) {
1389 | 	t.setParent(e)
1390 | 	t.setIndex(len(e.Child))
1391 | 	e.Child = append(e.Child, t)
1392 | }
1393 | 
1394 | // CreateAttr creates an attribute with the specified 'key' and 'value' and
1395 | // adds it to this element. If an attribute with same key already exists on
1396 | // this element, then its value is replaced. The key may include a namespace
1397 | // prefix followed by a colon.
1398 | func (e *Element) CreateAttr(key, value string) *Attr {
1399 | 	space, skey := spaceDecompose(key)
1400 | 
1401 | 	for i, a := range e.Attr {
1402 | 		if space == a.Space && skey == a.Key {
1403 | 			e.Attr[i].Value = value
1404 | 			return &e.Attr[i]
1405 | 		}
1406 | 	}
1407 | 
1408 | 	i := e.addAttr(space, skey, value)
1409 | 	return &e.Attr[i]
1410 | }
1411 | 
1412 | // addAttr is a helper function that adds an attribute to an element. Returns
1413 | // the index of the added attribute.
1414 | func (e *Element) addAttr(space, key, value string) int {
1415 | 	a := Attr{
1416 | 		Space:   space,
1417 | 		Key:     key,
1418 | 		Value:   value,
1419 | 		element: e,
1420 | 	}
1421 | 	e.Attr = append(e.Attr, a)
1422 | 	return len(e.Attr) - 1
1423 | }
1424 | 
1425 | // RemoveAttr removes the first attribute of this element whose key matches
1426 | // 'key'. It returns a copy of the removed attribute if a match is found. If
1427 | // no match is found, it returns nil. The key may include a namespace prefix
1428 | // followed by a colon.
1429 | func (e *Element) RemoveAttr(key string) *Attr {
1430 | 	space, skey := spaceDecompose(key)
1431 | 	for i, a := range e.Attr {
1432 | 		if space == a.Space && skey == a.Key {
1433 | 			e.Attr = append(e.Attr[0:i], e.Attr[i+1:]...)
1434 | 			return &Attr{
1435 | 				Space:   a.Space,
1436 | 				Key:     a.Key,
1437 | 				Value:   a.Value,
1438 | 				element: nil,
1439 | 			}
1440 | 		}
1441 | 	}
1442 | 	return nil
1443 | }
1444 | 
1445 | // SortAttrs sorts this element's attributes lexicographically by key.
1446 | func (e *Element) SortAttrs() {
1447 | 	slices.SortFunc(e.Attr, func(a, b Attr) int {
1448 | 		if v := strings.Compare(a.Space, b.Space); v != 0 {
1449 | 			return v
1450 | 		}
1451 | 		return strings.Compare(a.Key, b.Key)
1452 | 	})
1453 | }
1454 | 
1455 | // FullKey returns this attribute's complete key, including namespace prefix
1456 | // if present.
1457 | func (a *Attr) FullKey() string {
1458 | 	if a.Space == "" {
1459 | 		return a.Key
1460 | 	}
1461 | 	return a.Space + ":" + a.Key
1462 | }
1463 | 
// Element returns a pointer to the element containing this attribute. The
// result is nil when the attribute's element field is unset, as is the case
// for the copy returned by RemoveAttr.
func (a *Attr) Element() *Element {
	return a.element
}
1468 | 
1469 | // NamespaceURI returns the XML namespace URI associated with this attribute.
1470 | // The function returns the empty string if the attribute is unprefixed or
1471 | // if the attribute is part of the XML default namespace.
1472 | func (a *Attr) NamespaceURI() string {
1473 | 	if a.Space == "" {
1474 | 		return ""
1475 | 	}
1476 | 	return a.element.findLocalNamespaceURI(a.Space)
1477 | }
1478 | 
1479 | // WriteTo serializes the attribute to the writer.
1480 | func (a *Attr) WriteTo(w Writer, s *WriteSettings) {
1481 | 	w.WriteString(a.FullKey())
1482 | 	if s.AttrSingleQuote {
1483 | 		w.WriteString(`='`)
1484 | 	} else {
1485 | 		w.WriteString(`="`)
1486 | 	}
1487 | 	var m escapeMode
1488 | 	if s.CanonicalAttrVal && !s.AttrSingleQuote {
1489 | 		m = escapeCanonicalAttr
1490 | 	} else {
1491 | 		m = escapeNormal
1492 | 	}
1493 | 	escapeString(w, a.Value, m)
1494 | 	if s.AttrSingleQuote {
1495 | 		w.WriteByte('\'')
1496 | 	} else {
1497 | 		w.WriteByte('"')
1498 | 	}
1499 | }
1500 | 
// NewText creates an unparented CharData token containing simple text data.
// The token is created with no flags set.
func NewText(text string) *CharData {
	return newCharData(text, 0, nil)
}
1505 | 
// NewCData creates an unparented XML character CDATA section with 'data' as
// its content. The returned token has the CDATA flag set.
func NewCData(data string) *CharData {
	return newCharData(data, cdataFlag, nil)
}
1511 | 
// NewCharData creates an unparented CharData token containing simple text
// data.
//
// Deprecated: NewCharData is deprecated. Instead, use NewText, which does the
// same thing.
func NewCharData(data string) *CharData {
	return newCharData(data, 0, nil)
}
1520 | 
1521 | // newCharData creates a character data token and binds it to a parent
1522 | // element. If parent is nil, the CharData token remains unbound.
1523 | func newCharData(data string, flags charDataFlags, parent *Element) *CharData {
1524 | 	c := &CharData{
1525 | 		Data:   data,
1526 | 		parent: nil,
1527 | 		index:  -1,
1528 | 		flags:  flags,
1529 | 	}
1530 | 	if parent != nil {
1531 | 		parent.addChild(c)
1532 | 	}
1533 | 	return c
1534 | }
1535 | 
// CreateText creates a CharData token containing simple text data and adds it
// to the end of this element's list of child tokens. The new token is
// returned.
func (e *Element) CreateText(text string) *CharData {
	return newCharData(text, 0, e)
}
1541 | 
// CreateCData creates a CharData token containing a CDATA section with 'data'
// as its content and adds it to the end of this element's list of child
// tokens. The new token is returned.
func (e *Element) CreateCData(data string) *CharData {
	return newCharData(data, cdataFlag, e)
}
1548 | 
// CreateCharData creates a CharData token containing simple text data and
// adds it to the end of this element's list of child tokens.
//
// Deprecated: CreateCharData is deprecated. Instead, use CreateText, which
// does the same thing.
func (e *Element) CreateCharData(data string) *CharData {
	return e.CreateText(data)
}
1557 | 
1558 | // SetData modifies the content of the CharData token. In the case of a
1559 | // CharData token containing simple text, the simple text is modified. In the
1560 | // case of a CharData token containing a CDATA section, the CDATA section's
1561 | // content is modified.
1562 | func (c *CharData) SetData(text string) {
1563 | 	c.Data = text
1564 | 	if isWhitespace(text) {
1565 | 		c.flags |= whitespaceFlag
1566 | 	} else {
1567 | 		c.flags &= ^whitespaceFlag
1568 | 	}
1569 | }
1570 | 
// IsCData returns true if this CharData token contains a CDATA section. It
// returns false if the CharData token contains simple text.
func (c *CharData) IsCData() bool {
	return (c.flags & cdataFlag) != 0
}
1576 | 
// IsWhitespace returns true if this CharData token is flagged as containing
// only whitespace. The answer comes from the token's whitespace flag rather
// than from inspecting Data at call time.
func (c *CharData) IsWhitespace() bool {
	return (c.flags & whitespaceFlag) != 0
}
1581 | 
// Parent returns this CharData token's parent element, or nil if it has no
// parent.
func (c *CharData) Parent() *Element {
	return c.parent
}
1587 | 
// Index returns the index of this CharData token within its parent element's
// list of child tokens. If this CharData token has no parent, then the
// function returns -1.
func (c *CharData) Index() int {
	return c.index
}
1594 | 
1595 | // WriteTo serializes character data to the writer.
1596 | func (c *CharData) WriteTo(w Writer, s *WriteSettings) {
1597 | 	if c.IsCData() {
1598 | 		w.WriteString(`<![CDATA[`)
1599 | 		w.WriteString(c.Data)
1600 | 		w.WriteString(`]]>`)
1601 | 	} else {
1602 | 		var m escapeMode
1603 | 		if s.CanonicalText {
1604 | 			m = escapeCanonicalText
1605 | 		} else {
1606 | 			m = escapeNormal
1607 | 		}
1608 | 		escapeString(w, c.Data, m)
1609 | 	}
1610 | }
1611 | 
// dup duplicates the character data. The copy carries the same data, flags
// and index as the original, with its parent set to 'parent'.
func (c *CharData) dup(parent *Element) Token {
	return &CharData{
		Data:   c.Data,
		flags:  c.flags,
		parent: parent,
		index:  c.index,
	}
}
1621 | 
// setParent replaces the character data token's parent. Only the token's
// own parent pointer is updated.
func (c *CharData) setParent(parent *Element) {
	c.parent = parent
}
1626 | 
// setIndex sets the CharData token's index within its parent element's Child
// slice. It only records the value; it does not move the token.
func (c *CharData) setIndex(index int) {
	c.index = index
}
1632 | 
// NewComment creates an unparented comment token whose content is
// 'comment'.
func NewComment(comment string) *Comment {
	return newComment(comment, nil)
}
1637 | 
1638 | // NewComment creates a comment token and sets its parent element to 'parent'.
1639 | func newComment(comment string, parent *Element) *Comment {
1640 | 	c := &Comment{
1641 | 		Data:   comment,
1642 | 		parent: nil,
1643 | 		index:  -1,
1644 | 	}
1645 | 	if parent != nil {
1646 | 		parent.addChild(c)
1647 | 	}
1648 | 	return c
1649 | }
1650 | 
// CreateComment creates a comment token using the specified 'comment' string
// and adds it as the last child token of this element. The new token is
// returned.
func (e *Element) CreateComment(comment string) *Comment {
	return newComment(comment, e)
}
1656 | 
// dup duplicates the comment. The copy carries the same data and index as
// the original, with its parent set to 'parent'.
func (c *Comment) dup(parent *Element) Token {
	return &Comment{
		Data:   c.Data,
		parent: parent,
		index:  c.index,
	}
}
1665 | 
// Parent returns the comment token's parent element, or nil if it has no
// parent.
func (c *Comment) Parent() *Element {
	return c.parent
}
1670 | 
// Index returns the index of this Comment token within its parent element's
// list of child tokens. If this Comment token has no parent, then the
// function returns -1.
func (c *Comment) Index() int {
	return c.index
}
1677 | 
// WriteTo serializes the comment to the writer as <!--Data-->. The comment
// text is written as-is, without escaping, and the write settings are not
// consulted.
func (c *Comment) WriteTo(w Writer, s *WriteSettings) {
	w.WriteString("<!--")
	w.WriteString(c.Data)
	w.WriteString("-->")
}
1684 | 
// setParent replaces the comment token's parent. Only the token's own
// parent pointer is updated.
func (c *Comment) setParent(parent *Element) {
	c.parent = parent
}
1689 | 
// setIndex sets the Comment token's index within its parent element's Child
// slice. It only records the value; it does not move the token.
func (c *Comment) setIndex(index int) {
	c.index = index
}
1695 | 
// NewDirective creates an unparented XML directive token whose content is
// 'data'.
func NewDirective(data string) *Directive {
	return newDirective(data, nil)
}
1700 | 
1701 | // newDirective creates an XML directive and binds it to a parent element. If
1702 | // parent is nil, the Directive remains unbound.
1703 | func newDirective(data string, parent *Element) *Directive {
1704 | 	d := &Directive{
1705 | 		Data:   data,
1706 | 		parent: nil,
1707 | 		index:  -1,
1708 | 	}
1709 | 	if parent != nil {
1710 | 		parent.addChild(d)
1711 | 	}
1712 | 	return d
1713 | }
1714 | 
// CreateDirective creates an XML directive token with the specified 'data'
// value and adds it as the last child token of this element. The new token
// is returned.
func (e *Element) CreateDirective(data string) *Directive {
	return newDirective(data, e)
}
1720 | 
// dup duplicates the directive. The copy carries the same data and index as
// the original, with its parent set to 'parent'.
func (d *Directive) dup(parent *Element) Token {
	return &Directive{
		Data:   d.Data,
		parent: parent,
		index:  d.index,
	}
}
1729 | 
// Parent returns the directive token's parent element, or nil if it has no
// parent.
func (d *Directive) Parent() *Element {
	return d.parent
}
1735 | 
// Index returns the index of this Directive token within its parent element's
// list of child tokens. If this Directive token has no parent, then the
// function returns -1.
func (d *Directive) Index() int {
	return d.index
}
1742 | 
// WriteTo serializes the XML directive to the writer as <!Data>. The
// directive text is written as-is, without escaping, and the write settings
// are not consulted.
func (d *Directive) WriteTo(w Writer, s *WriteSettings) {
	w.WriteString("<!")
	w.WriteString(d.Data)
	w.WriteString(">")
}
1749 | 
// setParent replaces the directive token's parent. Only the token's own
// parent pointer is updated.
func (d *Directive) setParent(parent *Element) {
	d.parent = parent
}
1754 | 
// setIndex sets the Directive token's index within its parent element's Child
// slice. It only records the value; it does not move the token.
func (d *Directive) setIndex(index int) {
	d.index = index
}
1760 | 
// NewProcInst creates an unparented XML processing instruction with the
// given 'target' and instruction text 'inst'.
func NewProcInst(target, inst string) *ProcInst {
	return newProcInst(target, inst, nil)
}
1765 | 
1766 | // newProcInst creates an XML processing instruction and binds it to a parent
1767 | // element. If parent is nil, the ProcInst remains unbound.
1768 | func newProcInst(target, inst string, parent *Element) *ProcInst {
1769 | 	p := &ProcInst{
1770 | 		Target: target,
1771 | 		Inst:   inst,
1772 | 		parent: nil,
1773 | 		index:  -1,
1774 | 	}
1775 | 	if parent != nil {
1776 | 		parent.addChild(p)
1777 | 	}
1778 | 	return p
1779 | }
1780 | 
// CreateProcInst creates an XML processing instruction token with the
// specified 'target' and instruction 'inst'. It is then added as the last
// child token of this element. The new token is returned.
func (e *Element) CreateProcInst(target, inst string) *ProcInst {
	return newProcInst(target, inst, e)
}
1787 | 
// dup duplicates the processing instruction. The copy carries the same
// target, instruction and index as the original, with its parent set to
// 'parent'.
func (p *ProcInst) dup(parent *Element) Token {
	return &ProcInst{
		Target: p.Target,
		Inst:   p.Inst,
		parent: parent,
		index:  p.index,
	}
}
1797 | 
// Parent returns the processing instruction token's parent element, or nil
// if it has no parent.
func (p *ProcInst) Parent() *Element {
	return p.parent
}
1803 | 
// Index returns the index of this ProcInst token within its parent element's
// list of child tokens. If this ProcInst token has no parent, then the
// function returns -1.
func (p *ProcInst) Index() int {
	return p.index
}
1810 | 
1811 | // WriteTo serializes the processing instruction to the writer.
1812 | func (p *ProcInst) WriteTo(w Writer, s *WriteSettings) {
1813 | 	w.WriteString("<?")
1814 | 	w.WriteString(p.Target)
1815 | 	if p.Inst != "" {
1816 | 		w.WriteByte(' ')
1817 | 		w.WriteString(p.Inst)
1818 | 	}
1819 | 	w.WriteString("?>")
1820 | }
1821 | 
// setParent replaces the processing instruction token's parent. Only the
// token's own parent pointer is updated.
func (p *ProcInst) setParent(parent *Element) {
	p.parent = parent
}
1826 | 
// setIndex sets the processing instruction token's index within its parent
// element's Child slice. It only records the value; it does not move the
// token.
func (p *ProcInst) setIndex(index int) {
	p.index = index
}
1832 | 


--------------------------------------------------------------------------------
/etree_test.go:
--------------------------------------------------------------------------------
   1 | // Copyright 2015-2019 Brett Vickers.
   2 | // Use of this source code is governed by a BSD-style
   3 | // license that can be found in the LICENSE file.
   4 | 
   5 | package etree
   6 | 
   7 | import (
   8 | 	"bytes"
   9 | 	"encoding/xml"
  10 | 	"errors"
  11 | 	"io"
  12 | 	"io/fs"
  13 | 	"math/rand"
  14 | 	"os"
  15 | 	"path"
  16 | 	"strings"
  17 | 	"testing"
  18 | )
  19 | 
  20 | func newDocumentFromString(t *testing.T, s string) *Document {
  21 | 	return newDocumentFromString2(t, s, ReadSettings{})
  22 | }
  23 | 
  24 | func newDocumentFromString2(t *testing.T, s string, settings ReadSettings) *Document {
  25 | 	t.Helper()
  26 | 	doc := NewDocument()
  27 | 	doc.ReadSettings = settings
  28 | 	err := doc.ReadFromString(s)
  29 | 	if err != nil {
  30 | 		t.Fatal("etree: failed to parse document")
  31 | 	}
  32 | 	return doc
  33 | }
  34 | 
  35 | func checkStrEq(t *testing.T, got, want string) {
  36 | 	t.Helper()
  37 | 	if got != want {
  38 | 		t.Errorf("etree: unexpected result.\nGot:\n%s\nWanted:\n%s\n", got, want)
  39 | 	}
  40 | }
  41 | 
  42 | func checkStrBinaryEq(t *testing.T, got, want string) {
  43 | 	t.Helper()
  44 | 	if got != want {
  45 | 		t.Errorf("etree: unexpected result.\nGot:\n%v\nWanted:\n%v\n", []byte(got), []byte(want))
  46 | 	}
  47 | }
  48 | 
  49 | func checkIntEq(t *testing.T, got, want int) {
  50 | 	t.Helper()
  51 | 	if got != want {
  52 | 		t.Errorf("etree: unexpected integer. Got: %d. Wanted: %d\n", got, want)
  53 | 	}
  54 | }
  55 | 
  56 | func checkBoolEq(t *testing.T, got, want bool) {
  57 | 	t.Helper()
  58 | 	if got != want {
  59 | 		t.Errorf("etree: unexpected boolean. Got: %v. Wanted: %v\n", got, want)
  60 | 	}
  61 | }
  62 | 
  63 | func checkElementEq(t *testing.T, got, want *Element) {
  64 | 	t.Helper()
  65 | 	if got != want {
  66 | 		t.Errorf("etree: unexpected element. Got: %v. Wanted: %v.\n", got, want)
  67 | 	}
  68 | }
  69 | 
  70 | func checkDocEq(t *testing.T, doc *Document, expected string) {
  71 | 	t.Helper()
  72 | 	doc.Indent(NoIndent)
  73 | 	s, err := doc.WriteToString()
  74 | 	if err != nil {
  75 | 		t.Error("etree: failed to serialize document")
  76 | 	}
  77 | 	if s != expected {
  78 | 		t.Errorf("etree: unexpected document.\nGot:\n%s\nWanted:\n%s\n", s, expected)
  79 | 	}
  80 | }
  81 | 
  82 | func checkIndexes(t *testing.T, e *Element) {
  83 | 	t.Helper()
  84 | 	for i := 0; i < len(e.Child); i++ {
  85 | 		c := e.Child[i]
  86 | 		if c.Index() != i {
  87 | 			t.Errorf("Child index mismatch. Got %d, expected %d.", c.Index(), i)
  88 | 		}
  89 | 		if ce, ok := c.(*Element); ok {
  90 | 			checkIndexes(t, ce)
  91 | 		}
  92 | 	}
  93 | }
  94 | 
// TestDocument builds a small document through the Create* API, checks its
// tab-indented serialization and tree structure, and then exercises basic
// element and attribute selection and removal.
func TestDocument(t *testing.T) {
	// Create a document
	doc := NewDocument()
	doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
	doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
	store := doc.CreateElement("store")
	store.CreateAttr("xmlns:t", "urn:books-com:titles")
	store.CreateDirective("Directive")
	store.CreateComment("This is a comment")
	book := store.CreateElement("book")
	// Creating the same attribute twice must replace the value, not add a
	// second attribute (the expected output below shows lang="en").
	book.CreateAttr("lang", "fr")
	book.CreateAttr("lang", "en")
	title := book.CreateElement("t:title")
	// The second SetText call is expected to replace the first.
	title.SetText("Nicholas Nickleby")
	title.SetText("Great Expectations")
	author := book.CreateElement("author")
	author.CreateCharData("Charles Dickens")
	review := book.CreateElement("review")
	// SetCData is expected to replace the CDATA created just before it.
	review.CreateCData("<<< Will be replaced")
	review.SetCData(">>> Excellent book")
	doc.IndentTabs()

	checkIndexes(t, &doc.Element)

	// Serialize the document to a string
	s, err := doc.WriteToString()
	if err != nil {
		t.Error("etree: failed to serialize document")
	}

	// Make sure the serialized XML matches expectation.
	expected := `<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
<store xmlns:t="urn:books-com:titles">
	<!Directive>
	<!--This is a comment-->
	<book lang="en">
		<t:title>Great Expectations</t:title>
		<author>Charles Dickens</author>
		<review><![CDATA[>>> Excellent book]]></review>
	</book>
</store>
`
	checkStrEq(t, s, expected)

	// Test the structure of the XML. The Child counts include the whitespace
	// tokens inserted by IndentTabs above.
	if doc.Root() != store {
		t.Error("etree: root mismatch")
	}
	if len(store.ChildElements()) != 1 || len(store.Child) != 7 {
		t.Error("etree: incorrect tree structure")
	}
	if len(book.ChildElements()) != 3 || len(book.Attr) != 1 || len(book.Child) != 7 {
		t.Error("etree: incorrect tree structure")
	}
	if len(title.ChildElements()) != 0 || len(title.Child) != 1 || len(title.Attr) != 0 {
		t.Error("etree: incorrect tree structure")
	}
	if len(author.ChildElements()) != 0 || len(author.Child) != 1 || len(author.Attr) != 0 {
		t.Error("etree: incorrect tree structure")
	}
	if len(review.ChildElements()) != 0 || len(review.Child) != 1 || len(review.Attr) != 0 {
		t.Error("etree: incorrect tree structure")
	}
	if book.parent != store || store.parent != &doc.Element || doc.parent != nil {
		t.Error("etree: incorrect tree structure")
	}
	if title.parent != book || author.parent != book {
		t.Error("etree: incorrect tree structure")
	}

	// Perform some basic queries on the document
	elements := doc.SelectElements("store")
	if len(elements) != 1 || elements[0] != store {
		t.Error("etree: incorrect SelectElements result")
	}
	element := doc.SelectElement("store")
	if element != store {
		t.Error("etree: incorrect SelectElement result")
	}
	elements = store.SelectElements("book")
	if len(elements) != 1 || elements[0] != book {
		t.Error("etree: incorrect SelectElements result")
	}
	element = store.SelectElement("book")
	if element != book {
		t.Error("etree: incorrect SelectElement result")
	}
	attr := book.SelectAttr("lang")
	if attr == nil || attr.Key != "lang" || attr.Value != "en" {
		t.Error("etree: incorrect SelectAttr result")
	}
	if book.SelectAttrValue("lang", "unknown") != "en" {
		t.Error("etree: incorrect SelectAttrValue result")
	}
	if book.SelectAttrValue("t:missing", "unknown") != "unknown" {
		t.Error("etree: incorrect SelectAttrValue result")
	}
	// Remove, re-create and remove the attribute again to check that the
	// returned copy carries the value current at removal time.
	attr = book.RemoveAttr("lang")
	if attr.Value != "en" {
		t.Error("etree: incorrect RemoveAttr result")
	}
	book.CreateAttr("lang", "de")
	attr = book.RemoveAttr("lang")
	if attr.Value != "de" {
		t.Error("etree: incorrect RemoveAttr result")
	}
	// The title is expected to be found by its prefixed name and by its bare
	// tag, but not under a different prefix.
	element = book.SelectElement("t:title")
	if element != title || element.Text() != "Great Expectations" || len(element.Attr) != 0 {
		t.Error("etree: incorrect SelectElement result")
	}
	element = book.SelectElement("title")
	if element != title {
		t.Error("etree: incorrect SelectElement result")
	}
	element = book.SelectElement("p:title")
	if element != nil {
		t.Error("etree: incorrect SelectElement result")
	}
	// After removing the title it must no longer be selectable, while its
	// sibling review element is unaffected.
	element = book.RemoveChildAt(title.Index()).(*Element)
	if element != title {
		t.Error("etree: incorrect RemoveElement result")
	}
	element = book.SelectElement("title")
	if element != nil {
		t.Error("etree: incorrect SelectElement result")
	}
	element = book.SelectElement("review")
	if element != review || element.Text() != ">>> Excellent book" || len(element.Attr) != 0 {
		t.Error("etree: incorrect SelectElement result")
	}
}
 227 | 
 228 | func TestImbalancedXML(t *testing.T) {
 229 | 	cases := []string{
 230 | 		`<test>`,
 231 | 		`</test>`,
 232 | 		`<test></test2>`,
 233 | 		`<doc xmlns:p="xyz"><p:test></test></doc>`,
 234 | 		`<doc xmlns:p="xyz"><test></p:test></doc>`,
 235 | 		`<test>malformed`,
 236 | 		`malformed</test>`,
 237 | 		`<test><test></test>`,
 238 | 		`<test></test></test>`,
 239 | 		`<test><test></test></test2>`,
 240 | 		`<test><test2></test></test2>`,
 241 | 	}
 242 | 	for _, c := range cases {
 243 | 		doc := NewDocument()
 244 | 		err := doc.ReadFromString(c)
 245 | 		if err == nil {
 246 | 			t.Errorf("etree: imbalanced XML should have failed:\n%s", c)
 247 | 		}
 248 | 	}
 249 | }
 250 | 
 251 | func TestDocumentCharsetReader(t *testing.T) {
 252 | 	s := `<?xml version="1.0" encoding="lowercase"?>
 253 | <Store>
 254 | 	<Book Lang="en">
 255 | 		<Title>Great Expectations</Title>
 256 | 		<Author>Charles Dickens</Author>
 257 | 	</Book>
 258 | </Store>`
 259 | 
 260 | 	doc := newDocumentFromString2(t, s, ReadSettings{
 261 | 		CharsetReader: func(label string, input io.Reader) (io.Reader, error) {
 262 | 			if label == "lowercase" {
 263 | 				return &lowercaseCharsetReader{input}, nil
 264 | 			}
 265 | 			return nil, errors.New("unknown charset")
 266 | 		},
 267 | 	})
 268 | 
 269 | 	cases := []struct {
 270 | 		path string
 271 | 		text string
 272 | 	}{
 273 | 		{"/store/book/title", "great expectations"},
 274 | 		{"/store/book/author", "charles dickens"},
 275 | 	}
 276 | 	for _, c := range cases {
 277 | 		e := doc.FindElement(c.path)
 278 | 		if e == nil {
 279 | 			t.Errorf("etree: failed to find element '%s'", c.path)
 280 | 		} else if e.Text() != c.text {
 281 | 			t.Errorf("etree: expected path '%s' to contain '%s', got '%s'", c.path, c.text, e.Text())
 282 | 		}
 283 | 	}
 284 | }
 285 | 
 286 | type lowercaseCharsetReader struct {
 287 | 	r io.Reader
 288 | }
 289 | 
 290 | func (c *lowercaseCharsetReader) Read(p []byte) (n int, err error) {
 291 | 	n, err = c.r.Read(p)
 292 | 	if err != nil {
 293 | 		return n, err
 294 | 	}
 295 | 	for i := 0; i < n; i++ {
 296 | 		if p[i] >= 'A' && p[i] <= 'Z' {
 297 | 			p[i] = p[i] - 'A' + 'a'
 298 | 		}
 299 | 	}
 300 | 	return n, nil
 301 | }
 302 | 
 303 | func TestDocumentReadPermissive(t *testing.T) {
 304 | 	s := "<select disabled></select>"
 305 | 
 306 | 	doc := NewDocument()
 307 | 	err := doc.ReadFromString(s)
 308 | 	if err == nil {
 309 | 		t.Fatal("etree: incorrect ReadFromString result")
 310 | 	}
 311 | 
 312 | 	doc.ReadSettings.Permissive = true
 313 | 	err = doc.ReadFromString(s)
 314 | 	if err != nil {
 315 | 		t.Fatal("etree: incorrect ReadFromString result")
 316 | 	}
 317 | }
 318 | 
 319 | func TestEmbeddedComment(t *testing.T) {
 320 | 	s := `<a>123<!-- test -->456</a>`
 321 | 
 322 | 	doc := NewDocument()
 323 | 	err := doc.ReadFromString(s)
 324 | 	if err != nil {
 325 | 		t.Fatal("etree: incorrect ReadFromString result")
 326 | 	}
 327 | 
 328 | 	a := doc.SelectElement("a")
 329 | 	checkStrEq(t, a.Text(), "123456")
 330 | }
 331 | 
 332 | func TestDocumentReadHTMLEntities(t *testing.T) {
 333 | 	s := `<store>
 334 | 	<book lang="en">
 335 | 		<title>&rarr;&nbsp;Great Expectations</title>
 336 | 		<author>Charles Dickens</author>
 337 | 	</book>
 338 | </store>`
 339 | 
 340 | 	doc := NewDocument()
 341 | 	err := doc.ReadFromString(s)
 342 | 	if err == nil {
 343 | 		t.Fatal("etree: incorrect ReadFromString result")
 344 | 	}
 345 | 
 346 | 	doc.ReadSettings.Entity = xml.HTMLEntity
 347 | 	err = doc.ReadFromString(s)
 348 | 	if err != nil {
 349 | 		t.Fatal("etree: incorrect ReadFromString result")
 350 | 	}
 351 | }
 352 | 
 353 | func TestDocumentReadHTMLAutoClose(t *testing.T) {
 354 | 	cases := []struct {
 355 | 		name  string
 356 | 		input string
 357 | 		want  string
 358 | 	}{
 359 | 		{"empty", ``, ``},
 360 | 		{"oneSelfClosing", `<br>`, `<br/>`},
 361 | 		{"twoSelfClosingAndText", `<br>some text<br>`, `<br/>some text<br/>`},
 362 | 		{
 363 | 			name: "largerExample",
 364 | 			input: `<img src="cover.jpg">
 365 | <hr>
 366 | Author: Charles Dickens<br>
 367 | Book: Great Expectations<br>`,
 368 | 			want: `<img src="cover.jpg"/>
 369 | <hr/>
 370 | Author: Charles Dickens<br/>
 371 | Book: Great Expectations<br/>`},
 372 | 	}
 373 | 
 374 | 	for _, c := range cases {
 375 | 		t.Run(c.name, func(t *testing.T) {
 376 | 			doc := NewDocument()
 377 | 			doc.ReadSettings.Permissive = true
 378 | 			doc.ReadSettings.AutoClose = xml.HTMLAutoClose
 379 | 			err := doc.ReadFromString(c.input)
 380 | 			if err != nil {
 381 | 				t.Fatal("etree: ReadFromString() error = ", err)
 382 | 			}
 383 | 			s, err := doc.WriteToString()
 384 | 			if err != nil {
 385 | 				t.Fatal("etree: WriteToString() error = ", err)
 386 | 			}
 387 | 			checkStrEq(t, s, c.want)
 388 | 		})
 389 | 	}
 390 | }
 391 | 
 392 | func TestEscapeCodes(t *testing.T) {
 393 | 	cases := []struct {
 394 | 		input         string
 395 | 		normal        string
 396 | 		attrCanonical string
 397 | 		textCanonical string
 398 | 	}{
 399 | 		{
 400 | 			"&<>'\"\t\n\r",
 401 | 			"<e a=\"&amp;&lt;&gt;&apos;&quot;\t\n\r\">&amp;&lt;&gt;&apos;&quot;\t\n\r</e>",
 402 | 			"<e a=\"&amp;&lt;>'&quot;&#x9;&#xA;&#xD;\">&amp;&lt;&gt;&apos;&quot;\t\n\r</e>",
 403 | 			"<e a=\"&amp;&lt;&gt;&apos;&quot;\t\n\r\">&amp;&lt;&gt;'\"\t\n&#xD;</e>",
 404 | 		},
 405 | 		{
 406 | 			"\x00\x1f\x08\x09\x0a\x0d",
 407 | 			"<e a=\"���\t\n\r\">���\t\n\r</e>",
 408 | 			"<e a=\"���&#x9;&#xA;&#xD;\">���\t\n\r</e>",
 409 | 			"<e a=\"���\t\n\r\">���\t\n&#xD;</e>",
 410 | 		},
 411 | 	}
 412 | 	for _, c := range cases {
 413 | 		doc := NewDocument()
 414 | 
 415 | 		e := doc.CreateElement("e")
 416 | 		e.SetText(c.input)
 417 | 		e.CreateAttr("a", c.input)
 418 | 
 419 | 		doc.WriteSettings.CanonicalText = false
 420 | 		doc.WriteSettings.CanonicalAttrVal = false
 421 | 		s, err := doc.WriteToString()
 422 | 		if err != nil {
 423 | 			t.Error("etree: Escape test produced inocrrect result.")
 424 | 		}
 425 | 		checkStrEq(t, s, c.normal)
 426 | 
 427 | 		doc.WriteSettings.CanonicalText = false
 428 | 		doc.WriteSettings.CanonicalAttrVal = true
 429 | 		s, err = doc.WriteToString()
 430 | 		if err != nil {
 431 | 			t.Error("etree: Escape test produced inocrrect result.")
 432 | 		}
 433 | 		checkStrEq(t, s, c.attrCanonical)
 434 | 
 435 | 		doc.WriteSettings.CanonicalText = true
 436 | 		doc.WriteSettings.CanonicalAttrVal = false
 437 | 		s, err = doc.WriteToString()
 438 | 		if err != nil {
 439 | 			t.Error("etree: Escape test produced inocrrect result.")
 440 | 		}
 441 | 		checkStrEq(t, s, c.textCanonical)
 442 | 	}
 443 | }
 444 | 
 445 | func TestCanonical(t *testing.T) {
 446 | 	BOM := "\xef\xbb\xbf"
 447 | 
 448 | 	doc := NewDocument()
 449 | 	doc.WriteSettings.CanonicalEndTags = true
 450 | 	doc.WriteSettings.CanonicalText = true
 451 | 	doc.WriteSettings.CanonicalAttrVal = true
 452 | 	doc.CreateCharData(BOM)
 453 | 	doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
 454 | 
 455 | 	people := doc.CreateElement("People")
 456 | 	people.CreateComment("These are all known people")
 457 | 
 458 | 	jon := people.CreateElement("Person")
 459 | 	jon.CreateAttr("name", "Jon O'Reilly")
 460 | 	jon.SetText("\r<'\">&\u0004\u0005\u001f�")
 461 | 
 462 | 	sally := people.CreateElement("Person")
 463 | 	sally.CreateAttr("name", "Sally")
 464 | 	sally.CreateAttr("escape", "\r\n\t<'\">&")
 465 | 
 466 | 	doc.Indent(2)
 467 | 	s, err := doc.WriteToString()
 468 | 	if err != nil {
 469 | 		t.Error("etree: WriteSettings WriteTo produced incorrect result.")
 470 | 	}
 471 | 
 472 | 	expected := BOM + `<?xml-stylesheet type="text/xsl" href="style.xsl"?>
 473 | <People>
 474 |   <!--These are all known people-->
 475 |   <Person name="Jon O'Reilly">&#xD;&lt;'"&gt;&amp;����</Person>
 476 |   <Person name="Sally" escape="&#xD;&#xA;&#x9;&lt;'&quot;>&amp;"></Person>
 477 | </People>
 478 | `
 479 | 	checkStrEq(t, s, expected)
 480 | }
 481 | 
 482 | func TestCopy(t *testing.T) {
 483 | 	s := `<store>
 484 | 	<book lang="en">
 485 | 		<title>Great Expectations</title>
 486 | 		<author>Charles Dickens</author>
 487 | 	</book>
 488 | </store>`
 489 | 
 490 | 	doc := newDocumentFromString(t, s)
 491 | 
 492 | 	s1, err := doc.WriteToString()
 493 | 	if err != nil {
 494 | 		t.Error("etree: incorrect WriteToString result")
 495 | 	}
 496 | 
 497 | 	doc2 := doc.Copy()
 498 | 	checkIndexes(t, &doc2.Element)
 499 | 	s2, err := doc2.WriteToString()
 500 | 	if err != nil {
 501 | 		t.Error("etree: incorrect Copy result")
 502 | 	}
 503 | 
 504 | 	if s1 != s2 {
 505 | 		t.Error("etree: mismatched Copy result")
 506 | 		t.Error("wanted:\n" + s1)
 507 | 		t.Error("got:\n" + s2)
 508 | 	}
 509 | 
 510 | 	e1 := doc.FindElement("./store/book/title")
 511 | 	e2 := doc2.FindElement("./store/book/title")
 512 | 	if e1 == nil || e2 == nil || e1.parent == nil || e1 == e2 {
 513 | 		t.Error("etree: incorrect FindElement result")
 514 | 	}
 515 | 
 516 | 	e1.parent.RemoveChildAt(e1.Index())
 517 | 	s1, _ = doc.WriteToString()
 518 | 	s2, _ = doc2.WriteToString()
 519 | 	if s1 == s2 {
 520 | 		t.Error("etree: incorrect result after RemoveElement")
 521 | 	}
 522 | }
 523 | 
 524 | func TestGetPath(t *testing.T) {
 525 | 	s := `<a>
 526 |  <b1>
 527 |   <c1>
 528 |    <d1/>
 529 |    <d1a/>
 530 |   </c1>
 531 |  </b1>
 532 |  <b2>
 533 |   <c2>
 534 |    <d2/>
 535 |   </c2>
 536 |  </b2>
 537 | </a>`
 538 | 
 539 | 	doc := newDocumentFromString(t, s)
 540 | 
 541 | 	cases := []struct {
 542 | 		from    string
 543 | 		to      string
 544 | 		relpath string
 545 | 		topath  string
 546 | 	}{
 547 | 		{"a", ".", "..", "/"},
 548 | 		{".", "a", "./a", "/a"},
 549 | 		{"a/b1/c1/d1", ".", "../../../..", "/"},
 550 | 		{".", "a/b1/c1/d1", "./a/b1/c1/d1", "/a/b1/c1/d1"},
 551 | 		{"a", "a", ".", "/a"},
 552 | 		{"a/b1", "a/b1/c1", "./c1", "/a/b1/c1"},
 553 | 		{"a/b1/c1", "a/b1", "..", "/a/b1"},
 554 | 		{"a/b1/c1", "a/b1/c1", ".", "/a/b1/c1"},
 555 | 		{"a", "a/b1", "./b1", "/a/b1"},
 556 | 		{"a/b1", "a", "..", "/a"},
 557 | 		{"a", "a/b1/c1", "./b1/c1", "/a/b1/c1"},
 558 | 		{"a/b1/c1", "a", "../..", "/a"},
 559 | 		{"a/b1/c1/d1", "a", "../../..", "/a"},
 560 | 		{"a", "a/b1/c1/d1", "./b1/c1/d1", "/a/b1/c1/d1"},
 561 | 		{"a/b1", "a/b2", "../b2", "/a/b2"},
 562 | 		{"a/b2", "a/b1", "../b1", "/a/b1"},
 563 | 		{"a/b1/c1/d1", "a/b2/c2/d2", "../../../b2/c2/d2", "/a/b2/c2/d2"},
 564 | 		{"a/b2/c2/d2", "a/b1/c1/d1", "../../../b1/c1/d1", "/a/b1/c1/d1"},
 565 | 		{"a/b1/c1/d1", "a/b1/c1/d1a", "../d1a", "/a/b1/c1/d1a"},
 566 | 	}
 567 | 
 568 | 	for _, c := range cases {
 569 | 		fe := doc.FindElement(c.from)
 570 | 		te := doc.FindElement(c.to)
 571 | 
 572 | 		rp := te.GetRelativePath(fe)
 573 | 		if rp != c.relpath {
 574 | 			t.Errorf("GetRelativePath from '%s' to '%s'. Expected '%s', got '%s'.\n", c.from, c.to, c.relpath, rp)
 575 | 		}
 576 | 
 577 | 		p := te.GetPath()
 578 | 		if p != c.topath {
 579 | 			t.Errorf("GetPath for '%s'. Expected '%s', got '%s'.\n", c.to, c.topath, p)
 580 | 		}
 581 | 	}
 582 | }
 583 | 
 584 | func TestInsertChild(t *testing.T) {
 585 | 	s := `<book lang="en">
 586 |   <t:title>Great Expectations</t:title>
 587 |   <author>Charles Dickens</author>
 588 | </book>
 589 | `
 590 | 
 591 | 	doc := newDocumentFromString(t, s)
 592 | 
 593 | 	year := NewElement("year")
 594 | 	year.SetText("1861")
 595 | 
 596 | 	book := doc.FindElement("//book")
 597 | 	book.InsertChildAt(book.SelectElement("t:title").Index(), year)
 598 | 
 599 | 	expected1 := `<book lang="en">
 600 |   <year>1861</year>
 601 |   <t:title>Great Expectations</t:title>
 602 |   <author>Charles Dickens</author>
 603 | </book>
 604 | `
 605 | 	doc.Indent(2)
 606 | 	s1, _ := doc.WriteToString()
 607 | 	checkStrEq(t, s1, expected1)
 608 | 
 609 | 	book.RemoveChildAt(year.Index())
 610 | 	book.InsertChildAt(book.SelectElement("author").Index(), year)
 611 | 
 612 | 	expected2 := `<book lang="en">
 613 |   <t:title>Great Expectations</t:title>
 614 |   <year>1861</year>
 615 |   <author>Charles Dickens</author>
 616 | </book>
 617 | `
 618 | 	doc.Indent(2)
 619 | 	s2, _ := doc.WriteToString()
 620 | 	checkStrEq(t, s2, expected2)
 621 | 
 622 | 	book.RemoveChildAt(year.Index())
 623 | 	book.InsertChildAt(len(book.Child), year)
 624 | 
 625 | 	expected3 := `<book lang="en">
 626 |   <t:title>Great Expectations</t:title>
 627 |   <author>Charles Dickens</author>
 628 |   <year>1861</year>
 629 | </book>
 630 | `
 631 | 	doc.Indent(2)
 632 | 	s3, _ := doc.WriteToString()
 633 | 	checkStrEq(t, s3, expected3)
 634 | 
 635 | 	book.RemoveChildAt(year.Index())
 636 | 	book.InsertChildAt(999, year)
 637 | 
 638 | 	expected4 := `<book lang="en">
 639 |   <t:title>Great Expectations</t:title>
 640 |   <author>Charles Dickens</author>
 641 |   <year>1861</year>
 642 | </book>
 643 | `
 644 | 	doc.Indent(2)
 645 | 	s4, _ := doc.WriteToString()
 646 | 	checkStrEq(t, s4, expected4)
 647 | 
 648 | 	year = doc.FindElement("//book/year")
 649 | 	book.InsertChildAt(0, year)
 650 | 
 651 | 	expected5 := `<book lang="en">
 652 |   <year>1861</year>
 653 |   <t:title>Great Expectations</t:title>
 654 |   <author>Charles Dickens</author>
 655 | </book>
 656 | `
 657 | 
 658 | 	doc.Indent(2)
 659 | 	s5, _ := doc.WriteToString()
 660 | 	checkStrEq(t, s5, expected5)
 661 | 
 662 | 	author := doc.FindElement("//book/author")
 663 | 	year = doc.FindElement("//book/year")
 664 | 	book.InsertChildAt(author.Index(), year)
 665 | 
 666 | 	expected6 := `<book lang="en">
 667 |   <t:title>Great Expectations</t:title>
 668 |   <year>1861</year>
 669 |   <author>Charles Dickens</author>
 670 | </book>
 671 | `
 672 | 	doc.Indent(2)
 673 | 	s6, _ := doc.WriteToString()
 674 | 	checkStrEq(t, s6, expected6)
 675 | }
 676 | 
 677 | func TestCdata(t *testing.T) {
 678 | 	var tests = []struct {
 679 | 		in, out string
 680 | 	}{
 681 | 		{`<tag>1234567</tag>`, "1234567"},
 682 | 		{`<tag><![CDATA[1234567]]></tag>`, "1234567"},
 683 | 		{`<tag>1<![CDATA[2]]>3<![CDATA[4]]>5<![CDATA[6]]>7</tag>`, "1234567"},
 684 | 		{`<tag>1<![CDATA[2]]>3<inner>4</inner>5<![CDATA[6]]>7</tag>`, "123"},
 685 | 		{`<tag>1<inner>4</inner>5<![CDATA[6]]>7</tag>`, "1"},
 686 | 		{`<tag><![CDATA[1]]><inner>4</inner>5<![CDATA[6]]>7</tag>`, "1"},
 687 | 	}
 688 | 
 689 | 	for _, test := range tests {
 690 | 		doc := NewDocument()
 691 | 		err := doc.ReadFromString(test.in)
 692 | 		if err != nil {
 693 | 			t.Fatal("etree ReadFromString: " + err.Error())
 694 | 		}
 695 | 
 696 | 		tag := doc.FindElement("tag")
 697 | 		if tag.Text() != test.out {
 698 | 			t.Fatalf("etree invalid cdata. Expected: %v. Got: %v\n", test.out, tag.Text())
 699 | 		}
 700 | 	}
 701 | }
 702 | 
 703 | func TestAddChild(t *testing.T) {
 704 | 	s := `<book lang="en">
 705 |   <t:title>Great Expectations</t:title>
 706 |   <author>Charles Dickens</author>
 707 | </book>
 708 | `
 709 | 	doc1 := newDocumentFromString(t, s)
 710 | 
 711 | 	doc2 := NewDocument()
 712 | 	root := doc2.CreateElement("root")
 713 | 
 714 | 	for _, e := range doc1.FindElements("//book/*") {
 715 | 		root.AddChild(e)
 716 | 	}
 717 | 
 718 | 	expected1 := `<book lang="en"/>
 719 | `
 720 | 	doc1.Indent(2)
 721 | 	s1, _ := doc1.WriteToString()
 722 | 	checkStrEq(t, s1, expected1)
 723 | 
 724 | 	expected2 := `<root>
 725 |   <t:title>Great Expectations</t:title>
 726 |   <author>Charles Dickens</author>
 727 | </root>
 728 | `
 729 | 	doc2.Indent(2)
 730 | 	s2, _ := doc2.WriteToString()
 731 | 	checkStrEq(t, s2, expected2)
 732 | }
 733 | 
 734 | func TestSetRoot(t *testing.T) {
 735 | 	s := `<?test a="wow"?>
 736 | <book>
 737 |   <title>Great Expectations</title>
 738 |   <author>Charles Dickens</author>
 739 | </book>
 740 | `
 741 | 	doc := newDocumentFromString(t, s)
 742 | 
 743 | 	origroot := doc.Root()
 744 | 	if origroot.Parent() != &doc.Element {
 745 | 		t.Error("Root incorrect")
 746 | 	}
 747 | 
 748 | 	newroot := NewElement("root")
 749 | 	doc.SetRoot(newroot)
 750 | 
 751 | 	if doc.Root() != newroot {
 752 | 		t.Error("doc.Root() != newroot")
 753 | 	}
 754 | 	if origroot.Parent() != nil {
 755 | 		t.Error("origroot.Parent() != nil")
 756 | 	}
 757 | 
 758 | 	expected1 := `<?test a="wow"?>
 759 | <root/>
 760 | `
 761 | 	doc.Indent(2)
 762 | 	s1, _ := doc.WriteToString()
 763 | 	checkStrEq(t, s1, expected1)
 764 | 
 765 | 	doc.SetRoot(origroot)
 766 | 	doc.Indent(2)
 767 | 	expected2 := s
 768 | 	s2, _ := doc.WriteToString()
 769 | 	checkStrEq(t, s2, expected2)
 770 | 
 771 | 	doc2 := NewDocument()
 772 | 	doc2.CreateProcInst("test", `a="wow"`)
 773 | 	doc2.SetRoot(NewElement("root"))
 774 | 	doc2.Indent(2)
 775 | 	expected3 := expected1
 776 | 	s3, _ := doc2.WriteToString()
 777 | 	checkStrEq(t, s3, expected3)
 778 | 
 779 | 	doc2.SetRoot(doc.Root())
 780 | 	doc2.Indent(2)
 781 | 	expected4 := s
 782 | 	s4, _ := doc2.WriteToString()
 783 | 	checkStrEq(t, s4, expected4)
 784 | 
 785 | 	expected5 := `<?test a="wow"?>
 786 | `
 787 | 	doc.Indent(2)
 788 | 	s5, _ := doc.WriteToString()
 789 | 	checkStrEq(t, s5, expected5)
 790 | }
 791 | 
 792 | func TestSortAttrs(t *testing.T) {
 793 | 	s := `<el foo='5' Foo='2' aaa='4' สวัสดี='7' AAA='1' a01='3' z='6' a:ZZZ='9' a:AAA='8'/>`
 794 | 	doc := newDocumentFromString(t, s)
 795 | 	doc.Root().SortAttrs()
 796 | 	doc.Indent(2)
 797 | 	out, _ := doc.WriteToString()
 798 | 	checkStrEq(t, out, `<el AAA="1" Foo="2" a01="3" aaa="4" foo="5" z="6" สวัสดี="7" a:AAA="8" a:ZZZ="9"/>`+"\n")
 799 | }
 800 | 
 801 | func TestCharsetReaderDefaultSetting(t *testing.T) {
 802 | 	// Test encodings where the default pass-through charset conversion
 803 | 	// should work for common single-byte character encodings.
 804 | 	cases := []string{
 805 | 		`<?xml version="1.0"?><foo></foo>`,
 806 | 		`<?xml version="1.0" encoding="ISO-8859-1"?><foo></foo>`,
 807 | 		`<?xml version="1.0" encoding="Windows-1252"?><foo></foo>`,
 808 | 		`<?xml version="1.0" encoding="UTF-8"?><foo></foo>`,
 809 | 		`<?xml version="1.0" encoding="US-ASCII"?><foo></foo>`,
 810 | 	}
 811 | 
 812 | 	for _, c := range cases {
 813 | 		doc := NewDocument()
 814 | 		if err := doc.ReadFromBytes([]byte(c)); err != nil {
 815 | 			t.Error(err)
 816 | 		}
 817 | 	}
 818 | }
 819 | 
 820 | func TestCharData(t *testing.T) {
 821 | 	doc := NewDocument()
 822 | 	root := doc.CreateElement("root")
 823 | 	root.CreateCharData("This ")
 824 | 	root.CreateCData("is ")
 825 | 	e1 := NewText("a ")
 826 | 	e2 := NewCData("text ")
 827 | 	root.AddChild(e1)
 828 | 	root.AddChild(e2)
 829 | 	root.CreateCharData("Element!!")
 830 | 
 831 | 	s, err := doc.WriteToString()
 832 | 	if err != nil {
 833 | 		t.Error("etree: failed to serialize document")
 834 | 	}
 835 | 
 836 | 	checkStrEq(t, s, `<root>This <![CDATA[is ]]>a <![CDATA[text ]]>Element!!</root>`)
 837 | 
 838 | 	// Check we can parse the output
 839 | 	err = doc.ReadFromString(s)
 840 | 	if err != nil {
 841 | 		t.Fatal("etree: incorrect ReadFromString result")
 842 | 	}
 843 | 	if doc.Root().Text() != "This is a text Element!!" {
 844 | 		t.Error("etree: invalid text")
 845 | 	}
 846 | }
 847 | 
 848 | func TestIndentSimple(t *testing.T) {
 849 | 	doc := NewDocument()
 850 | 	root := doc.CreateElement("root")
 851 | 	ch1 := root.CreateElement("child1")
 852 | 	ch1.CreateElement("child2")
 853 | 
 854 | 	// First test Unindent.
 855 | 	doc.Unindent()
 856 | 	s, err := doc.WriteToString()
 857 | 	if err != nil {
 858 | 		t.Error("etree: failed to serialize document")
 859 | 	}
 860 | 	expected := "<root><child1><child2/></child1></root>"
 861 | 	checkStrEq(t, s, expected)
 862 | 
 863 | 	// Now test Indent with NoIndent (which should produce the same result
 864 | 	// as Unindent).
 865 | 	doc.Indent(NoIndent)
 866 | 	s, err = doc.WriteToString()
 867 | 	if err != nil {
 868 | 		t.Error("etree: failed to serialize document")
 869 | 	}
 870 | 	checkStrEq(t, s, expected)
 871 | 
 872 | 	// Run all indent test cases.
 873 | 	tests := []struct {
 874 | 		useTabs, useCRLF bool
 875 | 		ws, nl           string
 876 | 	}{
 877 | 		{false, false, " ", "\n"},
 878 | 		{false, true, " ", "\r\n"},
 879 | 		{true, false, "\t", "\n"},
 880 | 		{true, true, "\t", "\r\n"},
 881 | 	}
 882 | 
 883 | 	for _, test := range tests {
 884 | 		doc.WriteSettings.UseCRLF = test.useCRLF
 885 | 		if test.useTabs {
 886 | 			doc.IndentTabs()
 887 | 			s, err := doc.WriteToString()
 888 | 			if err != nil {
 889 | 				t.Error("etree: failed to serialize document")
 890 | 			}
 891 | 			tab := test.ws
 892 | 			expected := "<root>" + test.nl + tab + "<child1>" + test.nl +
 893 | 				tab + tab + "<child2/>" + test.nl + tab +
 894 | 				"</child1>" + test.nl + "</root>" + test.nl
 895 | 			checkStrEq(t, s, expected)
 896 | 		} else {
 897 | 			for i := 0; i < 256; i++ {
 898 | 				doc.Indent(i)
 899 | 				s, err := doc.WriteToString()
 900 | 				if err != nil {
 901 | 					t.Error("etree: failed to serialize document")
 902 | 				}
 903 | 				tab := strings.Repeat(test.ws, i)
 904 | 				expected := "<root>" + test.nl + tab + "<child1>" + test.nl +
 905 | 					tab + tab + "<child2/>" + test.nl + tab +
 906 | 					"</child1>" + test.nl + "</root>" + test.nl
 907 | 				checkStrEq(t, s, expected)
 908 | 			}
 909 | 		}
 910 | 	}
 911 | }
 912 | 
 913 | func TestIndentWithDefaultSettings(t *testing.T) {
 914 | 	input := `<root>
 915 | 	<child1>
 916 | 		<child2>    </child2>
 917 | 	</child1>
 918 | </root>`
 919 | 
 920 | 	doc := NewDocument()
 921 | 	err := doc.ReadFromString(input)
 922 | 	if err != nil {
 923 | 		t.Error("etree: failed to read string")
 924 | 	}
 925 | 
 926 | 	settings := NewIndentSettings()
 927 | 	doc.IndentWithSettings(settings)
 928 | 	s, err := doc.WriteToString()
 929 | 	if err != nil {
 930 | 		t.Error("etree: failed to serialize document")
 931 | 	}
 932 | 	expected := "<root>\n    <child1>\n        <child2/>\n    </child1>\n</root>\n"
 933 | 	checkStrEq(t, s, expected)
 934 | }
 935 | 
 936 | func TestIndentWithSettings(t *testing.T) {
 937 | 	doc := NewDocument()
 938 | 	root := doc.CreateElement("root")
 939 | 	ch1 := root.CreateElement("child1")
 940 | 	ch1.CreateElement("child2")
 941 | 
 942 | 	// First test with NoIndent.
 943 | 	settings := NewIndentSettings()
 944 | 	settings.UseCRLF = false
 945 | 	settings.UseTabs = false
 946 | 	settings.Spaces = NoIndent
 947 | 	doc.IndentWithSettings(settings)
 948 | 	s, err := doc.WriteToString()
 949 | 	if err != nil {
 950 | 		t.Error("etree: failed to serialize document")
 951 | 	}
 952 | 	expected := "<root><child1><child2/></child1></root>"
 953 | 	checkStrEq(t, s, expected)
 954 | 
 955 | 	// Run all indent test cases.
 956 | 	tests := []struct {
 957 | 		useTabs, useCRLF bool
 958 | 		ws, nl           string
 959 | 	}{
 960 | 		{false, false, " ", "\n"},
 961 | 		{false, true, " ", "\r\n"},
 962 | 		{true, false, "\t", "\n"},
 963 | 		{true, true, "\t", "\r\n"},
 964 | 	}
 965 | 
 966 | 	for _, test := range tests {
 967 | 		if test.useTabs {
 968 | 			settings := NewIndentSettings()
 969 | 			settings.UseTabs = true
 970 | 			settings.UseCRLF = test.useCRLF
 971 | 			doc.IndentWithSettings(settings)
 972 | 			s, err := doc.WriteToString()
 973 | 			if err != nil {
 974 | 				t.Error("etree: failed to serialize document")
 975 | 			}
 976 | 			tab := test.ws
 977 | 			expected := "<root>" + test.nl + tab + "<child1>" + test.nl +
 978 | 				tab + tab + "<child2/>" + test.nl + tab +
 979 | 				"</child1>" + test.nl + "</root>" + test.nl
 980 | 			checkStrEq(t, s, expected)
 981 | 		} else {
 982 | 			for i := 0; i < 256; i++ {
 983 | 				settings := NewIndentSettings()
 984 | 				settings.Spaces = i
 985 | 				settings.UseTabs = false
 986 | 				settings.UseCRLF = test.useCRLF
 987 | 				doc.IndentWithSettings(settings)
 988 | 				s, err := doc.WriteToString()
 989 | 				if err != nil {
 990 | 					t.Error("etree: failed to serialize document")
 991 | 				}
 992 | 				tab := strings.Repeat(test.ws, i)
 993 | 				expected := "<root>" + test.nl + tab + "<child1>" + test.nl +
 994 | 					tab + tab + "<child2/>" + test.nl + tab +
 995 | 					"</child1>" + test.nl + "</root>" + test.nl
 996 | 				checkStrEq(t, s, expected)
 997 | 			}
 998 | 		}
 999 | 	}
1000 | }
1001 | 
1002 | func TestIndentPreserveWhitespace(t *testing.T) {
1003 | 	tests := []struct {
1004 | 		input    string
1005 | 		expected string
1006 | 	}{
1007 | 		{"<test></test>", "<test/>"},
1008 | 		{"<test>  </test>", "<test>  </test>"},
1009 | 		{"<test>\t</test>", "<test>\t</test>"},
1010 | 		{"<test>\t\n \t</test>", "<test>\t\n \t</test>"},
1011 | 		{"<test><![CDATA[ ]]></test>", "<test> </test>"},
1012 | 		{"<test> <![CDATA[ ]]> </test>", "<test/>"},
1013 | 		{"<outer> <inner> </inner> </outer>", "<outer>\n  <inner> </inner>\n</outer>"},
1014 | 	}
1015 | 
1016 | 	for _, test := range tests {
1017 | 		doc := NewDocument()
1018 | 		err := doc.ReadFromString(test.input)
1019 | 		if err != nil {
1020 | 			t.Error("etree: failed to read string")
1021 | 		}
1022 | 
1023 | 		s := NewIndentSettings()
1024 | 		s.Spaces = 2
1025 | 		s.PreserveLeafWhitespace = true
1026 | 		s.SuppressTrailingWhitespace = true
1027 | 		doc.IndentWithSettings(s)
1028 | 
1029 | 		output, err := doc.WriteToString()
1030 | 		if err != nil {
1031 | 			t.Error("etree: failed to read string")
1032 | 		}
1033 | 		checkStrEq(t, output, test.expected)
1034 | 	}
1035 | }
1036 | 
1037 | func TestPreserveCData(t *testing.T) {
1038 | 	tests := []struct {
1039 | 		input                   string
1040 | 		expectedWithPreserve    string
1041 | 		expectedWithoutPreserve string
1042 | 	}{
1043 | 		{
1044 | 			"<test><![CDATA[x]]></test>",
1045 | 			"<test><![CDATA[x]]></test>",
1046 | 			"<test>x</test>",
1047 | 		},
1048 | 		{
1049 | 			"<tag><![CDATA[x <b>foo</b>]]></tag>",
1050 | 			"<tag><![CDATA[x <b>foo</b>]]></tag>",
1051 | 			"<tag>x &lt;b&gt;foo&lt;/b&gt;</tag>",
1052 | 		},
1053 | 		{
1054 | 			"<name><![CDATA[My]]> <b>name</b> <![CDATA[is]]></name>",
1055 | 			"<name><![CDATA[My]]> <b>name</b> <![CDATA[is]]></name>",
1056 | 			"<name>My <b>name</b> is</name>",
1057 | 		},
1058 | 	}
1059 | 
1060 | 	for _, test := range tests {
1061 | 		doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: true})
1062 | 		output, _ := doc.WriteToString()
1063 | 		checkStrEq(t, output, test.expectedWithPreserve)
1064 | 	}
1065 | 
1066 | 	for _, test := range tests {
1067 | 		doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: false})
1068 | 		output, _ := doc.WriteToString()
1069 | 		checkStrEq(t, output, test.expectedWithoutPreserve)
1070 | 	}
1071 | }
1072 | 
1073 | func TestTokenIndexing(t *testing.T) {
1074 | 	s := `<?xml version="1.0" encoding="UTF-8"?>
1075 | <?xml-stylesheet type="text/xsl" href="style.xsl"?>
1076 | <store xmlns:t="urn:books-com:titles">
1077 | 	<!Directive>
1078 | 	<!--This is a comment-->
1079 | 	<book lang="en">
1080 | 		<t:title>Great Expectations</t:title>
1081 | 		<author>Charles Dickens</author>
1082 | 		<review/>
1083 | 	</book>
1084 | </store>`
1085 | 
1086 | 	doc := newDocumentFromString(t, s)
1087 | 	review := doc.FindElement("/store/book/review")
1088 | 	review.SetText("Excellent")
1089 | 
1090 | 	checkIndexes(t, &doc.Element)
1091 | 
1092 | 	doc.Indent(4)
1093 | 	checkIndexes(t, &doc.Element)
1094 | 
1095 | 	doc.Indent(NoIndent)
1096 | 	checkIndexes(t, &doc.Element)
1097 | 
1098 | 	e := NewElement("foo")
1099 | 	store := doc.SelectElement("store")
1100 | 	store.InsertChildAt(0, e)
1101 | 	checkIndexes(t, &doc.Element)
1102 | 
1103 | 	store.RemoveChildAt(0)
1104 | 	checkIndexes(t, &doc.Element)
1105 | }
1106 | 
1107 | func TestSetText(t *testing.T) {
1108 | 	doc := NewDocument()
1109 | 	root := doc.CreateElement("root")
1110 | 
1111 | 	checkDocEq(t, doc, `<root/>`)
1112 | 	checkStrEq(t, root.Text(), "")
1113 | 	checkIntEq(t, len(root.Child), 0)
1114 | 
1115 | 	root.SetText("foo")
1116 | 	checkDocEq(t, doc, `<root>foo</root>`)
1117 | 	checkStrEq(t, root.Text(), "foo")
1118 | 	checkIntEq(t, len(root.Child), 1)
1119 | 
1120 | 	root.SetText("bar")
1121 | 	checkDocEq(t, doc, `<root>bar</root>`)
1122 | 	checkStrEq(t, root.Text(), "bar")
1123 | 	checkIntEq(t, len(root.Child), 1)
1124 | 
1125 | 	root.CreateCData("cdata")
1126 | 	checkDocEq(t, doc, `<root>bar<![CDATA[cdata]]></root>`)
1127 | 	checkStrEq(t, root.Text(), "barcdata")
1128 | 	checkIntEq(t, len(root.Child), 2)
1129 | 
1130 | 	root.SetText("qux")
1131 | 	checkDocEq(t, doc, `<root>qux</root>`)
1132 | 	checkStrEq(t, root.Text(), "qux")
1133 | 	checkIntEq(t, len(root.Child), 1)
1134 | 
1135 | 	root.CreateCData("cdata")
1136 | 	checkDocEq(t, doc, `<root>qux<![CDATA[cdata]]></root>`)
1137 | 	checkStrEq(t, root.Text(), "quxcdata")
1138 | 	checkIntEq(t, len(root.Child), 2)
1139 | 
1140 | 	root.SetCData("baz")
1141 | 	checkDocEq(t, doc, `<root><![CDATA[baz]]></root>`)
1142 | 	checkStrEq(t, root.Text(), "baz")
1143 | 	checkIntEq(t, len(root.Child), 1)
1144 | 
1145 | 	root.CreateText("corge")
1146 | 	root.CreateCData("grault")
1147 | 	root.CreateText("waldo")
1148 | 	root.CreateCData("fred")
1149 | 	root.CreateElement("child")
1150 | 	checkDocEq(t, doc, `<root><![CDATA[baz]]>corge<![CDATA[grault]]>waldo<![CDATA[fred]]><child/></root>`)
1151 | 	checkStrEq(t, root.Text(), "bazcorgegraultwaldofred")
1152 | 	checkIntEq(t, len(root.Child), 6)
1153 | 
1154 | 	root.SetText("plugh")
1155 | 	checkDocEq(t, doc, `<root>plugh<child/></root>`)
1156 | 	checkStrEq(t, root.Text(), "plugh")
1157 | 	checkIntEq(t, len(root.Child), 2)
1158 | 
1159 | 	root.SetText("")
1160 | 	checkDocEq(t, doc, `<root><child/></root>`)
1161 | 	checkStrEq(t, root.Text(), "")
1162 | 	checkIntEq(t, len(root.Child), 1)
1163 | 
1164 | 	root.SetText("")
1165 | 	checkDocEq(t, doc, `<root><child/></root>`)
1166 | 	checkStrEq(t, root.Text(), "")
1167 | 	checkIntEq(t, len(root.Child), 1)
1168 | 
1169 | 	root.RemoveChildAt(0)
1170 | 	root.CreateText("corge")
1171 | 	root.CreateCData("grault")
1172 | 	root.CreateText("waldo")
1173 | 	root.CreateCData("fred")
1174 | 	root.CreateElement("child")
1175 | 	checkDocEq(t, doc, `<root>corge<![CDATA[grault]]>waldo<![CDATA[fred]]><child/></root>`)
1176 | 	checkStrEq(t, root.Text(), "corgegraultwaldofred")
1177 | 	checkIntEq(t, len(root.Child), 5)
1178 | 
1179 | 	root.SetText("")
1180 | 	checkDocEq(t, doc, `<root><child/></root>`)
1181 | 	checkStrEq(t, root.Text(), "")
1182 | 	checkIntEq(t, len(root.Child), 1)
1183 | }
1184 | 
1185 | func TestSetTail(t *testing.T) {
1186 | 	doc := NewDocument()
1187 | 	root := doc.CreateElement("root")
1188 | 	child := root.CreateElement("child")
1189 | 	root.CreateText("\n\t")
1190 | 	child.SetText("foo")
1191 | 	checkDocEq(t, doc, "<root><child>foo</child>\n\t</root>")
1192 | 	checkStrEq(t, child.Tail(), "\n\t")
1193 | 	checkIntEq(t, len(root.Child), 2)
1194 | 	checkIntEq(t, len(child.Child), 1)
1195 | 
1196 | 	root.CreateCData("    ")
1197 | 	checkDocEq(t, doc, "<root><child>foo</child>\n\t<![CDATA[    ]]></root>")
1198 | 	checkStrEq(t, child.Tail(), "\n\t    ")
1199 | 	checkIntEq(t, len(root.Child), 3)
1200 | 	checkIntEq(t, len(child.Child), 1)
1201 | 
1202 | 	child.SetTail("")
1203 | 	checkDocEq(t, doc, "<root><child>foo</child></root>")
1204 | 	checkStrEq(t, child.Tail(), "")
1205 | 	checkIntEq(t, len(root.Child), 1)
1206 | 	checkIntEq(t, len(child.Child), 1)
1207 | 
1208 | 	child.SetTail("\t\t\t")
1209 | 	checkDocEq(t, doc, "<root><child>foo</child>\t\t\t</root>")
1210 | 	checkStrEq(t, child.Tail(), "\t\t\t")
1211 | 	checkIntEq(t, len(root.Child), 2)
1212 | 	checkIntEq(t, len(child.Child), 1)
1213 | 
1214 | 	child.SetTail("\t\n\n\t")
1215 | 	checkDocEq(t, doc, "<root><child>foo</child>\t\n\n\t</root>")
1216 | 	checkStrEq(t, child.Tail(), "\t\n\n\t")
1217 | 	checkIntEq(t, len(root.Child), 2)
1218 | 	checkIntEq(t, len(child.Child), 1)
1219 | 
1220 | 	child.SetTail("")
1221 | 	checkDocEq(t, doc, "<root><child>foo</child></root>")
1222 | 	checkStrEq(t, child.Tail(), "")
1223 | 	checkIntEq(t, len(root.Child), 1)
1224 | 	checkIntEq(t, len(child.Child), 1)
1225 | }
1226 | 
1227 | func TestAttrParent(t *testing.T) {
1228 | 	doc := NewDocument()
1229 | 	root := doc.CreateElement("root")
1230 | 	attr1 := root.CreateAttr("bar", "1")
1231 | 	attr2 := root.CreateAttr("qux", "2")
1232 | 
1233 | 	checkIntEq(t, len(root.Attr), 2)
1234 | 	checkElementEq(t, attr1.Element(), root)
1235 | 	checkElementEq(t, attr2.Element(), root)
1236 | 
1237 | 	attr1 = root.RemoveAttr("bar")
1238 | 	attr2 = root.RemoveAttr("qux")
1239 | 	checkElementEq(t, attr1.Element(), nil)
1240 | 	checkElementEq(t, attr2.Element(), nil)
1241 | 
1242 | 	s := `<root a="1" b="2" c="3" d="4"/>`
1243 | 	err := doc.ReadFromString(s)
1244 | 	if err != nil {
1245 | 		t.Error("etree: failed to parse document")
1246 | 	}
1247 | 
1248 | 	root = doc.SelectElement("root")
1249 | 	for i := range root.Attr {
1250 | 		checkElementEq(t, root.Attr[i].Element(), root)
1251 | 	}
1252 | }
1253 | 
1254 | func TestDefaultNamespaceURI(t *testing.T) {
1255 | 	s := `
1256 | <root xmlns="https://root.example.com" xmlns:attrib="https://attrib.example.com" attrib:a="foo" b="bar">
1257 | 	<child1 xmlns="https://child.example.com" attrib:a="foo">
1258 | 		<grandchild1 xmlns="https://grandchild.example.com" a="foo">
1259 | 		</grandchild1>
1260 | 		<grandchild2 a="foo">
1261 | 			<greatgrandchild1 attrib:a="foo"/>
1262 | 		</grandchild2>
1263 | 	</child1>
1264 | 	<child2 a="foo"/>
1265 | </root>`
1266 | 
1267 | 	doc := newDocumentFromString(t, s)
1268 | 	root := doc.SelectElement("root")
1269 | 	child1 := root.SelectElement("child1")
1270 | 	child2 := root.SelectElement("child2")
1271 | 	grandchild1 := child1.SelectElement("grandchild1")
1272 | 	grandchild2 := child1.SelectElement("grandchild2")
1273 | 	greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
1274 | 
1275 | 	checkStrEq(t, doc.NamespaceURI(), "")
1276 | 	checkStrEq(t, root.NamespaceURI(), "https://root.example.com")
1277 | 	checkStrEq(t, child1.NamespaceURI(), "https://child.example.com")
1278 | 	checkStrEq(t, child2.NamespaceURI(), "https://root.example.com")
1279 | 	checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com")
1280 | 	checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com")
1281 | 	checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://child.example.com")
1282 | 
1283 | 	checkStrEq(t, root.Attr[0].NamespaceURI(), "")
1284 | 	checkStrEq(t, root.Attr[1].NamespaceURI(), "")
1285 | 	checkStrEq(t, root.Attr[2].NamespaceURI(), "https://attrib.example.com")
1286 | 	checkStrEq(t, root.Attr[3].NamespaceURI(), "")
1287 | 	checkStrEq(t, child1.Attr[0].NamespaceURI(), "")
1288 | 	checkStrEq(t, child1.Attr[1].NamespaceURI(), "https://attrib.example.com")
1289 | 	checkStrEq(t, child2.Attr[0].NamespaceURI(), "")
1290 | 	checkStrEq(t, grandchild1.Attr[0].NamespaceURI(), "")
1291 | 	checkStrEq(t, grandchild1.Attr[1].NamespaceURI(), "")
1292 | 	checkStrEq(t, grandchild2.Attr[0].NamespaceURI(), "")
1293 | 	checkStrEq(t, greatgrandchild1.Attr[0].NamespaceURI(), "https://attrib.example.com")
1294 | 
1295 | 	f := doc.FindElements("//*[namespace-uri()='https://root.example.com']")
1296 | 	if len(f) != 2 || f[0] != root || f[1] != child2 {
1297 | 		t.Error("etree: failed namespace-uri test")
1298 | 	}
1299 | 
1300 | 	f = doc.FindElements("//*[namespace-uri()='https://child.example.com']")
1301 | 	if len(f) != 3 || f[0] != child1 || f[1] != grandchild2 || f[2] != greatgrandchild1 {
1302 | 		t.Error("etree: failed namespace-uri test")
1303 | 	}
1304 | 
1305 | 	f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']")
1306 | 	if len(f) != 1 || f[0] != grandchild1 {
1307 | 		t.Error("etree: failed namespace-uri test")
1308 | 	}
1309 | 
1310 | 	f = doc.FindElements("//*[namespace-uri()='']")
1311 | 	if len(f) != 0 {
1312 | 		t.Error("etree: failed namespace-uri test")
1313 | 	}
1314 | 
1315 | 	f = doc.FindElements("//*[namespace-uri()='foo']")
1316 | 	if len(f) != 0 {
1317 | 		t.Error("etree: failed namespace-uri test")
1318 | 	}
1319 | }
1320 | 
1321 | func TestLocalNamespaceURI(t *testing.T) {
1322 | 	s := `
1323 | <a:root xmlns:a="https://root.example.com">
1324 | 	<b:child1 xmlns:b="https://child.example.com">
1325 | 		<c:grandchild1 xmlns:c="https://grandchild.example.com"/>
1326 | 		<b:grandchild2>
1327 | 			<a:greatgrandchild1/>
1328 | 		</b:grandchild2>
1329 | 		<a:grandchild3/>
1330 | 		<grandchild4/>
1331 | 	</b:child1>
1332 | 	<a:child2>
1333 | 	</a:child2>
1334 | 	<child3>
1335 | 	</child3>
1336 | </a:root>`
1337 | 
1338 | 	doc := newDocumentFromString(t, s)
1339 | 	root := doc.SelectElement("root")
1340 | 	child1 := root.SelectElement("child1")
1341 | 	child2 := root.SelectElement("child2")
1342 | 	child3 := root.SelectElement("child3")
1343 | 	grandchild1 := child1.SelectElement("grandchild1")
1344 | 	grandchild2 := child1.SelectElement("grandchild2")
1345 | 	grandchild3 := child1.SelectElement("grandchild3")
1346 | 	grandchild4 := child1.SelectElement("grandchild4")
1347 | 	greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
1348 | 
1349 | 	checkStrEq(t, doc.NamespaceURI(), "")
1350 | 	checkStrEq(t, root.NamespaceURI(), "https://root.example.com")
1351 | 	checkStrEq(t, child1.NamespaceURI(), "https://child.example.com")
1352 | 	checkStrEq(t, child2.NamespaceURI(), "https://root.example.com")
1353 | 	checkStrEq(t, child3.NamespaceURI(), "")
1354 | 	checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com")
1355 | 	checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com")
1356 | 	checkStrEq(t, grandchild3.NamespaceURI(), "https://root.example.com")
1357 | 	checkStrEq(t, grandchild4.NamespaceURI(), "")
1358 | 	checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://root.example.com")
1359 | 
1360 | 	f := doc.FindElements("//*[namespace-uri()='https://root.example.com']")
1361 | 	if len(f) != 4 || f[0] != root || f[1] != child2 || f[2] != grandchild3 || f[3] != greatgrandchild1 {
1362 | 		t.Error("etree: failed namespace-uri test")
1363 | 	}
1364 | 
1365 | 	f = doc.FindElements("//*[namespace-uri()='https://child.example.com']")
1366 | 	if len(f) != 2 || f[0] != child1 || f[1] != grandchild2 {
1367 | 		t.Error("etree: failed namespace-uri test")
1368 | 	}
1369 | 
1370 | 	f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']")
1371 | 	if len(f) != 1 || f[0] != grandchild1 {
1372 | 		t.Error("etree: failed namespace-uri test")
1373 | 	}
1374 | 
1375 | 	f = doc.FindElements("//*[namespace-uri()='']")
1376 | 	if len(f) != 2 || f[0] != child3 || f[1] != grandchild4 {
1377 | 		t.Error("etree: failed namespace-uri test")
1378 | 	}
1379 | 
1380 | 	f = doc.FindElements("//*[namespace-uri()='foo']")
1381 | 	if len(f) != 0 {
1382 | 		t.Error("etree: failed namespace-uri test")
1383 | 	}
1384 | }
1385 | 
1386 | func TestWhitespace(t *testing.T) {
1387 | 	s := "<root>\n\t<child>\n\t\t<grandchild> x</grandchild>\n    </child>\n</root>"
1388 | 
1389 | 	doc := newDocumentFromString(t, s)
1390 | 	root := doc.Root()
1391 | 	checkIntEq(t, len(root.Child), 3)
1392 | 
1393 | 	cd := root.Child[0].(*CharData)
1394 | 	checkBoolEq(t, cd.IsWhitespace(), true)
1395 | 	checkStrBinaryEq(t, cd.Data, "\n\t")
1396 | 
1397 | 	cd = root.Child[2].(*CharData)
1398 | 	checkBoolEq(t, cd.IsWhitespace(), true)
1399 | 	checkStrBinaryEq(t, cd.Data, "\n")
1400 | 
1401 | 	child := root.SelectElement("child")
1402 | 	checkIntEq(t, len(child.Child), 3)
1403 | 
1404 | 	cd = child.Child[0].(*CharData)
1405 | 	checkBoolEq(t, cd.IsWhitespace(), true)
1406 | 	checkStrBinaryEq(t, cd.Data, "\n\t\t")
1407 | 
1408 | 	cd = child.Child[2].(*CharData)
1409 | 	checkBoolEq(t, cd.IsWhitespace(), true)
1410 | 	checkStrBinaryEq(t, cd.Data, "\n    ")
1411 | 
1412 | 	grandchild := child.SelectElement("grandchild")
1413 | 	checkIntEq(t, len(grandchild.Child), 1)
1414 | 
1415 | 	cd = grandchild.Child[0].(*CharData)
1416 | 	checkBoolEq(t, cd.IsWhitespace(), false)
1417 | 
1418 | 	cd.SetData(" ")
1419 | 	checkBoolEq(t, cd.IsWhitespace(), true)
1420 | 
1421 | 	cd.SetData("        x")
1422 | 	checkBoolEq(t, cd.IsWhitespace(), false)
1423 | 
1424 | 	cd.SetData("\t\n\r    ")
1425 | 	checkBoolEq(t, cd.IsWhitespace(), true)
1426 | 
1427 | 	cd.SetData("\uFFFD")
1428 | 	checkBoolEq(t, cd.IsWhitespace(), false)
1429 | 
1430 | 	cd.SetData("")
1431 | 	checkBoolEq(t, cd.IsWhitespace(), true)
1432 | }
1433 | 
1434 | func TestTokenWriteTo(t *testing.T) {
1435 | 	s := `<store>
1436 | 	<!-- comment -->
1437 | 	<book>
1438 | 		<title>Great Expectations</title>
1439 | 	</book>
1440 | </store>`
1441 | 	doc := newDocumentFromString(t, s)
1442 | 
1443 | 	writeSettings := WriteSettings{}
1444 | 	indentSettings := IndentSettings{UseTabs: true}
1445 | 
1446 | 	tests := []struct {
1447 | 		path     string
1448 | 		expected string
1449 | 	}{
1450 | 		{"//store", "<store>\n\t<!-- comment -->\n\t<book>\n\t\t<title>Great Expectations</title>\n\t</book>\n</store>"},
1451 | 		{"//store/book", "<book>\n\t<title>Great Expectations</title>\n</book>"},
1452 | 		{"//store/book/title", "<title>Great Expectations</title>"},
1453 | 	}
1454 | 	for _, test := range tests {
1455 | 		var buffer bytes.Buffer
1456 | 
1457 | 		c := doc.FindElement(test.path)
1458 | 		c.IndentWithSettings(&indentSettings)
1459 | 		c.WriteTo(&buffer, &writeSettings)
1460 | 		checkStrEq(t, buffer.String(), test.expected)
1461 | 	}
1462 | }
1463 | 
1464 | func TestReindexChildren(t *testing.T) {
1465 | 	s := `<root>
1466 | 	<child1/>
1467 | 	<child2/>
1468 | 	<child3/>
1469 | 	<child4/>
1470 | 	<child5/>
1471 | </root>`
1472 | 	doc := newDocumentFromString(t, s)
1473 | 	doc.Unindent()
1474 | 
1475 | 	root := doc.Root()
1476 | 	if root == nil || root.Tag != "root" || len(root.Child) != 5 {
1477 | 		t.Error("etree: expected root element not found")
1478 | 	}
1479 | 
1480 | 	for i := 0; i < len(root.Child); i++ {
1481 | 		if root.Child[i].Index() != i {
1482 | 			t.Error("etree: incorrect child index found in root element child")
1483 | 		}
1484 | 	}
1485 | 
1486 | 	rand.Shuffle(len(root.Child), func(i, j int) {
1487 | 		root.Child[i], root.Child[j] = root.Child[j], root.Child[i]
1488 | 	})
1489 | 
1490 | 	root.ReindexChildren()
1491 | 
1492 | 	for i := 0; i < len(root.Child); i++ {
1493 | 		if root.Child[i].Index() != i {
1494 | 			t.Error("etree: incorrect child index found in root element child")
1495 | 		}
1496 | 	}
1497 | }
1498 | 
1499 | func TestPreserveDuplicateAttrs(t *testing.T) {
1500 | 	s := `<element x="value1" y="value2" x="value3" x="value4" y="value5"/>`
1501 | 
1502 | 	checkAttrCount := func(e *Element, n int) {
1503 | 		if len(e.Attr) != n {
1504 | 			t.Errorf("etree: expected %d attributes, got %d", n, len(e.Attr))
1505 | 		}
1506 | 	}
1507 | 	checkAttr := func(e *Element, i int, key, value string) {
1508 | 		if i >= len(e.Attr) {
1509 | 			t.Errorf("etree: attr[%d] out of bounds", i)
1510 | 			return
1511 | 		}
1512 | 		if e.Attr[i].Key != key {
1513 | 			t.Errorf("etree: attr[%d] expected key %s, got %s", i, key, e.Attr[i].Key)
1514 | 		}
1515 | 		if e.Attr[i].Value != value {
1516 | 			t.Errorf("etree: attr[%d] expected value %s, got %s", i, value, e.Attr[i].Value)
1517 | 		}
1518 | 	}
1519 | 
1520 | 	t.Run("enabled", func(t *testing.T) {
1521 | 		doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: true})
1522 | 		e := doc.FindElement("element")
1523 | 		checkAttrCount(e, 5)
1524 | 		checkAttr(e, 0, "x", "value1")
1525 | 		checkAttr(e, 1, "y", "value2")
1526 | 		checkAttr(e, 2, "x", "value3")
1527 | 		checkAttr(e, 3, "x", "value4")
1528 | 		checkAttr(e, 4, "y", "value5")
1529 | 	})
1530 | 
1531 | 	t.Run("disabled", func(t *testing.T) {
1532 | 		doc := newDocumentFromString2(t, s, ReadSettings{})
1533 | 		e := doc.FindElement("element")
1534 | 		checkAttrCount(e, 2)
1535 | 		checkAttr(e, 0, "x", "value4")
1536 | 		checkAttr(e, 1, "y", "value5")
1537 | 	})
1538 | }
1539 | 
1540 | func TestNotNil(t *testing.T) {
1541 | 	s := `<enabled>true</enabled>`
1542 | 
1543 | 	doc := newDocumentFromString(t, s)
1544 | 	doc.SelectElement("enabled").NotNil().SetText("false")
1545 | 	doc.SelectElement("visible").NotNil().SetText("true")
1546 | 
1547 | 	want := `<enabled>false</enabled>`
1548 | 	got, err := doc.WriteToString()
1549 | 	if err != nil {
1550 | 		t.Fatal("etree: failed to write document to string")
1551 | 	}
1552 | 	if got != want {
1553 | 		t.Error("etree: unexpected NotNil result")
1554 | 		t.Error("wanted:\n" + want)
1555 | 		t.Error("got:\n" + got)
1556 | 	}
1557 | }
1558 | 
1559 | func TestValidateInput(t *testing.T) {
1560 | 	tests := []struct {
1561 | 		s   string
1562 | 		err string
1563 | 	}{
1564 | 		{`<root>x</root>`, ""},
1565 | 		{`<root/>`, ""},
1566 | 		{`<root>x`, `XML syntax error on line 1: unexpected EOF`},
1567 | 		{`</root><root>`, `XML syntax error on line 1: unexpected end element </root>`},
1568 | 		{`<>`, `XML syntax error on line 1: expected element name after <`},
1569 | 		{`<root>x</root>trailing`, "etree: invalid XML format"},
1570 | 		{`<root>x</root><`, "etree: invalid XML format"},
1571 | 		{`<root><child>x</child></root1>`, `XML syntax error on line 1: element <root> closed by </root1>`},
1572 | 	}
1573 | 
1574 | 	type readFunc func(doc *Document, s string) error
1575 | 	runTests := func(t *testing.T, read readFunc) {
1576 | 		for i, test := range tests {
1577 | 			doc := NewDocument()
1578 | 			doc.ReadSettings.ValidateInput = true
1579 | 			err := read(doc, test.s)
1580 | 			if err == nil {
1581 | 				if test.err != "" {
1582 | 					t.Errorf("etree: test #%d:\nExpected error:\n  %s\nReceived error:\n  nil", i, test.err)
1583 | 				}
1584 | 				root := doc.Root()
1585 | 				if root == nil || root.Tag != "root" {
1586 | 					t.Errorf("etree: test #%d: failed to read document after input validation", i)
1587 | 				}
1588 | 			} else {
1589 | 				te := err.Error()
1590 | 				if te != test.err {
1591 | 					t.Errorf("etree: test #%d:\nExpected error;\n  %s\nReceived error:\n  %s", i, test.err, te)
1592 | 				}
1593 | 			}
1594 | 		}
1595 | 	}
1596 | 
1597 | 	readFromString := func(doc *Document, s string) error {
1598 | 		return doc.ReadFromString(s)
1599 | 	}
1600 | 	t.Run("ReadFromString", func(t *testing.T) { runTests(t, readFromString) })
1601 | 
1602 | 	readFromBytes := func(doc *Document, s string) error {
1603 | 		return doc.ReadFromBytes([]byte(s))
1604 | 	}
1605 | 	t.Run("ReadFromBytes", func(t *testing.T) { runTests(t, readFromBytes) })
1606 | 
1607 | 	readFromFile := func(doc *Document, s string) error {
1608 | 		pathtmp := path.Join(t.TempDir(), "etree-test")
1609 | 		err := os.WriteFile(pathtmp, []byte(s), fs.ModePerm)
1610 | 		if err != nil {
1611 | 			return errors.New("unable to write tmp file for input validation")
1612 | 		}
1613 | 		return doc.ReadFromFile(pathtmp)
1614 | 	}
1615 | 	t.Run("ReadFromFile", func(t *testing.T) { runTests(t, readFromFile) })
1616 | }
1617 | 
1618 | func TestSiblingElement(t *testing.T) {
1619 | 	doc := newDocumentFromString(t, `<root><a/><b>  <b1/> </b> <!--test--> <c/></root>`)
1620 | 
1621 | 	root := doc.SelectElement("root")
1622 | 	a := root.SelectElement("a")
1623 | 	b := root.SelectElement("b")
1624 | 	c := root.SelectElement("c")
1625 | 	b1 := b.SelectElement("b1")
1626 | 
1627 | 	tests := []struct {
1628 | 		e    *Element
1629 | 		next *Element
1630 | 		prev *Element
1631 | 	}{
1632 | 		{root, nil, nil},
1633 | 		{a, b, nil},
1634 | 		{b, c, a},
1635 | 		{c, nil, b},
1636 | 		{b1, nil, nil},
1637 | 	}
1638 | 
1639 | 	toString := func(e *Element) string {
1640 | 		if e == nil {
1641 | 			return "nil"
1642 | 		}
1643 | 		return e.Tag
1644 | 	}
1645 | 
1646 | 	for i, test := range tests {
1647 | 		next := test.e.NextSibling()
1648 | 		if next != test.next {
1649 | 			t.Errorf("etree: test #%d unexpected NextSibling result.\n  Expected: %s\n  Received: %s\n",
1650 | 				i, toString(next), toString(test.next))
1651 | 		}
1652 | 
1653 | 		prev := test.e.PrevSibling()
1654 | 		if prev != test.prev {
1655 | 			t.Errorf("etree: test #%d unexpected PrevSibling result.\n  Expected: %s\n  Received: %s\n",
1656 | 				i, toString(prev), toString(test.prev))
1657 | 		}
1658 | 	}
1659 | }
1660 | 
1661 | func TestContinuations(t *testing.T) {
1662 | 	doc := NewDocument()
1663 | 	root := doc.CreateChild("root", func(e *Element) {
1664 | 		e.CreateChild("child1", func(e *Element) {
1665 | 			e.CreateComment("Grandchildren of child #1")
1666 | 			e.CreateChild("grandchild1", func(e *Element) {
1667 | 				e.CreateAttr("attr1", "1")
1668 | 				e.CreateAttr("attr2", "2")
1669 | 			})
1670 | 			e.CreateChild("grandchild2", func(e *Element) {
1671 | 				e.CreateAttr("attr1", "3")
1672 | 				e.CreateAttr("attr2", "4")
1673 | 			})
1674 | 		})
1675 | 		e.CreateChild("child2", func(e *Element) {
1676 | 			e.CreateComment("Grandchildren of child #2")
1677 | 			e.CreateChild("grandchild1", func(e *Element) {
1678 | 				e.CreateAttr("attr1", "5")
1679 | 				e.CreateAttr("attr2", "6")
1680 | 			})
1681 | 			e.CreateChild("grandchild2", func(e *Element) {
1682 | 				e.CreateAttr("attr1", "7")
1683 | 				e.CreateAttr("attr2", "8")
1684 | 			})
1685 | 		})
1686 | 	})
1687 | 	checkStrEq(t, root.Tag, "root")
1688 | 
1689 | 	// Serialize the document to a string
1690 | 	doc.IndentTabs()
1691 | 	s, err := doc.WriteToString()
1692 | 	if err != nil {
1693 | 		t.Error("etree: failed to serialize document")
1694 | 	}
1695 | 
1696 | 	// Make sure the serialized XML matches expectation.
1697 | 	expected := `<root>
1698 | 	<child1>
1699 | 		<!--Grandchildren of child #1-->
1700 | 		<grandchild1 attr1="1" attr2="2"/>
1701 | 		<grandchild2 attr1="3" attr2="4"/>
1702 | 	</child1>
1703 | 	<child2>
1704 | 		<!--Grandchildren of child #2-->
1705 | 		<grandchild1 attr1="5" attr2="6"/>
1706 | 		<grandchild2 attr1="7" attr2="8"/>
1707 | 	</child2>
1708 | </root>
1709 | `
1710 | 
1711 | 	checkStrEq(t, s, expected)
1712 | }
1713 | 


--------------------------------------------------------------------------------
/example_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015-2019 Brett Vickers.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package etree
 6 | 
 7 | import "os"
 8 | 
 9 | // Create an etree Document, add XML entities to it, and serialize it
10 | // to stdout.
11 | func ExampleDocument_creating() {
12 | 	doc := NewDocument()
13 | 	doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
14 | 	doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
15 | 
16 | 	people := doc.CreateElement("People")
17 | 	people.CreateComment("These are all known people")
18 | 
19 | 	jon := people.CreateElement("Person")
20 | 	jon.CreateAttr("name", "Jon O'Reilly")
21 | 
22 | 	sally := people.CreateElement("Person")
23 | 	sally.CreateAttr("name", "Sally")
24 | 
25 | 	doc.Indent(2)
26 | 	doc.WriteTo(os.Stdout)
27 | 	// Output:
28 | 	// <?xml version="1.0" encoding="UTF-8"?>
29 | 	// <?xml-stylesheet type="text/xsl" href="style.xsl"?>
30 | 	// <People>
31 | 	//   <!--These are all known people-->
32 | 	//   <Person name="Jon O&apos;Reilly"/>
33 | 	//   <Person name="Sally"/>
34 | 	// </People>
35 | }
36 | 
37 | func ExampleDocument_reading() {
38 | 	doc := NewDocument()
39 | 	if err := doc.ReadFromFile("document.xml"); err != nil {
40 | 		panic(err)
41 | 	}
42 | }
43 | 
44 | func ExamplePath() {
45 | 	xml := `
46 | <bookstore>
47 | 	<book>
48 | 		<title>Great Expectations</title>
49 | 		<author>Charles Dickens</author>
50 | 	</book>
51 | 	<book>
52 | 		<title>Ulysses</title>
53 | 		<author>James Joyce</author>
54 | 	</book>
55 | </bookstore>`
56 | 
57 | 	doc := NewDocument()
58 | 	doc.ReadFromString(xml)
59 | 	for _, e := range doc.FindElements(".//book[author='Charles Dickens']") {
60 | 		doc := NewDocumentWithRoot(e.Copy())
61 | 		doc.Indent(2)
62 | 		doc.WriteTo(os.Stdout)
63 | 	}
64 | 	// Output:
65 | 	// <book>
66 | 	//   <title>Great Expectations</title>
67 | 	//   <author>Charles Dickens</author>
68 | 	// </book>
69 | }
70 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/beevik/etree
2 | 
3 | go 1.21.0
4 | 


--------------------------------------------------------------------------------
/helpers.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015-2019 Brett Vickers.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package etree
  6 | 
  7 | import (
  8 | 	"io"
  9 | 	"strings"
 10 | 	"unicode/utf8"
 11 | )
 12 | 
 13 | type stack[E any] struct {
 14 | 	data []E
 15 | }
 16 | 
 17 | func (s *stack[E]) empty() bool {
 18 | 	return len(s.data) == 0
 19 | }
 20 | 
 21 | func (s *stack[E]) push(value E) {
 22 | 	s.data = append(s.data, value)
 23 | }
 24 | 
 25 | func (s *stack[E]) pop() E {
 26 | 	value := s.data[len(s.data)-1]
 27 | 	var empty E
 28 | 	s.data[len(s.data)-1] = empty
 29 | 	s.data = s.data[:len(s.data)-1]
 30 | 	return value
 31 | }
 32 | 
 33 | func (s *stack[E]) peek() E {
 34 | 	return s.data[len(s.data)-1]
 35 | }
 36 | 
 37 | type queue[E any] struct {
 38 | 	data       []E
 39 | 	head, tail int
 40 | }
 41 | 
 42 | func (f *queue[E]) add(value E) {
 43 | 	if f.len()+1 >= len(f.data) {
 44 | 		f.grow()
 45 | 	}
 46 | 	f.data[f.tail] = value
 47 | 	if f.tail++; f.tail == len(f.data) {
 48 | 		f.tail = 0
 49 | 	}
 50 | }
 51 | 
 52 | func (f *queue[E]) remove() E {
 53 | 	value := f.data[f.head]
 54 | 	var empty E
 55 | 	f.data[f.head] = empty
 56 | 	if f.head++; f.head == len(f.data) {
 57 | 		f.head = 0
 58 | 	}
 59 | 	return value
 60 | }
 61 | 
 62 | func (f *queue[E]) len() int {
 63 | 	if f.tail >= f.head {
 64 | 		return f.tail - f.head
 65 | 	}
 66 | 	return len(f.data) - f.head + f.tail
 67 | }
 68 | 
 69 | func (f *queue[E]) grow() {
 70 | 	c := len(f.data) * 2
 71 | 	if c == 0 {
 72 | 		c = 4
 73 | 	}
 74 | 	buf, count := make([]E, c), f.len()
 75 | 	if f.tail >= f.head {
 76 | 		copy(buf[:count], f.data[f.head:f.tail])
 77 | 	} else {
 78 | 		hindex := len(f.data) - f.head
 79 | 		copy(buf[:hindex], f.data[f.head:])
 80 | 		copy(buf[hindex:count], f.data[:f.tail])
 81 | 	}
 82 | 	f.data, f.head, f.tail = buf, 0, count
 83 | }
 84 | 
// xmlReader provides the interface by which an XML byte stream is
// processed and decoded.
type xmlReader interface {
	// Bytes returns the total number of bytes delivered by Read so far
	// (this is how both implementations in this file behave).
	Bytes() int64
	// Read has the same signature as io.Reader's Read method.
	Read(p []byte) (n int, err error)
}
 91 | 
 92 | // xmlSimpleReader implements a proxy reader that counts the number of
 93 | // bytes read from its encapsulated reader.
 94 | type xmlSimpleReader struct {
 95 | 	r     io.Reader
 96 | 	bytes int64
 97 | }
 98 | 
 99 | func newXmlSimpleReader(r io.Reader) xmlReader {
100 | 	return &xmlSimpleReader{r, 0}
101 | }
102 | 
103 | func (xr *xmlSimpleReader) Bytes() int64 {
104 | 	return xr.bytes
105 | }
106 | 
107 | func (xr *xmlSimpleReader) Read(p []byte) (n int, err error) {
108 | 	n, err = xr.r.Read(p)
109 | 	xr.bytes += int64(n)
110 | 	return n, err
111 | }
112 | 
// xmlPeekReader implements a proxy reader that counts the number of
// bytes read from its encapsulated reader. It also allows the caller to
// "peek" at the previous portions of the buffer after they have been
// parsed.
type xmlPeekReader struct {
	r          io.Reader
	bytes      int64  // total bytes read by the Read function
	buf        []byte // internal read buffer
	bufSize    int    // total bytes used in the read buffer
	bufOffset  int64  // total bytes read when buf was last filled
	window     []byte // current read buffer window
	peekBuf    []byte // buffer used to store data to be peeked at later
	peekOffset int64  // total read offset of the start of the peek buffer
	pendingErr error  // error r returned together with data; reported once that data is consumed
}

// newXmlPeekReader wraps r in an xmlPeekReader with a 4096-byte internal
// read buffer. Peeking is inactive (peekOffset is -1) until PeekPrepare
// is called.
func newXmlPeekReader(r io.Reader) *xmlPeekReader {
	buf := make([]byte, 4096)
	return &xmlPeekReader{
		r:          r,
		bytes:      0,
		buf:        buf,
		bufSize:    0,
		bufOffset:  0,
		window:     buf[0:0],
		peekBuf:    make([]byte, 0),
		peekOffset: -1,
	}
}

// Bytes returns the total number of bytes returned by Read so far.
func (xr *xmlPeekReader) Bytes() int64 {
	return xr.bytes
}

// Read copies buffered data into p, refilling the internal buffer from the
// underlying reader when it is exhausted. An error from the underlying
// reader is returned only after all data that preceded it has been
// delivered.
func (xr *xmlPeekReader) Read(p []byte) (n int, err error) {
	if len(xr.window) == 0 {
		err = xr.fill()
		if err != nil {
			return 0, err
		}
		if len(xr.window) == 0 {
			return 0, nil
		}
	}

	// copy transfers min(len(p), len(xr.window)) bytes.
	n = copy(p, xr.window)
	xr.window = xr.window[n:]
	xr.bytes += int64(n)

	return n, nil
}

// PeekPrepare starts capturing the bytes of the stream that begin at the
// absolute stream offset given by offset. The capture buffer is grown to a
// capacity of maxLen if it is smaller; capture stops once the buffer's
// capacity is reached. Any previously captured bytes are discarded.
func (xr *xmlPeekReader) PeekPrepare(offset int64, maxLen int) {
	if maxLen > cap(xr.peekBuf) {
		xr.peekBuf = make([]byte, 0, maxLen)
	}
	xr.peekBuf = xr.peekBuf[0:0]
	xr.peekOffset = offset
	xr.updatePeekBuf()
}

// PeekFinalize returns the bytes captured since the last PeekPrepare. The
// returned slice is reused by later calls to PeekPrepare.
func (xr *xmlPeekReader) PeekFinalize() []byte {
	xr.updatePeekBuf()
	return xr.peekBuf
}

// fill reloads the internal buffer from the underlying reader.
//
// The io.Reader contract allows a reader to return n > 0 bytes together
// with a non-nil error (commonly io.EOF). In that case the bytes are kept
// and made readable, and the error is held in pendingErr and returned by
// the next call to fill, so no data is lost.
func (xr *xmlPeekReader) fill() error {
	xr.bufOffset = xr.bytes
	xr.bufSize = 0

	// Report an error deferred by the previous fill before reading again.
	if xr.pendingErr != nil {
		err := xr.pendingErr
		xr.pendingErr = nil
		xr.window = xr.buf[0:0]
		return err
	}

	n, err := xr.r.Read(xr.buf)
	if n > 0 {
		xr.window, xr.bufSize = xr.buf[:n], n
		xr.updatePeekBuf()
		xr.pendingErr = err
		return nil
	}
	xr.window, xr.bufSize = xr.buf[0:0], 0
	return err
}

// updatePeekBuf appends to peekBuf whatever part of the requested peek
// range overlaps the data currently held in buf, limited by the free
// capacity remaining in peekBuf. It does nothing when peeking is inactive
// or peekBuf is already full.
func (xr *xmlPeekReader) updatePeekBuf() {
	peekRemain := cap(xr.peekBuf) - len(xr.peekBuf)
	if xr.peekOffset >= 0 && peekRemain > 0 {
		// Intersect the peek range with the range covered by buf; both are
		// expressed as absolute stream offsets.
		rangeMin := xr.peekOffset
		rangeMax := xr.peekOffset + int64(cap(xr.peekBuf))
		bufMin := xr.bufOffset
		bufMax := xr.bufOffset + int64(xr.bufSize)
		if rangeMin < bufMin {
			rangeMin = bufMin
		}
		if rangeMax > bufMax {
			rangeMax = bufMax
		}
		if rangeMax > rangeMin {
			// Convert to indices into buf.
			rangeMin -= xr.bufOffset
			rangeMax -= xr.bufOffset
			if int(rangeMax-rangeMin) > peekRemain {
				rangeMax = rangeMin + int64(peekRemain)
			}
			xr.peekBuf = append(xr.peekBuf, xr.buf[rangeMin:rangeMax]...)
		}
	}
}
220 | 
// xmlWriter is a pass-through writer that keeps a running total of the
// bytes accepted by the writer it wraps.
type xmlWriter struct {
	w     io.Writer
	bytes int64
}

// newXmlWriter wraps w in a byte-counting writer whose count starts at
// zero.
func newXmlWriter(w io.Writer) *xmlWriter {
	xw := new(xmlWriter)
	xw.w = w
	return xw
}

// Write forwards p to the wrapped writer and adds the number of bytes it
// reports as written to the running total, even when an error is returned.
func (xw *xmlWriter) Write(p []byte) (int, error) {
	written, err := xw.w.Write(p)
	xw.bytes += int64(written)
	return written, err
}
237 | 
// isWhitespace reports whether every byte of s is a space, tab, line feed
// or carriage return. The empty string is considered whitespace.
func isWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n', '\r':
			// whitespace; keep scanning
		default:
			return false
		}
	}
	return true
}
248 | 
// spaceMatch reports whether namespace a matches namespace b. An empty a
// acts as a wildcard and matches any b; otherwise the two must be equal.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}
259 | 
// spaceDecompose splits a "namespace:tag" identifier at its first ':' and
// returns the text before and after it. When str contains no ':' the
// namespace is empty and the whole string is returned as the key.
func spaceDecompose(str string) (space, key string) {
	prefix, rest, found := strings.Cut(str, ":")
	if !found {
		return "", str
	}
	return prefix, rest
}
269 | 
// Strings used by indentCRLF and indentLF. Each one is a CRLF pair
// followed by a run of a single indent character (spaces or tabs).
// indentCRLF slices from the start of the string, while indentLF slices
// from index 1 so that the leading '\r' is left out. Requests for more
// indentation than a string holds are satisfied by repeating its indent
// character.
const (
	indentSpaces = "\r\n                                                                "
	indentTabs   = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)
275 | 
// indentCRLF returns the first two bytes of source (a CRLF newline in the
// strings this is used with) followed by n copies of the indent character
// found at index 2 of source. A negative n yields just the newline.
func indentCRLF(n int, source string) string {
	if n < 0 {
		return source[:2]
	}
	if n >= len(source)-1 {
		// source does not hold enough indent characters; extend it.
		extra := n - len(source) + 2
		return source + strings.Repeat(source[2:3], extra)
	}
	return source[:n+2]
}
288 | 
// indentLF returns the byte at index 1 of source (the LF of the leading
// CRLF in the strings this is used with) followed by n copies of the
// indent character found at index 2 of source. A negative n yields just
// the newline.
func indentLF(n int, source string) string {
	if n < 0 {
		return source[1:2]
	}
	if n >= len(source)-1 {
		// source does not hold enough indent characters; extend it.
		extra := n - len(source) + 2
		return source[1:] + strings.Repeat(source[2:3], extra)
	}
	return source[1 : n+2]
}
301 | 
// nextIndex returns the index within s of the first occurrence of byte ch
// at or after offset, or -1 if ch does not occur there. The offset must
// not exceed len(s).
func nextIndex(s string, ch byte, offset int) int {
	rel := strings.IndexByte(s[offset:], ch)
	if rel < 0 {
		return -1
	}
	return offset + rel
}
312 | 
// isInteger reports whether s is a decimal integer: an optional leading
// '-' followed by one or more ASCII digits. The empty string and a lone
// "-" are not integers. No range check is performed, so the value may
// still be too large for a native integer type.
func isInteger(s string) bool {
	if len(s) > 0 && s[0] == '-' {
		s = s[1:]
	}
	if len(s) == 0 {
		// Nothing but an (optional) sign: there are no digits to parse.
		return false
	}
	for i := 0; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}
322 | 
// escapeMode selects which characters escapeString replaces with XML
// entity or character references. In every mode '&' and '<' are escaped
// and characters outside the XML character range are replaced with U+FFFD.
type escapeMode byte

const (
	// escapeNormal additionally escapes '>', '\'' and '"'. Tab, line feed
	// and carriage return are written unchanged.
	escapeNormal escapeMode = iota
	// escapeCanonicalText additionally escapes '>' and carriage return.
	// Quotes, tab and line feed are written unchanged.
	escapeCanonicalText
	// escapeCanonicalAttr additionally escapes '"', tab, line feed and
	// carriage return. '>' and '\'' are written unchanged.
	escapeCanonicalAttr
)
330 | 
331 | // escapeString writes an escaped version of a string to the writer.
332 | func escapeString(w Writer, s string, m escapeMode) {
333 | 	var esc []byte
334 | 	last := 0
335 | 	for i := 0; i < len(s); {
336 | 		r, width := utf8.DecodeRuneInString(s[i:])
337 | 		i += width
338 | 		switch r {
339 | 		case '&':
340 | 			esc = []byte("&amp;")
341 | 		case '<':
342 | 			esc = []byte("&lt;")
343 | 		case '>':
344 | 			if m == escapeCanonicalAttr {
345 | 				continue
346 | 			}
347 | 			esc = []byte("&gt;")
348 | 		case '\'':
349 | 			if m != escapeNormal {
350 | 				continue
351 | 			}
352 | 			esc = []byte("&apos;")
353 | 		case '"':
354 | 			if m == escapeCanonicalText {
355 | 				continue
356 | 			}
357 | 			esc = []byte("&quot;")
358 | 		case '\t':
359 | 			if m != escapeCanonicalAttr {
360 | 				continue
361 | 			}
362 | 			esc = []byte("&#x9;")
363 | 		case '\n':
364 | 			if m != escapeCanonicalAttr {
365 | 				continue
366 | 			}
367 | 			esc = []byte("&#xA;")
368 | 		case '\r':
369 | 			if m == escapeNormal {
370 | 				continue
371 | 			}
372 | 			esc = []byte("&#xD;")
373 | 		default:
374 | 			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
375 | 				esc = []byte("\uFFFD")
376 | 				break
377 | 			}
378 | 			continue
379 | 		}
380 | 		w.WriteString(s[last : i-width])
381 | 		w.Write(esc)
382 | 		last = i
383 | 	}
384 | 	w.WriteString(s[last:])
385 | }
386 | 
// isInCharacterRange reports whether r falls in one of the ranges tested
// below: tab, line feed, carriage return, U+0020-U+D7FF, U+E000-U+FFFD or
// U+10000-U+10FFFF.
func isInCharacterRange(r rune) bool {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		return true
	case 0x20 <= r && r <= 0xD7FF:
		return true
	case 0xE000 <= r && r <= 0xFFFD:
		return true
	case 0x10000 <= r && r <= 0x10FFFF:
		return true
	default:
		return false
	}
}
395 | 


--------------------------------------------------------------------------------
/path.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015-2019 Brett Vickers.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package etree
  6 | 
  7 | import (
  8 | 	"strconv"
  9 | 	"strings"
 10 | )
 11 | 
 12 | /*
 13 | A Path is a string that represents a search path through an etree starting
 14 | from the document root or an arbitrary element. Paths are used with the
 15 | Element object's Find* methods to locate and return desired elements.
 16 | 
 17 | A Path consists of a series of slash-separated "selectors", each of which may
 18 | be modified by one or more bracket-enclosed "filters". Selectors are used to
 19 | traverse the etree from element to element, while filters are used to narrow
 20 | the list of candidate elements at each node.
 21 | 
 22 | Although etree Path strings are structurally and behaviorally similar to XPath
 23 | strings (https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more
 24 | limited set of selectors and filtering options.
 25 | 
 26 | The following selectors are supported by etree paths:
 27 | 
 28 | 	.               Select the current element.
 29 | 	..              Select the parent of the current element.
 30 | 	*               Select all child elements of the current element.
 31 | 	/               Select the root element when used at the start of a path.
 32 | 	//              Select all descendants of the current element.
 33 | 	tag             Select all child elements with a name matching the tag.
 34 | 
 35 | The following basic filters are supported:
 36 | 
 37 | 	[@attrib]       Keep elements with an attribute named attrib.
 38 | 	[@attrib='val'] Keep elements with an attribute named attrib and value matching val.
 39 | 	[tag]           Keep elements with a child element named tag.
 40 | 	[tag='val']     Keep elements with a child element named tag and text matching val.
 41 | 	[n]             Keep the n-th element, where n is a numeric index starting from 1.
 42 | 
 43 | The following function-based filters are supported:
 44 | 
 45 | 	[text()]                    Keep elements with non-empty text.
 46 | 	[text()='val']              Keep elements whose text matches val.
 47 | 	[local-name()='val']        Keep elements whose un-prefixed tag matches val.
 48 | 	[name()='val']              Keep elements whose full tag exactly matches val.
 49 | 	[namespace-prefix()]        Keep elements with non-empty namespace prefixes.
 50 | 	[namespace-prefix()='val']  Keep elements whose namespace prefix matches val.
 51 | 	[namespace-uri()]           Keep elements with non-empty namespace URIs.
 52 | 	[namespace-uri()='val']     Keep elements whose namespace URI matches val.
 53 | 
 54 | Below are some examples of etree path strings.
 55 | 
 56 | Select the bookstore child element of the root element:
 57 | 
 58 | 	/bookstore
 59 | 
 60 | Beginning from the root element, select the title elements of all descendant
 61 | book elements having a 'category' attribute of 'WEB':
 62 | 
 63 | 	//book[@category='WEB']/title
 64 | 
 65 | Beginning from the current element, select the first descendant book element
 66 | with a title child element containing the text 'Great Expectations':
 67 | 
 68 | 	.//book[title='Great Expectations'][1]
 69 | 
 70 | Beginning from the current element, select all child elements of book elements
 71 | with an attribute 'language' set to 'english':
 72 | 
 73 | 	./book/*[@language='english']
 74 | 
 75 | Beginning from the current element, select all child elements of book elements
 76 | containing the text 'special':
 77 | 
 78 | 	./book/*[text()='special']
 79 | 
 80 | Beginning from the current element, select all descendant book elements whose
 81 | title child element has a 'language' attribute of 'french':
 82 | 
 83 | 	.//book/title[@language='french']/..
 84 | 
 85 | Beginning from the current element, select all descendant book elements
 86 | belonging to the http://www.w3.org/TR/html4/ namespace:
 87 | 
 88 | 	.//book[namespace-uri()='http://www.w3.org/TR/html4/']
 89 | */
type Path struct {
	// segments holds the compiled form of the path as returned by the
	// path compiler; it is nil when compilation failed.
	segments []segment
}
 93 | 
 94 | // ErrPath is returned by path functions when an invalid etree path is provided.
 95 | type ErrPath string
 96 | 
 97 | // Error returns the string describing a path error.
 98 | func (err ErrPath) Error() string {
 99 | 	return "etree: " + string(err)
100 | }
101 | 
102 | // CompilePath creates an optimized version of an XPath-like string that
103 | // can be used to query elements in an element tree.
104 | func CompilePath(path string) (Path, error) {
105 | 	var comp compiler
106 | 	segments := comp.parsePath(path)
107 | 	if comp.err != ErrPath("") {
108 | 		return Path{nil}, comp.err
109 | 	}
110 | 	return Path{segments}, nil
111 | }
112 | 
113 | // MustCompilePath creates an optimized version of an XPath-like string that
114 | // can be used to query elements in an element tree.  Panics if an error
115 | // occurs.  Use this function to create Paths when you know the path is
116 | // valid (i.e., if it's hard-coded).
117 | func MustCompilePath(path string) Path {
118 | 	p, err := CompilePath(path)
119 | 	if err != nil {
120 | 		panic(err)
121 | 	}
122 | 	return p
123 | }
124 | 
125 | // A segment is a portion of a path between "/" characters.
126 | // It contains one selector and zero or more [filters].
127 | type segment struct {
128 | 	sel     selector
129 | 	filters []filter
130 | }
131 | 
132 | func (seg *segment) apply(e *Element, p *pather) {
133 | 	seg.sel.apply(e, p)
134 | 	for _, f := range seg.filters {
135 | 		f.apply(p)
136 | 	}
137 | }
138 | 
// A selector selects XML elements for consideration by the
// path traversal.
type selector interface {
	// apply appends to p.candidates the elements this selector picks
	// relative to the element e.
	apply(e *Element, p *pather)
}
144 | 
// A filter pares down a list of candidate XML elements based
// on a path filter in [brackets].
type filter interface {
	// apply replaces p.candidates with the subset of candidates that
	// satisfy the filter, using p.scratch as the swap buffer.
	apply(p *pather)
}
150 | 
// A pather is a helper object that traverses an element tree using
// a Path object.  It collects and deduplicates all elements matching
// the path query.
type pather struct {
	queue      queue[node]       // nodes still waiting to be evaluated
	results    []*Element        // matched elements, in the order found
	inResults  map[*Element]bool // elements already present in results
	candidates []*Element        // elements selected by the segment being evaluated
	scratch    []*Element        // swap buffer used by filters
}
161 | 
// A node represents an element and the remaining path segments that
// should be applied against it by the pather.
type node struct {
	e        *Element  // element the remaining segments are applied to
	segments []segment // segments not yet applied; the first one is applied next
}
168 | 
169 | func newPather() *pather {
170 | 	return &pather{
171 | 		results:    make([]*Element, 0),
172 | 		inResults:  make(map[*Element]bool),
173 | 		candidates: make([]*Element, 0),
174 | 		scratch:    make([]*Element, 0),
175 | 	}
176 | }
177 | 
178 | // traverse follows the path from the element e, collecting
179 | // and then returning all elements that match the path's selectors
180 | // and filters.
181 | func (p *pather) traverse(e *Element, path Path) []*Element {
182 | 	for p.queue.add(node{e, path.segments}); p.queue.len() > 0; {
183 | 		p.eval(p.queue.remove())
184 | 	}
185 | 	return p.results
186 | }
187 | 
188 | // eval evaluates the current path node by applying the remaining
189 | // path's selector rules against the node's element.
190 | func (p *pather) eval(n node) {
191 | 	p.candidates = p.candidates[0:0]
192 | 	seg, remain := n.segments[0], n.segments[1:]
193 | 	seg.apply(n.e, p)
194 | 
195 | 	if len(remain) == 0 {
196 | 		for _, c := range p.candidates {
197 | 			if in := p.inResults[c]; !in {
198 | 				p.inResults[c] = true
199 | 				p.results = append(p.results, c)
200 | 			}
201 | 		}
202 | 	} else {
203 | 		for _, c := range p.candidates {
204 | 			p.queue.add(node{c, remain})
205 | 		}
206 | 	}
207 | }
208 | 
// A compiler generates a compiled path from a path string.
type compiler struct {
	// err is the most recent error recorded while parsing; the empty
	// string means no error has occurred.
	err ErrPath
}
213 | 
214 | // parsePath parses an XPath-like string describing a path
215 | // through an element tree and returns a slice of segment
216 | // descriptors.
217 | func (c *compiler) parsePath(path string) []segment {
218 | 	// If path ends with //, fix it
219 | 	if strings.HasSuffix(path, "//") {
220 | 		path += "*"
221 | 	}
222 | 
223 | 	var segments []segment
224 | 
225 | 	// Check for an absolute path
226 | 	if strings.HasPrefix(path, "/") {
227 | 		segments = append(segments, segment{new(selectRoot), []filter{}})
228 | 		path = path[1:]
229 | 	}
230 | 
231 | 	// Split path into segments
232 | 	for _, s := range splitPath(path) {
233 | 		segments = append(segments, c.parseSegment(s))
234 | 		if c.err != ErrPath("") {
235 | 			break
236 | 		}
237 | 	}
238 | 	return segments
239 | }
240 | 
// splitPath splits path at every "/" that is not inside a single- or
// double-quoted string. The result always contains at least one piece;
// empty pieces are preserved. An unterminated quote extends to the end of
// the path.
func splitPath(path string) []string {
	var (
		pieces []string
		begin  int
		quote  byte // active quote character, or 0 when outside quotes
	)
	for i := 0; i < len(path); i++ {
		ch := path[i]
		switch {
		case quote != 0:
			// Inside quotes only the matching quote character matters.
			if ch == quote {
				quote = 0
			}
		case ch == '\'' || ch == '"':
			quote = ch
		case ch == '/':
			pieces = append(pieces, path[begin:i])
			begin = i + 1
		}
	}
	return append(pieces, path[begin:])
}
260 | 
261 | // parseSegment parses a path segment between / characters.
262 | func (c *compiler) parseSegment(path string) segment {
263 | 	pieces := strings.Split(path, "[")
264 | 	seg := segment{
265 | 		sel:     c.parseSelector(pieces[0]),
266 | 		filters: []filter{},
267 | 	}
268 | 	for i := 1; i < len(pieces); i++ {
269 | 		fpath := pieces[i]
270 | 		if len(fpath) == 0 || fpath[len(fpath)-1] != ']' {
271 | 			c.err = ErrPath("path has invalid filter [brackets].")
272 | 			break
273 | 		}
274 | 		seg.filters = append(seg.filters, c.parseFilter(fpath[:len(fpath)-1]))
275 | 	}
276 | 	return seg
277 | }
278 | 
279 | // parseSelector parses a selector at the start of a path segment.
280 | func (c *compiler) parseSelector(path string) selector {
281 | 	switch path {
282 | 	case ".":
283 | 		return new(selectSelf)
284 | 	case "..":
285 | 		return new(selectParent)
286 | 	case "*":
287 | 		return new(selectChildren)
288 | 	case "":
289 | 		return new(selectDescendants)
290 | 	default:
291 | 		return newSelectChildrenByTag(path)
292 | 	}
293 | }
294 | 
// fnTable maps the function names that may be used in a path filter, such
// as [text()] or [text()='val'], to the Element method that produces the
// string the filter tests.
var fnTable = map[string]func(e *Element) string{
	"local-name":       (*Element).name,
	"name":             (*Element).FullTag,
	"namespace-prefix": (*Element).namespacePrefix,
	"namespace-uri":    (*Element).NamespaceURI,
	"text":             (*Element).Text,
}
302 | 
303 | // parseFilter parses a path filter contained within [brackets].
304 | func (c *compiler) parseFilter(path string) filter {
305 | 	if len(path) == 0 {
306 | 		c.err = ErrPath("path contains an empty filter expression.")
307 | 		return nil
308 | 	}
309 | 
310 | 	// Filter contains [@attr='val'], [@attr="val"], [fn()='val'],
311 | 	// [fn()="val"], [tag='val'] or [tag="val"]?
312 | 	eqindex := strings.IndexByte(path, '=')
313 | 	if eqindex >= 0 && eqindex+1 < len(path) {
314 | 		quote := path[eqindex+1]
315 | 		if quote == '\'' || quote == '"' {
316 | 			rindex := nextIndex(path, quote, eqindex+2)
317 | 			if rindex != len(path)-1 {
318 | 				c.err = ErrPath("path has mismatched filter quotes.")
319 | 				return nil
320 | 			}
321 | 
322 | 			key := path[:eqindex]
323 | 			value := path[eqindex+2 : rindex]
324 | 
325 | 			switch {
326 | 			case key[0] == '@':
327 | 				return newFilterAttrVal(key[1:], value)
328 | 			case strings.HasSuffix(key, "()"):
329 | 				name := key[:len(key)-2]
330 | 				if fn, ok := fnTable[name]; ok {
331 | 					return newFilterFuncVal(fn, value)
332 | 				}
333 | 				c.err = ErrPath("path has unknown function " + name)
334 | 				return nil
335 | 			default:
336 | 				return newFilterChildText(key, value)
337 | 			}
338 | 		}
339 | 	}
340 | 
341 | 	// Filter contains [@attr], [N], [tag] or [fn()]
342 | 	switch {
343 | 	case path[0] == '@':
344 | 		return newFilterAttr(path[1:])
345 | 	case strings.HasSuffix(path, "()"):
346 | 		name := path[:len(path)-2]
347 | 		if fn, ok := fnTable[name]; ok {
348 | 			return newFilterFunc(fn)
349 | 		}
350 | 		c.err = ErrPath("path has unknown function " + name)
351 | 		return nil
352 | 	case isInteger(path):
353 | 		pos, _ := strconv.Atoi(path)
354 | 		switch {
355 | 		case pos > 0:
356 | 			return newFilterPos(pos - 1)
357 | 		default:
358 | 			return newFilterPos(pos)
359 | 		}
360 | 	default:
361 | 		return newFilterChild(path)
362 | 	}
363 | }
364 | 
365 | // selectSelf selects the current element into the candidate list.
366 | type selectSelf struct{}
367 | 
368 | func (s *selectSelf) apply(e *Element, p *pather) {
369 | 	p.candidates = append(p.candidates, e)
370 | }
371 | 
372 | // selectRoot selects the element's root node.
373 | type selectRoot struct{}
374 | 
375 | func (s *selectRoot) apply(e *Element, p *pather) {
376 | 	root := e
377 | 	for root.parent != nil {
378 | 		root = root.parent
379 | 	}
380 | 	p.candidates = append(p.candidates, root)
381 | }
382 | 
383 | // selectParent selects the element's parent into the candidate list.
384 | type selectParent struct{}
385 | 
386 | func (s *selectParent) apply(e *Element, p *pather) {
387 | 	if e.parent != nil {
388 | 		p.candidates = append(p.candidates, e.parent)
389 | 	}
390 | }
391 | 
392 | // selectChildren selects the element's child elements into the
393 | // candidate list.
394 | type selectChildren struct{}
395 | 
396 | func (s *selectChildren) apply(e *Element, p *pather) {
397 | 	for _, c := range e.Child {
398 | 		if c, ok := c.(*Element); ok {
399 | 			p.candidates = append(p.candidates, c)
400 | 		}
401 | 	}
402 | }
403 | 
404 | // selectDescendants selects all descendant child elements
405 | // of the element into the candidate list.
406 | type selectDescendants struct{}
407 | 
408 | func (s *selectDescendants) apply(e *Element, p *pather) {
409 | 	var queue queue[*Element]
410 | 	for queue.add(e); queue.len() > 0; {
411 | 		e := queue.remove()
412 | 		p.candidates = append(p.candidates, e)
413 | 		for _, c := range e.Child {
414 | 			if c, ok := c.(*Element); ok {
415 | 				queue.add(c)
416 | 			}
417 | 		}
418 | 	}
419 | }
420 | 
421 | // selectChildrenByTag selects into the candidate list all child
422 | // elements of the element having the specified tag.
423 | type selectChildrenByTag struct {
424 | 	space, tag string
425 | }
426 | 
427 | func newSelectChildrenByTag(path string) *selectChildrenByTag {
428 | 	s, l := spaceDecompose(path)
429 | 	return &selectChildrenByTag{s, l}
430 | }
431 | 
432 | func (s *selectChildrenByTag) apply(e *Element, p *pather) {
433 | 	for _, c := range e.Child {
434 | 		if c, ok := c.(*Element); ok && spaceMatch(s.space, c.Space) && s.tag == c.Tag {
435 | 			p.candidates = append(p.candidates, c)
436 | 		}
437 | 	}
438 | }
439 | 
440 | // filterPos filters the candidate list, keeping only the
441 | // candidate at the specified index.
442 | type filterPos struct {
443 | 	index int
444 | }
445 | 
446 | func newFilterPos(pos int) *filterPos {
447 | 	return &filterPos{pos}
448 | }
449 | 
450 | func (f *filterPos) apply(p *pather) {
451 | 	if f.index >= 0 {
452 | 		if f.index < len(p.candidates) {
453 | 			p.scratch = append(p.scratch, p.candidates[f.index])
454 | 		}
455 | 	} else {
456 | 		if -f.index <= len(p.candidates) {
457 | 			p.scratch = append(p.scratch, p.candidates[len(p.candidates)+f.index])
458 | 		}
459 | 	}
460 | 	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
461 | }
462 | 
463 | // filterAttr filters the candidate list for elements having
464 | // the specified attribute.
465 | type filterAttr struct {
466 | 	space, key string
467 | }
468 | 
469 | func newFilterAttr(str string) *filterAttr {
470 | 	s, l := spaceDecompose(str)
471 | 	return &filterAttr{s, l}
472 | }
473 | 
474 | func (f *filterAttr) apply(p *pather) {
475 | 	for _, c := range p.candidates {
476 | 		for _, a := range c.Attr {
477 | 			if spaceMatch(f.space, a.Space) && f.key == a.Key {
478 | 				p.scratch = append(p.scratch, c)
479 | 				break
480 | 			}
481 | 		}
482 | 	}
483 | 	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
484 | }
485 | 
486 | // filterAttrVal filters the candidate list for elements having
487 | // the specified attribute with the specified value.
488 | type filterAttrVal struct {
489 | 	space, key, val string
490 | }
491 | 
492 | func newFilterAttrVal(str, value string) *filterAttrVal {
493 | 	s, l := spaceDecompose(str)
494 | 	return &filterAttrVal{s, l, value}
495 | }
496 | 
497 | func (f *filterAttrVal) apply(p *pather) {
498 | 	for _, c := range p.candidates {
499 | 		for _, a := range c.Attr {
500 | 			if spaceMatch(f.space, a.Space) && f.key == a.Key && f.val == a.Value {
501 | 				p.scratch = append(p.scratch, c)
502 | 				break
503 | 			}
504 | 		}
505 | 	}
506 | 	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
507 | }
508 | 
509 | // filterFunc filters the candidate list for elements satisfying a custom
510 | // boolean function.
511 | type filterFunc struct {
512 | 	fn func(e *Element) string
513 | }
514 | 
515 | func newFilterFunc(fn func(e *Element) string) *filterFunc {
516 | 	return &filterFunc{fn}
517 | }
518 | 
519 | func (f *filterFunc) apply(p *pather) {
520 | 	for _, c := range p.candidates {
521 | 		if f.fn(c) != "" {
522 | 			p.scratch = append(p.scratch, c)
523 | 		}
524 | 	}
525 | 	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
526 | }
527 | 
528 | // filterFuncVal filters the candidate list for elements containing a value
529 | // matching the result of a custom function.
530 | type filterFuncVal struct {
531 | 	fn  func(e *Element) string
532 | 	val string
533 | }
534 | 
535 | func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal {
536 | 	return &filterFuncVal{fn, value}
537 | }
538 | 
539 | func (f *filterFuncVal) apply(p *pather) {
540 | 	for _, c := range p.candidates {
541 | 		if f.fn(c) == f.val {
542 | 			p.scratch = append(p.scratch, c)
543 | 		}
544 | 	}
545 | 	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
546 | }
547 | 
548 | // filterChild filters the candidate list for elements having
549 | // a child element with the specified tag.
550 | type filterChild struct {
551 | 	space, tag string
552 | }
553 | 
554 | func newFilterChild(str string) *filterChild {
555 | 	s, l := spaceDecompose(str)
556 | 	return &filterChild{s, l}
557 | }
558 | 
559 | func (f *filterChild) apply(p *pather) {
560 | 	for _, c := range p.candidates {
561 | 		for _, cc := range c.Child {
562 | 			if cc, ok := cc.(*Element); ok &&
563 | 				spaceMatch(f.space, cc.Space) &&
564 | 				f.tag == cc.Tag {
565 | 				p.scratch = append(p.scratch, c)
566 | 			}
567 | 		}
568 | 	}
569 | 	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
570 | }
571 | 
572 | // filterChildText filters the candidate list for elements having
573 | // a child element with the specified tag and text.
574 | type filterChildText struct {
575 | 	space, tag, text string
576 | }
577 | 
578 | func newFilterChildText(str, text string) *filterChildText {
579 | 	s, l := spaceDecompose(str)
580 | 	return &filterChildText{s, l, text}
581 | }
582 | 
583 | func (f *filterChildText) apply(p *pather) {
584 | 	for _, c := range p.candidates {
585 | 		for _, cc := range c.Child {
586 | 			if cc, ok := cc.(*Element); ok &&
587 | 				spaceMatch(f.space, cc.Space) &&
588 | 				f.tag == cc.Tag &&
589 | 				f.text == cc.Text() {
590 | 				p.scratch = append(p.scratch, c)
591 | 			}
592 | 		}
593 | 	}
594 | 	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
595 | }
596 | 


--------------------------------------------------------------------------------
/path_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015-2019 Brett Vickers.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package etree
  6 | 
  7 | import "testing"
  8 | 
// testXML is the bookstore document that the path tests query. Besides
// four book elements it contains a directive, a comment, prices both with
// and without the "p" namespace prefix, and editor elements that are
// empty, self-closing or whitespace-only.
var testXML = `
<?xml version="1.0" encoding="UTF-8"?>
<bookstore xmlns:p="urn:books-com:prices">

	<!Directive>

	<book category="COOKING">
		<title lang="en">Everyday Italian</title>
		<author>Giada De Laurentiis</author>
		<year>2005</year>
		<p:price>30.00</p:price>
		<editor>Clarkson Potter</editor>
	</book>

	<book category="CHILDREN">
		<title lang="en" sku="150">Harry Potter</title>
		<author>J K. Rowling</author>
		<year>2005</year>
		<p:price p:tax="1.99">29.99</p:price>
		<editor></editor>
		<editor/>
	</book>

	<book category="WEB">
		<title lang="en">XQuery Kick Start</title>
		<author>James McGovern</author>
		<author>Per Bothner</author>
		<author>Kurt Cagle</author>
		<author>James Linn</author>
		<author>Vaidyanathan Nagarajan</author>
		<year>2003</year>
		<price>49.99</price>
		<editor>
		</editor>
	</book>

	<!-- Final book -->
	<book category="WEB" path="/books/xml">
		<title lang="en">Learning XML</title>
		<author>Erik T. Ray</author>
		<year>2003</year>
		<p:price>39.95</p:price>
	</book>

</bookstore>
`
 55 | 
// test pairs a path string with the outcome expected when the path is
// compiled and evaluated against the testXML document.
//
// result is nil when nothing should match, a string when exactly one
// element with that text should match, a []string listing the text of
// every matching element in order, or an errorResult when compiling the
// path should fail.
type test struct {
	path   string
	result interface{}
}

// errorResult is the exact error message CompilePath is expected to
// return for an invalid path.
type errorResult string
 62 | 
// tests lists the path queries run by TestPath against testXML, grouped
// by the path feature they exercise.
var tests = []test{
	// basic queries
	{"./bookstore/book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{"./bookstore/book/author", []string{"Giada De Laurentiis", "J K. Rowling", "James McGovern", "Per Bothner", "Kurt Cagle", "James Linn", "Vaidyanathan Nagarajan", "Erik T. Ray"}},
	{"./bookstore/book/year", []string{"2005", "2005", "2003", "2003"}},
	{"./bookstore/book/p:price", []string{"30.00", "29.99", "39.95"}},
	{"./bookstore/book/isbn", nil},

	// descendant queries
	{"//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{"//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{".//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{".//bookstore//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{".//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
	{".//p:price/.", []string{"30.00", "29.99", "39.95"}},
	{".//price", []string{"30.00", "29.99", "49.99", "39.95"}},

	// positional queries ([0] and [1] both select the first match;
	// negative positions count back from the last match)
	{"./bookstore/book[1]/title", "Everyday Italian"},
	{"./bookstore/book[4]/title", "Learning XML"},
	{"./bookstore/book[5]/title", nil},
	{"./bookstore/book[3]/author[0]", "James McGovern"},
	{"./bookstore/book[3]/author[1]", "James McGovern"},
	{"./bookstore/book[3]/author[3]/./.", "Kurt Cagle"},
	{"./bookstore/book[3]/author[6]", nil},
	{"./bookstore/book[-1]/title", "Learning XML"},
	{"./bookstore/book[-4]/title", "Everyday Italian"},
	{"./bookstore/book[-5]/title", nil},

	// child text and text() function queries
	{"./bookstore/book[author='James McGovern']/title", "XQuery Kick Start"},
	{"./bookstore/book[author='Per Bothner']/title", "XQuery Kick Start"},
	{"./bookstore/book[author='Kurt Cagle']/title", "XQuery Kick Start"},
	{"./bookstore/book[author='James Linn']/title", "XQuery Kick Start"},
	{"./bookstore/book[author='Vaidyanathan Nagarajan']/title", "XQuery Kick Start"},
	{"//book[p:price='29.99']/title", "Harry Potter"},
	{"//book[price='29.99']/title", "Harry Potter"},
	{"//book/price[text()='29.99']", "29.99"},
	{"//book/author[text()='Kurt Cagle']", "Kurt Cagle"},
	{"//book/editor[text()]", []string{"Clarkson Potter", "\n\t\t"}},

	// namespace function queries
	{"//*[namespace-uri()]", []string{"30.00", "29.99", "39.95"}},
	{"//*[namespace-uri()='urn:books-com:prices']", []string{"30.00", "29.99", "39.95"}},
	{"//*[namespace-uri()='foo']", nil},
	{"//*[namespace-prefix()]", []string{"30.00", "29.99", "39.95"}},
	{"//*[namespace-prefix()='p']", []string{"30.00", "29.99", "39.95"}},
	{"//*[name()='p:price']", []string{"30.00", "29.99", "39.95"}},
	{"//*[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}},
	{"//price[namespace-uri()='']", []string{"49.99"}},
	{"//price[namespace-prefix()='']", []string{"49.99"}},
	{"//price[name()='price']", []string{"49.99"}},
	{"//price[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}},

	// attribute queries
	{"./bookstore/book[@category='WEB']/title", []string{"XQuery Kick Start", "Learning XML"}},
	{"./bookstore/book[@path='/books/xml']/title", []string{"Learning XML"}},
	{"./bookstore/book[@category='COOKING']/title[@lang='en']", "Everyday Italian"},
	{`./bookstore/book[@category="COOKING"]/title[@lang="en"]`, "Everyday Italian"},
	{"./bookstore/book/title[@lang='en'][@sku='150']", "Harry Potter"},
	{"./bookstore/book/title[@lang='fr']", nil},
	{"//p:price[@p:tax='1.99']", []string{"29.99"}},
	{"//p:price[@tax='1.99']", []string{"29.99"}},
	{"//p:price[@p:tax]", []string{"29.99"}},
	{"//p:price[@tax]", []string{"29.99"}},

	// parent queries
	{"./bookstore/book[@category='COOKING']/title/../../book[4]/title", "Learning XML"},

	// root queries
	{"/bookstore/book[1]/title", "Everyday Italian"},
	{"/bookstore/book[4]/title", "Learning XML"},
	{"/bookstore/book[5]/title", nil},
	{"/bookstore/book[3]/author[0]", "James McGovern"},
	{"/bookstore/book[3]/author[1]", "James McGovern"},
	{"/bookstore/book[3]/author[3]/./.", "Kurt Cagle"},
	{"/bookstore/book[3]/author[6]", nil},
	{"/bookstore/book[-1]/title", "Learning XML"},
	{"/bookstore/book[-4]/title", "Everyday Italian"},
	{"/bookstore/book[-5]/title", nil},

	// bad paths
	{"./bookstore/book[]", errorResult("etree: path contains an empty filter expression.")},
	{"./bookstore/book[@category='WEB'", errorResult("etree: path has invalid filter [brackets].")},
	{"./bookstore/book[@category='WEB]", errorResult("etree: path has mismatched filter quotes.")},
	{`./bookstore/book[@category='WEB"]`, errorResult("etree: path has mismatched filter quotes.")},
	{`./bookstore/book[@category="WEB']`, errorResult("etree: path has mismatched filter quotes.")},
	{"./bookstore/book[author]a", errorResult("etree: path has invalid filter [brackets].")},
	{"/][", errorResult("etree: path has invalid filter [brackets].")},
}
153 | 
154 | func TestPath(t *testing.T) {
155 | 	doc := NewDocument()
156 | 	err := doc.ReadFromString(testXML)
157 | 	if err != nil {
158 | 		t.Error(err)
159 | 	}
160 | 
161 | 	for _, test := range tests {
162 | 		path, err := CompilePath(test.path)
163 | 		if err != nil {
164 | 			if r, ok := test.result.(errorResult); !ok || err.Error() != string(r) {
165 | 				fail(t, test)
166 | 			}
167 | 			continue
168 | 		}
169 | 
170 | 		// Test both FindElementsPath and FindElementPath
171 | 		element := doc.FindElementPath(path)
172 | 		elements := doc.FindElementsPath(path)
173 | 
174 | 		switch s := test.result.(type) {
175 | 		case errorResult:
176 | 			fail(t, test)
177 | 		case nil:
178 | 			if element != nil || len(elements) != 0 {
179 | 				fail(t, test)
180 | 			}
181 | 		case string:
182 | 			if element == nil || element.Text() != s ||
183 | 				len(elements) != 1 || elements[0].Text() != s {
184 | 				fail(t, test)
185 | 			}
186 | 		case []string:
187 | 			if element == nil || element.Text() != s[0] || len(elements) != len(s) {
188 | 				fail(t, test)
189 | 				continue
190 | 			}
191 | 			for i := 0; i < len(elements); i++ {
192 | 				if elements[i].Text() != s[i] {
193 | 					fail(t, test)
194 | 					break
195 | 				}
196 | 			}
197 | 		}
198 | 
199 | 	}
200 | }
201 | 
// fail reports a failure for the given table entry, identifying it by its
// path string. It is marked as a test helper so the failure is attributed
// to the caller's line.
func fail(t *testing.T, test test) {
	t.Helper()
	t.Errorf("etree: failed test '%s'\n", test.path)
}
206 | 
207 | func TestAbsolutePath(t *testing.T) {
208 | 	doc := NewDocument()
209 | 	err := doc.ReadFromString(testXML)
210 | 	if err != nil {
211 | 		t.Error(err)
212 | 	}
213 | 
214 | 	elements := doc.FindElements("//book/author")
215 | 	for _, e := range elements {
216 | 		title := e.FindElement("/bookstore/book[1]/title")
217 | 		if title == nil || title.Text() != "Everyday Italian" {
218 | 			t.Errorf("etree: absolute path test failed")
219 | 		}
220 | 
221 | 		title = e.FindElement("//book[p:price='29.99']/title")
222 | 		if title == nil || title.Text() != "Harry Potter" {
223 | 			t.Errorf("etree: absolute path test failed")
224 | 		}
225 | 	}
226 | }
227 | 


--------------------------------------------------------------------------------