├── .github └── workflows │ └── go.yml ├── CONTRIBUTORS ├── LICENSE ├── README.md ├── RELEASE_NOTES.md ├── etree.go ├── etree_test.go ├── example_test.go ├── go.mod ├── helpers.go ├── path.go └── path_test.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: [push, pull_request] 4 | 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | 10 | analyze: 11 | name: Analyze 12 | runs-on: ubuntu-latest 13 | 14 | permissions: 15 | actions: read 16 | contents: read 17 | security-events: write 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | language: ["go"] 23 | 24 | steps: 25 | - name: Checkout repository 26 | uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 27 | 28 | - name: Initialize CodeQL 29 | uses: github/codeql-action/init@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12 30 | with: 31 | languages: ${{ matrix.language }} 32 | 33 | - name: Autobuild 34 | uses: github/codeql-action/autobuild@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12 35 | 36 | - name: Perform CodeQL Analysis 37 | uses: github/codeql-action/analyze@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12 38 | with: 39 | category: "/language:${{matrix.language}}" 40 | 41 | build: 42 | name: Build 43 | runs-on: ubuntu-latest 44 | 45 | strategy: 46 | matrix: 47 | go-version: [ '1.21', '1.22.x' ] 48 | 49 | steps: 50 | - name: Checkout repository 51 | uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 52 | 53 | - name: Setup Go ${{ matrix.go-version }} 54 | uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 55 | with: 56 | go-version: ${{ matrix.go-version }} 57 | 58 | - name: Build 59 | run: go build -v ./... 60 | 61 | - name: Test 62 | run: go test -v ./... 
63 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | Brett Vickers (beevik) 2 | Felix Geisendörfer (felixge) 3 | Kamil Kisiel (kisielk) 4 | Graham King (grahamking) 5 | Matt Smith (ma314smith) 6 | Michal Jemala (michaljemala) 7 | Nicolas Piganeau (npiganeau) 8 | Chris Brown (ccbrown) 9 | Earncef Sequeira (earncef) 10 | Gabriel de Labachelerie (wuzuf) 11 | Martin Dosch (mdosch) 12 | Hugo Wetterberg (hugowetterberg) 13 | Tobias Theel (nerzal) 14 | Daniel Potapov (dpotapov) 15 | Mikhail Ferapontow (MikhailFerapontow) 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2015-2024 Brett Vickers. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY 15 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL COPYRIGHT HOLDER OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GoDoc](https://godoc.org/github.com/beevik/etree?status.svg)](https://godoc.org/github.com/beevik/etree) 2 | [![Go](https://github.com/beevik/etree/actions/workflows/go.yml/badge.svg)](https://github.com/beevik/etree/actions/workflows/go.yml) 3 | 4 | etree 5 | ===== 6 | 7 | The etree package is a lightweight, pure go package that expresses XML in 8 | the form of an element tree. Its design was inspired by the Python 9 | [ElementTree](http://docs.python.org/2/library/xml.etree.elementtree.html) 10 | module. 11 | 12 | Some of the package's capabilities and features: 13 | 14 | * Represents XML documents as trees of elements for easy traversal. 15 | * Imports, serializes, modifies or creates XML documents from scratch. 16 | * Writes and reads XML to/from files, byte slices, strings and io interfaces. 17 | * Performs simple or complex searches with lightweight XPath-like query APIs. 18 | * Auto-indents XML using spaces or tabs for better readability. 19 | * Implemented in pure go; depends only on standard go libraries. 20 | * Built on top of the go [encoding/xml](http://golang.org/pkg/encoding/xml) 21 | package. 
22 | 23 | ### Creating an XML document 24 | 25 | The following example creates an XML document from scratch using the etree 26 | package and outputs its indented contents to stdout. 27 | ```go 28 | doc := etree.NewDocument() 29 | doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`) 30 | doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`) 31 | 32 | people := doc.CreateElement("People") 33 | people.CreateComment("These are all known people") 34 | 35 | jon := people.CreateElement("Person") 36 | jon.CreateAttr("name", "Jon") 37 | 38 | sally := people.CreateElement("Person") 39 | sally.CreateAttr("name", "Sally") 40 | 41 | doc.Indent(2) 42 | doc.WriteTo(os.Stdout) 43 | ``` 44 | 45 | Output: 46 | ```xml 47 | <?xml version="1.0" encoding="UTF-8"?> 48 | <?xml-stylesheet type="text/xsl" href="style.xsl"?> 49 | <People> 50 | <!--These are all known people--> 51 | <Person name="Jon"/> 52 | <Person name="Sally"/> 53 | </People> 54 | ``` 55 | 56 | ### Reading an XML file 57 | 58 | Suppose you have a file on disk called `bookstore.xml` containing the 59 | following data: 60 | 61 | ```xml 62 | 63 | 64 | 65 | Everyday Italian 66 | Giada De Laurentiis 67 | 2005 68 | 30.00 69 | 70 | 71 | 72 | Harry Potter 73 | J K. Rowling 74 | 2005 75 | 29.99 76 | 77 | 78 | 79 | XQuery Kick Start 80 | James McGovern 81 | Per Bothner 82 | Kurt Cagle 83 | James Linn 84 | Vaidyanathan Nagarajan 85 | 2003 86 | 49.99 87 | 88 | 89 | 90 | Learning XML 91 | Erik T. Ray 92 | 2003 93 | 39.95 94 | 95 | 96 | 97 | ``` 98 | 99 | This code reads the file's contents into an etree document. 100 | ```go 101 | doc := etree.NewDocument() 102 | if err := doc.ReadFromFile("bookstore.xml"); err != nil { 103 | panic(err) 104 | } 105 | ``` 106 | 107 | You can also read XML from a string, a byte slice, or an `io.Reader`. 108 | 109 | ### Processing elements and attributes 110 | 111 | This example illustrates several ways to access elements and attributes using 112 | etree selection queries. 
113 | ```go 114 | root := doc.SelectElement("bookstore") 115 | fmt.Println("ROOT element:", root.Tag) 116 | 117 | for _, book := range root.SelectElements("book") { 118 | fmt.Println("CHILD element:", book.Tag) 119 | if title := book.SelectElement("title"); title != nil { 120 | lang := title.SelectAttrValue("lang", "unknown") 121 | fmt.Printf(" TITLE: %s (%s)\n", title.Text(), lang) 122 | } 123 | for _, attr := range book.Attr { 124 | fmt.Printf(" ATTR: %s=%s\n", attr.Key, attr.Value) 125 | } 126 | } 127 | ``` 128 | Output: 129 | ``` 130 | ROOT element: bookstore 131 | CHILD element: book 132 | TITLE: Everyday Italian (en) 133 | ATTR: category=COOKING 134 | CHILD element: book 135 | TITLE: Harry Potter (en) 136 | ATTR: category=CHILDREN 137 | CHILD element: book 138 | TITLE: XQuery Kick Start (en) 139 | ATTR: category=WEB 140 | CHILD element: book 141 | TITLE: Learning XML (en) 142 | ATTR: category=WEB 143 | ``` 144 | 145 | ### Path queries 146 | 147 | This example uses etree's path functions to select all book titles that fall 148 | into the category of 'WEB'. The double-slash prefix in the path causes the 149 | search for book elements to occur recursively; book elements may appear at any 150 | level of the XML hierarchy. 151 | ```go 152 | for _, t := range doc.FindElements("//book[@category='WEB']/title") { 153 | fmt.Println("Title:", t.Text()) 154 | } 155 | ``` 156 | 157 | Output: 158 | ``` 159 | Title: XQuery Kick Start 160 | Title: Learning XML 161 | ``` 162 | 163 | This example finds the first book element under the root bookstore element and 164 | outputs the tag and text of each of its child elements. 
165 | ```go 166 | for _, e := range doc.FindElements("./bookstore/book[1]/*") { 167 | fmt.Printf("%s: %s\n", e.Tag, e.Text()) 168 | } 169 | ``` 170 | 171 | Output: 172 | ``` 173 | title: Everyday Italian 174 | author: Giada De Laurentiis 175 | year: 2005 176 | price: 30.00 177 | ``` 178 | 179 | This example finds all books with a price of 49.99 and outputs their titles. 180 | ```go 181 | path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title") 182 | for _, e := range doc.FindElementsPath(path) { 183 | fmt.Println(e.Text()) 184 | } 185 | ``` 186 | 187 | Output: 188 | ``` 189 | XQuery Kick Start 190 | ``` 191 | 192 | Note that this example uses the FindElementsPath function, which takes as an 193 | argument a pre-compiled path object. Use precompiled paths when you plan to 194 | search with the same path more than once. 195 | 196 | ### Other features 197 | 198 | These are just a few examples of the things the etree package can do. See the 199 | [documentation](http://godoc.org/github.com/beevik/etree) for a complete 200 | description of its capabilities. 201 | 202 | ### Contributing 203 | 204 | This project accepts contributions. Just fork the repo and submit a pull 205 | request! 206 | -------------------------------------------------------------------------------- /RELEASE_NOTES.md: -------------------------------------------------------------------------------- 1 | Release 1.5.1 2 | ============= 3 | 4 | **Fixes** 5 | 6 | * Fixed a bug in `InsertChildAt`. 7 | 8 | Release 1.5.0 9 | ============= 10 | 11 | **Changes** 12 | 13 | * Added `Element` function `CreateChild`, which calls a continuation function 14 | after creating and adding a child element. 15 | 16 | **Fixes** 17 | 18 | * Removed a potential conflict between two `ReadSettings` values. When 19 | `AttrSingleQuote` is true, `CanonicalAttrVal` is forced to be false. 20 | 21 | Release 1.4.1 22 | ============= 23 | 24 | **Changes** 25 | 26 | * Minimal go version updated to 1.21. 
27 | * Default-initialized CharsetReader causes same result as NewDocument(). 28 | * When reading an XML document, attributes are parsed more efficiently. 29 | 30 | Release v1.4.0 31 | ============== 32 | 33 | **New Features** 34 | 35 | * Add `AutoClose` option to `ReadSettings`. 36 | * Add `ValidateInput` to `ReadSettings`. 37 | * Add `NotNil` function to `Element`. 38 | * Add `NextSibling` and `PrevSibling` functions to `Element`. 39 | 40 | Release v1.3.0 41 | ============== 42 | 43 | **New Features** 44 | 45 | * Add support for double-quotes in filter path queries. 46 | * Add `PreserveDuplicateAttrs` to `ReadSettings`. 47 | * Add `ReindexChildren` to `Element`. 48 | 49 | Release v1.2.0 50 | ============== 51 | 52 | **New Features** 53 | 54 | * Add the ability to write XML fragments using Token WriteTo functions. 55 | * Add the ability to re-indent an XML element as though it were the root of 56 | the document. 57 | * Add a ReadSettings option to preserve CDATA blocks when reading an XML 58 | document. 59 | 60 | Release v1.1.4 61 | ============== 62 | 63 | **New Features** 64 | 65 | * Add the ability to preserve whitespace in leaf elements during indent. 66 | * Add the ability to suppress a document-trailing newline during indent. 67 | * Add choice of XML attribute quoting style (single-quote or double-quote). 68 | 69 | **Removed Features** 70 | 71 | * Removed the CDATA preservation change introduced in v1.1.3. It was 72 | implemented in a way that broke the ability to process XML documents 73 | encoded using non-UTF8 character sets. 74 | 75 | Release v1.1.3 76 | ============== 77 | 78 | * XML reads now preserve CDATA sections instead of converting them to 79 | standard character data. 80 | 81 | Release v1.1.2 82 | ============== 83 | 84 | * Fixed a path parsing bug. 85 | * The `Element.Text` function now handles comments embedded between 86 | character data spans. 
87 | 88 | Release v1.1.1 89 | ============== 90 | 91 | * Updated go version in `go.mod` to 1.20 92 | 93 | Release v1.1.0 94 | ============== 95 | 96 | **New Features** 97 | 98 | * New attribute helpers. 99 | * Added the `Element.SortAttrs` method, which lexicographically sorts an 100 | element's attributes by key. 101 | * New `ReadSettings` properties. 102 | * Added `Entity` for the support of custom entity maps. 103 | * New `WriteSettings` properties. 104 | * Added `UseCRLF` to allow the output of CR-LF newlines instead of the 105 | default LF newlines. This is useful on Windows systems. 106 | * Additional support for text and CDATA sections. 107 | * The `Element.Text` method now returns the concatenation of all consecutive 108 | character data tokens immediately following an element's opening tag. 109 | * Added `Element.SetCData` to replace the character data immediately 110 | following an element's opening tag with a CDATA section. 111 | * Added `Element.CreateCData` to create and add a CDATA section child 112 | `CharData` token to an element. 113 | * Added `Element.CreateText` to create and add a child text `CharData` token 114 | to an element. 115 | * Added `NewCData` to create a parentless CDATA section `CharData` token. 116 | * Added `NewText` to create a parentless text `CharData` 117 | token. 118 | * Added `CharData.IsCData` to detect if the token contains a CDATA section. 119 | * Added `CharData.IsWhitespace` to detect if the token contains whitespace 120 | inserted by one of the document Indent functions. 121 | * Modified `Element.SetText` so that it replaces a run of consecutive 122 | character data tokens following the element's opening tag (instead of just 123 | the first one). 124 | * New "tail text" support. 125 | * Added the `Element.Tail` method, which returns the text immediately 126 | following an element's closing tag. 127 | * Added the `Element.SetTail` method, which modifies the text immediately 128 | following an element's closing tag. 
129 | * New element child insertion and removal methods. 130 | * Added the `Element.InsertChildAt` method, which inserts a new child token 131 | before the specified child token index. 132 | * Added the `Element.RemoveChildAt` method, which removes the child token at 133 | the specified child token index. 134 | * New element and attribute queries. 135 | * Added the `Element.Index` method, which returns the element's index within 136 | its parent element's child token list. 137 | * Added the `Element.NamespaceURI` method to return the namespace URI 138 | associated with an element. 139 | * Added the `Attr.NamespaceURI` method to return the namespace URI 140 | associated with an attribute. 141 | * Added the `Attr.Element` method to return the element that an attribute 142 | belongs to. 143 | * New Path filter functions. 144 | * Added `[local-name()='val']` to keep elements whose unprefixed tag matches 145 | the desired value. 146 | * Added `[name()='val']` to keep elements whose full tag matches the desired 147 | value. 148 | * Added `[namespace-prefix()='val']` to keep elements whose namespace prefix 149 | matches the desired value. 150 | * Added `[namespace-uri()='val']` to keep elements whose namespace URI 151 | matches the desired value. 152 | 153 | **Bug Fixes** 154 | 155 | * A default XML `CharsetReader` is now used to prevent failed parsing of XML 156 | documents using certain encodings. 157 | ([Issue](https://github.com/beevik/etree/issues/53)). 158 | * All characters are now properly escaped according to XML parsing rules. 159 | ([Issue](https://github.com/beevik/etree/issues/55)). 160 | * The `Document.Indent` and `Document.IndentTabs` functions no longer insert 161 | empty string `CharData` tokens. 162 | 163 | **Deprecated** 164 | 165 | * `Element` 166 | * The `InsertChild` method is deprecated. Use `InsertChildAt` instead. 167 | * The `CreateCharData` method is deprecated. Use `CreateText` instead. 
168 | * `CharData` 169 | * The `NewCharData` method is deprecated. Use `NewText` instead. 170 | 171 | 172 | Release v1.0.1 173 | ============== 174 | 175 | **Changes** 176 | 177 | * Added support for absolute etree Path queries. An absolute path begins with 178 | `/` or `//` and begins its search from the element's document root. 179 | * Added [`GetPath`](https://godoc.org/github.com/beevik/etree#Element.GetPath) 180 | and [`GetRelativePath`](https://godoc.org/github.com/beevik/etree#Element.GetRelativePath) 181 | functions to the [`Element`](https://godoc.org/github.com/beevik/etree#Element) 182 | type. 183 | 184 | **Breaking changes** 185 | 186 | * A path starting with `//` is now interpreted as an absolute path. 187 | Previously, it was interpreted as a relative path starting from the element 188 | whose 189 | [`FindElement`](https://godoc.org/github.com/beevik/etree#Element.FindElement) 190 | method was called. To remain compatible with this release, all paths 191 | prefixed with `//` should be prefixed with `.//` when called from any 192 | element other than the document's root. 193 | * [**edit 2/1/2019**]: Minor releases should not contain breaking changes. 194 | Even though this breaking change was very minor, it was a mistake to include 195 | it in this minor release. In the future, all breaking changes will be 196 | limited to major releases (e.g., version 2.0.0). 197 | 198 | Release v1.0.0 199 | ============== 200 | 201 | Initial release. 202 | -------------------------------------------------------------------------------- /etree.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015-2019 Brett Vickers. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package etree provides XML services through an Element Tree 6 | // abstraction. 
7 | package etree 8 | 9 | import ( 10 | "bufio" 11 | "bytes" 12 | "encoding/xml" 13 | "errors" 14 | "io" 15 | "os" 16 | "slices" 17 | "strings" 18 | ) 19 | 20 | const ( 21 | // NoIndent is used with the IndentSettings record to remove all 22 | // indenting. 23 | NoIndent = -1 24 | ) 25 | 26 | // ErrXML is returned when XML parsing fails due to incorrect formatting. 27 | var ErrXML = errors.New("etree: invalid XML format") 28 | 29 | // cdataPrefix is used to detect CDATA text when ReadSettings.PreserveCData is 30 | // true. 31 | var cdataPrefix = []byte(". If false, XML character references 111 | // are also produced for " and '. Default: false. 112 | CanonicalText bool 113 | 114 | // CanonicalAttrVal forces the production of XML character references for 115 | // attribute value characters &, < and ". If false, XML character 116 | // references are also produced for > and '. Ignored when AttrSingleQuote 117 | // is true. Default: false. 118 | CanonicalAttrVal bool 119 | 120 | // AttrSingleQuote causes attributes to use single quotes (attr='example') 121 | // instead of double quotes (attr = "example") when set to true. Default: 122 | // false. 123 | AttrSingleQuote bool 124 | 125 | // UseCRLF causes the document's Indent* functions to use a carriage return 126 | // followed by a linefeed ("\r\n") when outputting a newline. If false, 127 | // only a linefeed is used ("\n"). Default: false. 128 | // 129 | // Deprecated: UseCRLF is deprecated. Use IndentSettings.UseCRLF instead. 130 | UseCRLF bool 131 | } 132 | 133 | // dup creates a duplicate of the WriteSettings object. 134 | func (s *WriteSettings) dup() WriteSettings { 135 | return *s 136 | } 137 | 138 | // IndentSettings determine the behavior of the Document's Indent* functions. 139 | type IndentSettings struct { 140 | // Spaces indicates the number of spaces to insert for each level of 141 | // indentation. Set to etree.NoIndent to remove all indentation. Ignored 142 | // when UseTabs is true. Default: 4. 
143 | Spaces int 144 | 145 | // UseTabs causes tabs to be used instead of spaces when indenting. 146 | // Default: false. 147 | UseTabs bool 148 | 149 | // UseCRLF causes newlines to be written as a carriage return followed by 150 | // a linefeed ("\r\n"). If false, only a linefeed character is output 151 | // for a newline ("\n"). Default: false. 152 | UseCRLF bool 153 | 154 | // PreserveLeafWhitespace causes indent functions to preserve whitespace 155 | // within XML elements containing only non-CDATA character data. Default: 156 | // false. 157 | PreserveLeafWhitespace bool 158 | 159 | // SuppressTrailingWhitespace suppresses the generation of a trailing 160 | // whitespace characters (such as newlines) at the end of the indented 161 | // document. Default: false. 162 | SuppressTrailingWhitespace bool 163 | } 164 | 165 | // NewIndentSettings creates a default IndentSettings record. 166 | func NewIndentSettings() *IndentSettings { 167 | return &IndentSettings{ 168 | Spaces: 4, 169 | UseTabs: false, 170 | UseCRLF: false, 171 | PreserveLeafWhitespace: false, 172 | SuppressTrailingWhitespace: false, 173 | } 174 | } 175 | 176 | type indentFunc func(depth int) string 177 | 178 | func getIndentFunc(s *IndentSettings) indentFunc { 179 | if s.UseTabs { 180 | if s.UseCRLF { 181 | return func(depth int) string { return indentCRLF(depth, indentTabs) } 182 | } else { 183 | return func(depth int) string { return indentLF(depth, indentTabs) } 184 | } 185 | } else { 186 | if s.Spaces < 0 { 187 | return func(depth int) string { return "" } 188 | } else if s.UseCRLF { 189 | return func(depth int) string { return indentCRLF(depth*s.Spaces, indentSpaces) } 190 | } else { 191 | return func(depth int) string { return indentLF(depth*s.Spaces, indentSpaces) } 192 | } 193 | } 194 | } 195 | 196 | // Writer is the interface that wraps the Write* functions called by each token 197 | // type's WriteTo function. 
198 | type Writer interface { 199 | io.StringWriter 200 | io.ByteWriter 201 | io.Writer 202 | } 203 | 204 | // A Token is an interface type used to represent XML elements, character 205 | // data, CDATA sections, XML comments, XML directives, and XML processing 206 | // instructions. 207 | type Token interface { 208 | Parent() *Element 209 | Index() int 210 | WriteTo(w Writer, s *WriteSettings) 211 | dup(parent *Element) Token 212 | setParent(parent *Element) 213 | setIndex(index int) 214 | } 215 | 216 | // A Document is a container holding a complete XML tree. 217 | // 218 | // A document has a single embedded element, which contains zero or more child 219 | // tokens, one of which is usually the root element. The embedded element may 220 | // include other children such as processing instruction tokens or character 221 | // data tokens. The document's embedded element is never directly serialized; 222 | // only its children are. 223 | // 224 | // A document also contains read and write settings, which influence the way 225 | // the document is deserialized, serialized, and indented. 226 | type Document struct { 227 | Element 228 | ReadSettings ReadSettings 229 | WriteSettings WriteSettings 230 | } 231 | 232 | // An Element represents an XML element, its attributes, and its child tokens. 233 | type Element struct { 234 | Space, Tag string // namespace prefix and tag 235 | Attr []Attr // key-value attribute pairs 236 | Child []Token // child tokens (elements, comments, etc.) 237 | parent *Element // parent element 238 | index int // token index in parent's children 239 | } 240 | 241 | // An Attr represents a key-value attribute within an XML element. 242 | type Attr struct { 243 | Space, Key string // The attribute's namespace prefix and key 244 | Value string // The attribute value string 245 | element *Element // element containing the attribute 246 | } 247 | 248 | // charDataFlags are used with CharData tokens to store additional settings. 
249 | type charDataFlags uint8 250 | 251 | const ( 252 | // The CharData contains only whitespace. 253 | whitespaceFlag charDataFlags = 1 << iota 254 | 255 | // The CharData contains a CDATA section. 256 | cdataFlag 257 | ) 258 | 259 | // CharData may be used to represent simple text data or a CDATA section 260 | // within an XML document. The Data property should never be modified 261 | // directly; use the SetData function instead. 262 | type CharData struct { 263 | Data string // the simple text or CDATA section content 264 | parent *Element 265 | index int 266 | flags charDataFlags 267 | } 268 | 269 | // A Comment represents an XML comment. 270 | type Comment struct { 271 | Data string // the comment's text 272 | parent *Element 273 | index int 274 | } 275 | 276 | // A Directive represents an XML directive. 277 | type Directive struct { 278 | Data string // the directive string 279 | parent *Element 280 | index int 281 | } 282 | 283 | // A ProcInst represents an XML processing instruction. 284 | type ProcInst struct { 285 | Target string // the processing instruction target 286 | Inst string // the processing instruction value 287 | parent *Element 288 | index int 289 | } 290 | 291 | // NewDocument creates an XML document without a root element. 292 | func NewDocument() *Document { 293 | return &Document{ 294 | Element: Element{Child: make([]Token, 0)}, 295 | } 296 | } 297 | 298 | // NewDocumentWithRoot creates an XML document and sets the element 'e' as its 299 | // root element. If the element 'e' is already part of another document, it is 300 | // first removed from its existing document. 301 | func NewDocumentWithRoot(e *Element) *Document { 302 | d := NewDocument() 303 | d.SetRoot(e) 304 | return d 305 | } 306 | 307 | // Copy returns a recursive, deep copy of the document. 
308 | func (d *Document) Copy() *Document { 309 | return &Document{ 310 | Element: *(d.Element.dup(nil).(*Element)), 311 | ReadSettings: d.ReadSettings.dup(), 312 | WriteSettings: d.WriteSettings.dup(), 313 | } 314 | } 315 | 316 | // Root returns the root element of the document. It returns nil if there is 317 | // no root element. 318 | func (d *Document) Root() *Element { 319 | for _, t := range d.Child { 320 | if c, ok := t.(*Element); ok { 321 | return c 322 | } 323 | } 324 | return nil 325 | } 326 | 327 | // SetRoot replaces the document's root element with the element 'e'. If the 328 | // document already has a root element when this function is called, then the 329 | // existing root element is unbound from the document. If the element 'e' is 330 | // part of another document, then it is unbound from the other document. 331 | func (d *Document) SetRoot(e *Element) { 332 | if e.parent != nil { 333 | e.parent.RemoveChild(e) 334 | } 335 | 336 | // If there is already a root element, replace it. 337 | p := &d.Element 338 | for i, t := range p.Child { 339 | if _, ok := t.(*Element); ok { 340 | t.setParent(nil) 341 | t.setIndex(-1) 342 | p.Child[i] = e 343 | e.setParent(p) 344 | e.setIndex(i) 345 | return 346 | } 347 | } 348 | 349 | // No existing root element, so add it. 350 | p.addChild(e) 351 | } 352 | 353 | // ReadFrom reads XML from the reader 'r' into this document. The function 354 | // returns the number of bytes read and any error encountered. 355 | func (d *Document) ReadFrom(r io.Reader) (n int64, err error) { 356 | if d.ReadSettings.ValidateInput { 357 | b, err := io.ReadAll(r) 358 | if err != nil { 359 | return 0, err 360 | } 361 | if err := validateXML(bytes.NewReader(b), d.ReadSettings); err != nil { 362 | return 0, err 363 | } 364 | r = bytes.NewReader(b) 365 | } 366 | return d.Element.readFrom(r, d.ReadSettings) 367 | } 368 | 369 | // ReadFromFile reads XML from a local file at path 'filepath' into this 370 | // document. 
371 | func (d *Document) ReadFromFile(filepath string) error { 372 | f, err := os.Open(filepath) 373 | if err != nil { 374 | return err 375 | } 376 | defer f.Close() 377 | 378 | _, err = d.ReadFrom(f) 379 | return err 380 | } 381 | 382 | // ReadFromBytes reads XML from the byte slice 'b' into the this document. 383 | func (d *Document) ReadFromBytes(b []byte) error { 384 | if d.ReadSettings.ValidateInput { 385 | if err := validateXML(bytes.NewReader(b), d.ReadSettings); err != nil { 386 | return err 387 | } 388 | } 389 | _, err := d.Element.readFrom(bytes.NewReader(b), d.ReadSettings) 390 | return err 391 | } 392 | 393 | // ReadFromString reads XML from the string 's' into this document. 394 | func (d *Document) ReadFromString(s string) error { 395 | if d.ReadSettings.ValidateInput { 396 | if err := validateXML(strings.NewReader(s), d.ReadSettings); err != nil { 397 | return err 398 | } 399 | } 400 | _, err := d.Element.readFrom(strings.NewReader(s), d.ReadSettings) 401 | return err 402 | } 403 | 404 | // validateXML determines if the data read from the reader 'r' contains 405 | // well-formed XML according to the rules set by the go xml package. 406 | func validateXML(r io.Reader, settings ReadSettings) error { 407 | dec := newDecoder(r, settings) 408 | err := dec.Decode(new(interface{})) 409 | if err != nil { 410 | return err 411 | } 412 | 413 | // If there are any trailing tokens after unmarshalling with Decode(), 414 | // then the XML input didn't terminate properly. 415 | _, err = dec.Token() 416 | if err == io.EOF { 417 | return nil 418 | } 419 | return ErrXML 420 | } 421 | 422 | // newDecoder creates an XML decoder for the reader 'r' configured using 423 | // the provided read settings. 
424 | func newDecoder(r io.Reader, settings ReadSettings) *xml.Decoder { 425 | d := xml.NewDecoder(r) 426 | d.CharsetReader = settings.CharsetReader 427 | if d.CharsetReader == nil { 428 | d.CharsetReader = defaultCharsetReader 429 | } 430 | d.Strict = !settings.Permissive 431 | d.Entity = settings.Entity 432 | d.AutoClose = settings.AutoClose 433 | return d 434 | } 435 | 436 | // WriteTo serializes the document out to the writer 'w'. The function returns 437 | // the number of bytes written and any error encountered. 438 | func (d *Document) WriteTo(w io.Writer) (n int64, err error) { 439 | xw := newXmlWriter(w) 440 | b := bufio.NewWriter(xw) 441 | for _, c := range d.Child { 442 | c.WriteTo(b, &d.WriteSettings) 443 | } 444 | err, n = b.Flush(), xw.bytes 445 | return 446 | } 447 | 448 | // WriteToFile serializes the document out to the file at path 'filepath'. 449 | func (d *Document) WriteToFile(filepath string) error { 450 | f, err := os.Create(filepath) 451 | if err != nil { 452 | return err 453 | } 454 | defer f.Close() 455 | _, err = d.WriteTo(f) 456 | return err 457 | } 458 | 459 | // WriteToBytes serializes this document into a slice of bytes. 460 | func (d *Document) WriteToBytes() (b []byte, err error) { 461 | var buf bytes.Buffer 462 | if _, err = d.WriteTo(&buf); err != nil { 463 | return 464 | } 465 | return buf.Bytes(), nil 466 | } 467 | 468 | // WriteToString serializes this document into a string. 469 | func (d *Document) WriteToString() (s string, err error) { 470 | var b []byte 471 | if b, err = d.WriteToBytes(); err != nil { 472 | return 473 | } 474 | return string(b), nil 475 | } 476 | 477 | // Indent modifies the document's element tree by inserting character data 478 | // tokens containing newlines and spaces for indentation. The amount of 479 | // indentation per depth level is given by the 'spaces' parameter. Other than 480 | // the number of spaces, default IndentSettings are used. 
481 | func (d *Document) Indent(spaces int) { 482 | s := NewIndentSettings() 483 | s.Spaces = spaces 484 | d.IndentWithSettings(s) 485 | } 486 | 487 | // IndentTabs modifies the document's element tree by inserting CharData 488 | // tokens containing newlines and tabs for indentation. One tab is used per 489 | // indentation level. Other than the use of tabs, default IndentSettings 490 | // are used. 491 | func (d *Document) IndentTabs() { 492 | s := NewIndentSettings() 493 | s.UseTabs = true 494 | d.IndentWithSettings(s) 495 | } 496 | 497 | // IndentWithSettings modifies the document's element tree by inserting 498 | // character data tokens containing newlines and indentation. The behavior 499 | // of the indentation algorithm is configured by the indent settings. 500 | func (d *Document) IndentWithSettings(s *IndentSettings) { 501 | // WriteSettings.UseCRLF is deprecated. Until removed from the package, it 502 | // overrides IndentSettings.UseCRLF when true. 503 | if d.WriteSettings.UseCRLF { 504 | s.UseCRLF = true 505 | } 506 | 507 | d.Element.indent(0, getIndentFunc(s), s) 508 | 509 | if s.SuppressTrailingWhitespace { 510 | d.Element.stripTrailingWhitespace() 511 | } 512 | } 513 | 514 | // Unindent modifies the document's element tree by removing character data 515 | // tokens containing only whitespace. Other than the removal of indentation, 516 | // default IndentSettings are used. 517 | func (d *Document) Unindent() { 518 | s := NewIndentSettings() 519 | s.Spaces = NoIndent 520 | d.IndentWithSettings(s) 521 | } 522 | 523 | // NewElement creates an unparented element with the specified tag (i.e., 524 | // name). The tag may include a namespace prefix followed by a colon. 525 | func NewElement(tag string) *Element { 526 | space, stag := spaceDecompose(tag) 527 | return newElement(space, stag, nil) 528 | } 529 | 530 | // newElement is a helper function that creates an element and binds it to 531 | // a parent element if possible. 
532 | func newElement(space, tag string, parent *Element) *Element { 533 | e := &Element{ 534 | Space: space, 535 | Tag: tag, 536 | Attr: make([]Attr, 0), 537 | Child: make([]Token, 0), 538 | parent: parent, 539 | index: -1, 540 | } 541 | if parent != nil { 542 | parent.addChild(e) 543 | } 544 | return e 545 | } 546 | 547 | // Copy creates a recursive, deep copy of the element and all its attributes 548 | // and children. The returned element has no parent but can be parented to a 549 | // another element using AddChild, or added to a document with SetRoot or 550 | // NewDocumentWithRoot. 551 | func (e *Element) Copy() *Element { 552 | return e.dup(nil).(*Element) 553 | } 554 | 555 | // FullTag returns the element e's complete tag, including namespace prefix if 556 | // present. 557 | func (e *Element) FullTag() string { 558 | if e.Space == "" { 559 | return e.Tag 560 | } 561 | return e.Space + ":" + e.Tag 562 | } 563 | 564 | // NamespaceURI returns the XML namespace URI associated with the element. If 565 | // the element is part of the XML default namespace, NamespaceURI returns the 566 | // empty string. 567 | func (e *Element) NamespaceURI() string { 568 | if e.Space == "" { 569 | return e.findDefaultNamespaceURI() 570 | } 571 | return e.findLocalNamespaceURI(e.Space) 572 | } 573 | 574 | // findLocalNamespaceURI finds the namespace URI corresponding to the 575 | // requested prefix. 576 | func (e *Element) findLocalNamespaceURI(prefix string) string { 577 | for _, a := range e.Attr { 578 | if a.Space == "xmlns" && a.Key == prefix { 579 | return a.Value 580 | } 581 | } 582 | 583 | if e.parent == nil { 584 | return "" 585 | } 586 | 587 | return e.parent.findLocalNamespaceURI(prefix) 588 | } 589 | 590 | // findDefaultNamespaceURI finds the default namespace URI of the element. 
591 | func (e *Element) findDefaultNamespaceURI() string { 592 | for _, a := range e.Attr { 593 | if a.Space == "" && a.Key == "xmlns" { 594 | return a.Value 595 | } 596 | } 597 | 598 | if e.parent == nil { 599 | return "" 600 | } 601 | 602 | return e.parent.findDefaultNamespaceURI() 603 | } 604 | 605 | // namespacePrefix returns the namespace prefix associated with the element. 606 | func (e *Element) namespacePrefix() string { 607 | return e.Space 608 | } 609 | 610 | // name returns the tag associated with the element. 611 | func (e *Element) name() string { 612 | return e.Tag 613 | } 614 | 615 | // ReindexChildren recalculates the index values of the element's child 616 | // tokens. This is necessary only if you have manually manipulated the 617 | // element's `Child` array. 618 | func (e *Element) ReindexChildren() { 619 | for i := 0; i < len(e.Child); i++ { 620 | e.Child[i].setIndex(i) 621 | } 622 | } 623 | 624 | // Text returns all character data immediately following the element's opening 625 | // tag. 626 | func (e *Element) Text() string { 627 | if len(e.Child) == 0 { 628 | return "" 629 | } 630 | 631 | text := "" 632 | for _, ch := range e.Child { 633 | if cd, ok := ch.(*CharData); ok { 634 | if text == "" { 635 | text = cd.Data 636 | } else { 637 | text += cd.Data 638 | } 639 | } else if _, ok := ch.(*Comment); ok { 640 | // ignore 641 | } else { 642 | break 643 | } 644 | } 645 | return text 646 | } 647 | 648 | // SetText replaces all character data immediately following an element's 649 | // opening tag with the requested string. 650 | func (e *Element) SetText(text string) { 651 | e.replaceText(0, text, 0) 652 | } 653 | 654 | // SetCData replaces all character data immediately following an element's 655 | // opening tag with a CDATA section. 656 | func (e *Element) SetCData(text string) { 657 | e.replaceText(0, text, cdataFlag) 658 | } 659 | 660 | // Tail returns all character data immediately following the element's end 661 | // tag. 
662 | func (e *Element) Tail() string { 663 | if e.Parent() == nil { 664 | return "" 665 | } 666 | 667 | p := e.Parent() 668 | i := e.Index() 669 | 670 | text := "" 671 | for _, ch := range p.Child[i+1:] { 672 | if cd, ok := ch.(*CharData); ok { 673 | if text == "" { 674 | text = cd.Data 675 | } else { 676 | text += cd.Data 677 | } 678 | } else { 679 | break 680 | } 681 | } 682 | return text 683 | } 684 | 685 | // SetTail replaces all character data immediately following the element's end 686 | // tag with the requested string. 687 | func (e *Element) SetTail(text string) { 688 | if e.Parent() == nil { 689 | return 690 | } 691 | 692 | p := e.Parent() 693 | p.replaceText(e.Index()+1, text, 0) 694 | } 695 | 696 | // replaceText is a helper function that replaces a series of chardata tokens 697 | // starting at index i with the requested text. 698 | func (e *Element) replaceText(i int, text string, flags charDataFlags) { 699 | end := e.findTermCharDataIndex(i) 700 | 701 | switch { 702 | case end == i: 703 | if text != "" { 704 | // insert a new chardata token at index i 705 | cd := newCharData(text, flags, nil) 706 | e.InsertChildAt(i, cd) 707 | } 708 | 709 | case end == i+1: 710 | if text == "" { 711 | // remove the chardata token at index i 712 | e.RemoveChildAt(i) 713 | } else { 714 | // replace the first and only character token at index i 715 | cd := e.Child[i].(*CharData) 716 | cd.Data, cd.flags = text, flags 717 | } 718 | 719 | default: 720 | if text == "" { 721 | // remove all chardata tokens starting from index i 722 | copy(e.Child[i:], e.Child[end:]) 723 | removed := end - i 724 | e.Child = e.Child[:len(e.Child)-removed] 725 | for j := i; j < len(e.Child); j++ { 726 | e.Child[j].setIndex(j) 727 | } 728 | } else { 729 | // replace the first chardata token at index i and remove all 730 | // subsequent chardata tokens 731 | cd := e.Child[i].(*CharData) 732 | cd.Data, cd.flags = text, flags 733 | copy(e.Child[i+1:], e.Child[end:]) 734 | removed := end - (i + 1) 
735 | e.Child = e.Child[:len(e.Child)-removed] 736 | for j := i + 1; j < len(e.Child); j++ { 737 | e.Child[j].setIndex(j) 738 | } 739 | } 740 | } 741 | } 742 | 743 | // findTermCharDataIndex finds the index of the first child token that isn't 744 | // a CharData token. It starts from the requested start index. 745 | func (e *Element) findTermCharDataIndex(start int) int { 746 | for i := start; i < len(e.Child); i++ { 747 | if _, ok := e.Child[i].(*CharData); !ok { 748 | return i 749 | } 750 | } 751 | return len(e.Child) 752 | } 753 | 754 | // CreateElement creates a new element with the specified tag (i.e., name) and 755 | // adds it as the last child of element 'e'. The tag may include a prefix 756 | // followed by a colon. 757 | func (e *Element) CreateElement(tag string) *Element { 758 | space, stag := spaceDecompose(tag) 759 | return newElement(space, stag, e) 760 | } 761 | 762 | // CreateChild performs the same task as CreateElement but calls a 763 | // continuation function after the child element is created, allowing 764 | // additional actions to be performed on the child element before returning. 765 | // 766 | // This method of element creation is particularly useful when building nested 767 | // XML documents from code. For example: 768 | // 769 | // org := doc.CreateChild("organization", func(e *Element) { 770 | // e.CreateComment("Mary") 771 | // e.CreateChild("person", func(e *Element) { 772 | // e.CreateAttr("name", "Mary") 773 | // e.CreateAttr("age", "30") 774 | // e.CreateAttr("hair", "brown") 775 | // }) 776 | // }) 777 | func (e *Element) CreateChild(tag string, cont func(e *Element)) *Element { 778 | child := e.CreateElement(tag) 779 | cont(child) 780 | return child 781 | } 782 | 783 | // AddChild adds the token 't' as the last child of the element. If token 't' 784 | // was already the child of another element, it is first removed from its 785 | // parent element. 
786 | func (e *Element) AddChild(t Token) { 787 | if t.Parent() != nil { 788 | t.Parent().RemoveChild(t) 789 | } 790 | e.addChild(t) 791 | } 792 | 793 | // InsertChild inserts the token 't' into this element's list of children just 794 | // before the element's existing child token 'ex'. If the existing element 795 | // 'ex' does not appear in this element's list of child tokens, then 't' is 796 | // added to the end of this element's list of child tokens. If token 't' is 797 | // already the child of another element, it is first removed from the other 798 | // element's list of child tokens. 799 | // 800 | // Deprecated: InsertChild is deprecated. Use InsertChildAt instead. 801 | func (e *Element) InsertChild(ex Token, t Token) { 802 | if ex == nil || ex.Parent() != e { 803 | e.AddChild(t) 804 | return 805 | } 806 | 807 | if t.Parent() != nil { 808 | t.Parent().RemoveChild(t) 809 | } 810 | 811 | t.setParent(e) 812 | 813 | i := ex.Index() 814 | e.Child = append(e.Child, nil) 815 | copy(e.Child[i+1:], e.Child[i:]) 816 | e.Child[i] = t 817 | 818 | for j := i; j < len(e.Child); j++ { 819 | e.Child[j].setIndex(j) 820 | } 821 | } 822 | 823 | // InsertChildAt inserts the token 't' into this element's list of child 824 | // tokens just before the requested 'index'. If the index is greater than or 825 | // equal to the length of the list of child tokens, then the token 't' is 826 | // added to the end of the list of child tokens. 
827 | func (e *Element) InsertChildAt(index int, t Token) { 828 | if index >= len(e.Child) { 829 | e.AddChild(t) 830 | return 831 | } 832 | 833 | if t.Parent() != nil { 834 | if t.Parent() == e && t.Index() < index { 835 | index-- 836 | } 837 | t.Parent().RemoveChild(t) 838 | } 839 | 840 | t.setParent(e) 841 | 842 | e.Child = append(e.Child, nil) 843 | copy(e.Child[index+1:], e.Child[index:]) 844 | e.Child[index] = t 845 | 846 | for j := index; j < len(e.Child); j++ { 847 | e.Child[j].setIndex(j) 848 | } 849 | } 850 | 851 | // RemoveChild attempts to remove the token 't' from this element's list of 852 | // child tokens. If the token 't' was a child of this element, then it is 853 | // removed and returned. Otherwise, nil is returned. 854 | func (e *Element) RemoveChild(t Token) Token { 855 | if t.Parent() != e { 856 | return nil 857 | } 858 | return e.RemoveChildAt(t.Index()) 859 | } 860 | 861 | // RemoveChildAt removes the child token appearing in slot 'index' of this 862 | // element's list of child tokens. The removed child token is then returned. 863 | // If the index is out of bounds, no child is removed and nil is returned. 864 | func (e *Element) RemoveChildAt(index int) Token { 865 | if index >= len(e.Child) { 866 | return nil 867 | } 868 | 869 | t := e.Child[index] 870 | for j := index + 1; j < len(e.Child); j++ { 871 | e.Child[j].setIndex(j - 1) 872 | } 873 | e.Child = append(e.Child[:index], e.Child[index+1:]...) 874 | t.setIndex(-1) 875 | t.setParent(nil) 876 | return t 877 | } 878 | 879 | // autoClose analyzes the stack's top element and the current token to decide 880 | // whether the top element should be closed. 
881 | func (e *Element) autoClose(stack *stack[*Element], t xml.Token, tags []string) { 882 | if stack.empty() { 883 | return 884 | } 885 | 886 | top := stack.peek() 887 | 888 | for _, tag := range tags { 889 | if strings.EqualFold(tag, top.FullTag()) { 890 | if e, ok := t.(xml.EndElement); !ok || 891 | !strings.EqualFold(e.Name.Space, top.Space) || 892 | !strings.EqualFold(e.Name.Local, top.Tag) { 893 | stack.pop() 894 | } 895 | break 896 | } 897 | } 898 | } 899 | 900 | // ReadFrom reads XML from the reader 'ri' and stores the result as a new 901 | // child of this element. 902 | func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err error) { 903 | var r xmlReader 904 | var pr *xmlPeekReader 905 | if settings.PreserveCData { 906 | pr = newXmlPeekReader(ri) 907 | r = pr 908 | } else { 909 | r = newXmlSimpleReader(ri) 910 | } 911 | 912 | attrCheck := make(map[xml.Name]int) 913 | dec := newDecoder(r, settings) 914 | 915 | var stack stack[*Element] 916 | stack.push(e) 917 | for { 918 | if pr != nil { 919 | pr.PeekPrepare(dec.InputOffset(), len(cdataPrefix)) 920 | } 921 | 922 | t, err := dec.RawToken() 923 | 924 | if settings.Permissive && settings.AutoClose != nil { 925 | e.autoClose(&stack, t, settings.AutoClose) 926 | } 927 | 928 | switch { 929 | case err == io.EOF: 930 | if len(stack.data) != 1 { 931 | return r.Bytes(), ErrXML 932 | } 933 | return r.Bytes(), nil 934 | case err != nil: 935 | return r.Bytes(), err 936 | case stack.empty(): 937 | return r.Bytes(), ErrXML 938 | } 939 | 940 | top := stack.peek() 941 | 942 | switch t := t.(type) { 943 | case xml.StartElement: 944 | e := newElement(t.Name.Space, t.Name.Local, top) 945 | if settings.PreserveDuplicateAttrs || len(t.Attr) < 2 { 946 | for _, a := range t.Attr { 947 | e.addAttr(a.Name.Space, a.Name.Local, a.Value) 948 | } 949 | } else { 950 | for _, a := range t.Attr { 951 | if i, contains := attrCheck[a.Name]; contains { 952 | e.Attr[i].Value = a.Value 953 | } else { 954 | 
attrCheck[a.Name] = e.addAttr(a.Name.Space, a.Name.Local, a.Value) 955 | } 956 | } 957 | clear(attrCheck) 958 | } 959 | stack.push(e) 960 | case xml.EndElement: 961 | if top.Tag != t.Name.Local || top.Space != t.Name.Space { 962 | return r.Bytes(), ErrXML 963 | } 964 | stack.pop() 965 | case xml.CharData: 966 | data := string(t) 967 | var flags charDataFlags 968 | if pr != nil { 969 | peekBuf := pr.PeekFinalize() 970 | if bytes.Equal(peekBuf, cdataPrefix) { 971 | flags = cdataFlag 972 | } else if isWhitespace(data) { 973 | flags = whitespaceFlag 974 | } 975 | } else { 976 | if isWhitespace(data) { 977 | flags = whitespaceFlag 978 | } 979 | } 980 | newCharData(data, flags, top) 981 | case xml.Comment: 982 | newComment(string(t), top) 983 | case xml.Directive: 984 | newDirective(string(t), top) 985 | case xml.ProcInst: 986 | newProcInst(t.Target, string(t.Inst), top) 987 | } 988 | } 989 | } 990 | 991 | // SelectAttr finds an element attribute matching the requested 'key' and, if 992 | // found, returns a pointer to the matching attribute. The function returns 993 | // nil if no matching attribute is found. The key may include a namespace 994 | // prefix followed by a colon. 995 | func (e *Element) SelectAttr(key string) *Attr { 996 | space, skey := spaceDecompose(key) 997 | for i, a := range e.Attr { 998 | if spaceMatch(space, a.Space) && skey == a.Key { 999 | return &e.Attr[i] 1000 | } 1001 | } 1002 | return nil 1003 | } 1004 | 1005 | // SelectAttrValue finds an element attribute matching the requested 'key' and 1006 | // returns its value if found. If no matching attribute is found, the function 1007 | // returns the 'dflt' value instead. The key may include a namespace prefix 1008 | // followed by a colon. 
1009 | func (e *Element) SelectAttrValue(key, dflt string) string { 1010 | space, skey := spaceDecompose(key) 1011 | for _, a := range e.Attr { 1012 | if spaceMatch(space, a.Space) && skey == a.Key { 1013 | return a.Value 1014 | } 1015 | } 1016 | return dflt 1017 | } 1018 | 1019 | // ChildElements returns all elements that are children of this element. 1020 | func (e *Element) ChildElements() []*Element { 1021 | var elements []*Element 1022 | for _, t := range e.Child { 1023 | if c, ok := t.(*Element); ok { 1024 | elements = append(elements, c) 1025 | } 1026 | } 1027 | return elements 1028 | } 1029 | 1030 | // SelectElement returns the first child element with the given 'tag' (i.e., 1031 | // name). The function returns nil if no child element matching the tag is 1032 | // found. The tag may include a namespace prefix followed by a colon. 1033 | func (e *Element) SelectElement(tag string) *Element { 1034 | space, stag := spaceDecompose(tag) 1035 | for _, t := range e.Child { 1036 | if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag { 1037 | return c 1038 | } 1039 | } 1040 | return nil 1041 | } 1042 | 1043 | // SelectElements returns a slice of all child elements with the given 'tag' 1044 | // (i.e., name). The tag may include a namespace prefix followed by a colon. 1045 | func (e *Element) SelectElements(tag string) []*Element { 1046 | space, stag := spaceDecompose(tag) 1047 | var elements []*Element 1048 | for _, t := range e.Child { 1049 | if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag { 1050 | elements = append(elements, c) 1051 | } 1052 | } 1053 | return elements 1054 | } 1055 | 1056 | // FindElement returns the first element matched by the XPath-like 'path' 1057 | // string. The function returns nil if no child element is found using the 1058 | // path. It panics if an invalid path string is supplied. 
1059 | func (e *Element) FindElement(path string) *Element { 1060 | return e.FindElementPath(MustCompilePath(path)) 1061 | } 1062 | 1063 | // FindElementPath returns the first element matched by the 'path' object. The 1064 | // function returns nil if no element is found using the path. 1065 | func (e *Element) FindElementPath(path Path) *Element { 1066 | p := newPather() 1067 | elements := p.traverse(e, path) 1068 | if len(elements) > 0 { 1069 | return elements[0] 1070 | } 1071 | return nil 1072 | } 1073 | 1074 | // FindElements returns a slice of elements matched by the XPath-like 'path' 1075 | // string. The function returns nil if no child element is found using the 1076 | // path. It panics if an invalid path string is supplied. 1077 | func (e *Element) FindElements(path string) []*Element { 1078 | return e.FindElementsPath(MustCompilePath(path)) 1079 | } 1080 | 1081 | // FindElementsPath returns a slice of elements matched by the 'path' object. 1082 | func (e *Element) FindElementsPath(path Path) []*Element { 1083 | p := newPather() 1084 | return p.traverse(e, path) 1085 | } 1086 | 1087 | // NotNil returns the receiver element if it isn't nil; otherwise, it returns 1088 | // an unparented element with an empty string tag. This function simplifies 1089 | // the task of writing code to ignore not-found results from element queries. 1090 | // For example, instead of writing this: 1091 | // 1092 | // if e := doc.SelectElement("enabled"); e != nil { 1093 | // e.SetText("true") 1094 | // } 1095 | // 1096 | // You could write this: 1097 | // 1098 | // doc.SelectElement("enabled").NotNil().SetText("true") 1099 | func (e *Element) NotNil() *Element { 1100 | if e == nil { 1101 | return NewElement("") 1102 | } 1103 | return e 1104 | } 1105 | 1106 | // GetPath returns the absolute path of the element. The absolute path is the 1107 | // full path from the document's root. 
1108 | func (e *Element) GetPath() string { 1109 | path := []string{} 1110 | for seg := e; seg != nil; seg = seg.Parent() { 1111 | if seg.Tag != "" { 1112 | path = append(path, seg.Tag) 1113 | } 1114 | } 1115 | 1116 | // Reverse the path. 1117 | for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 { 1118 | path[i], path[j] = path[j], path[i] 1119 | } 1120 | 1121 | return "/" + strings.Join(path, "/") 1122 | } 1123 | 1124 | // GetRelativePath returns the path of this element relative to the 'source' 1125 | // element. If the two elements are not part of the same element tree, then 1126 | // the function returns the empty string. 1127 | func (e *Element) GetRelativePath(source *Element) string { 1128 | var path []*Element 1129 | 1130 | if source == nil { 1131 | return "" 1132 | } 1133 | 1134 | // Build a reverse path from the element toward the root. Stop if the 1135 | // source element is encountered. 1136 | var seg *Element 1137 | for seg = e; seg != nil && seg != source; seg = seg.Parent() { 1138 | path = append(path, seg) 1139 | } 1140 | 1141 | // If we found the source element, reverse the path and compose the 1142 | // string. 1143 | if seg == source { 1144 | if len(path) == 0 { 1145 | return "." 1146 | } 1147 | parts := []string{} 1148 | for i := len(path) - 1; i >= 0; i-- { 1149 | parts = append(parts, path[i].Tag) 1150 | } 1151 | return "./" + strings.Join(parts, "/") 1152 | } 1153 | 1154 | // The source wasn't encountered, so climb from the source element toward 1155 | // the root of the tree until an element in the reversed path is 1156 | // encountered. 
1157 | 1158 | findPathIndex := func(e *Element, path []*Element) int { 1159 | for i, ee := range path { 1160 | if e == ee { 1161 | return i 1162 | } 1163 | } 1164 | return -1 1165 | } 1166 | 1167 | climb := 0 1168 | for seg = source; seg != nil; seg = seg.Parent() { 1169 | i := findPathIndex(seg, path) 1170 | if i >= 0 { 1171 | path = path[:i] // truncate at found segment 1172 | break 1173 | } 1174 | climb++ 1175 | } 1176 | 1177 | // No element in the reversed path was encountered, so the two elements 1178 | // must not be part of the same tree. 1179 | if seg == nil { 1180 | return "" 1181 | } 1182 | 1183 | // Reverse the (possibly truncated) path and prepend ".." segments to 1184 | // climb. 1185 | parts := []string{} 1186 | for i := 0; i < climb; i++ { 1187 | parts = append(parts, "..") 1188 | } 1189 | for i := len(path) - 1; i >= 0; i-- { 1190 | parts = append(parts, path[i].Tag) 1191 | } 1192 | return strings.Join(parts, "/") 1193 | } 1194 | 1195 | // IndentWithSettings modifies the element and its child tree by inserting 1196 | // character data tokens containing newlines and indentation. The behavior of 1197 | // the indentation algorithm is configured by the indent settings. Because 1198 | // this function indents the element as if it were at the root of a document, 1199 | // it is most useful when called just before writing the element as an XML 1200 | // fragment using WriteTo. 1201 | func (e *Element) IndentWithSettings(s *IndentSettings) { 1202 | e.indent(1, getIndentFunc(s), s) 1203 | } 1204 | 1205 | // indent recursively inserts proper indentation between an XML element's 1206 | // child tokens. 
1207 | func (e *Element) indent(depth int, indent indentFunc, s *IndentSettings) { 1208 | e.stripIndent(s) 1209 | n := len(e.Child) 1210 | if n == 0 { 1211 | return 1212 | } 1213 | 1214 | oldChild := e.Child 1215 | e.Child = make([]Token, 0, n*2+1) 1216 | isCharData, firstNonCharData := false, true 1217 | for _, c := range oldChild { 1218 | // Insert NL+indent before child if it's not character data. 1219 | // Exceptions: when it's the first non-character-data child, or when 1220 | // the child is at root depth. 1221 | _, isCharData = c.(*CharData) 1222 | if !isCharData { 1223 | if !firstNonCharData || depth > 0 { 1224 | s := indent(depth) 1225 | if s != "" { 1226 | newCharData(s, whitespaceFlag, e) 1227 | } 1228 | } 1229 | firstNonCharData = false 1230 | } 1231 | 1232 | e.addChild(c) 1233 | 1234 | // Recursively process child elements. 1235 | if ce, ok := c.(*Element); ok { 1236 | ce.indent(depth+1, indent, s) 1237 | } 1238 | } 1239 | 1240 | // Insert NL+indent before the last child. 1241 | if !isCharData { 1242 | if !firstNonCharData || depth > 0 { 1243 | s := indent(depth - 1) 1244 | if s != "" { 1245 | newCharData(s, whitespaceFlag, e) 1246 | } 1247 | } 1248 | } 1249 | } 1250 | 1251 | // stripIndent removes any previously inserted indentation. 
1252 | func (e *Element) stripIndent(s *IndentSettings) { 1253 | // Count the number of non-indent child tokens 1254 | n := len(e.Child) 1255 | for _, c := range e.Child { 1256 | if cd, ok := c.(*CharData); ok && cd.IsWhitespace() { 1257 | n-- 1258 | } 1259 | } 1260 | if n == len(e.Child) { 1261 | return 1262 | } 1263 | if n == 0 && len(e.Child) == 1 && s.PreserveLeafWhitespace { 1264 | return 1265 | } 1266 | 1267 | // Strip out indent CharData 1268 | newChild := make([]Token, n) 1269 | j := 0 1270 | for _, c := range e.Child { 1271 | if cd, ok := c.(*CharData); ok && cd.IsWhitespace() { 1272 | continue 1273 | } 1274 | newChild[j] = c 1275 | newChild[j].setIndex(j) 1276 | j++ 1277 | } 1278 | e.Child = newChild 1279 | } 1280 | 1281 | // stripTrailingWhitespace removes any trailing whitespace CharData tokens 1282 | // from the element's children. 1283 | func (e *Element) stripTrailingWhitespace() { 1284 | for i := len(e.Child) - 1; i >= 0; i-- { 1285 | if cd, ok := e.Child[i].(*CharData); !ok || !cd.IsWhitespace() { 1286 | e.Child = e.Child[:i+1] 1287 | return 1288 | } 1289 | } 1290 | } 1291 | 1292 | // dup duplicates the element. 1293 | func (e *Element) dup(parent *Element) Token { 1294 | ne := &Element{ 1295 | Space: e.Space, 1296 | Tag: e.Tag, 1297 | Attr: make([]Attr, len(e.Attr)), 1298 | Child: make([]Token, len(e.Child)), 1299 | parent: parent, 1300 | index: e.index, 1301 | } 1302 | for i, t := range e.Child { 1303 | ne.Child[i] = t.dup(ne) 1304 | } 1305 | copy(ne.Attr, e.Attr) 1306 | return ne 1307 | } 1308 | 1309 | // NextSibling returns this element's next sibling element. It returns nil if 1310 | // there is no next sibling element. 
1311 | func (e *Element) NextSibling() *Element { 1312 | if e.parent == nil { 1313 | return nil 1314 | } 1315 | for i := e.index + 1; i < len(e.parent.Child); i++ { 1316 | if s, ok := e.parent.Child[i].(*Element); ok { 1317 | return s 1318 | } 1319 | } 1320 | return nil 1321 | } 1322 | 1323 | // PrevSibling returns this element's preceding sibling element. It returns 1324 | // nil if there is no preceding sibling element. 1325 | func (e *Element) PrevSibling() *Element { 1326 | if e.parent == nil { 1327 | return nil 1328 | } 1329 | for i := e.index - 1; i >= 0; i-- { 1330 | if s, ok := e.parent.Child[i].(*Element); ok { 1331 | return s 1332 | } 1333 | } 1334 | return nil 1335 | } 1336 | 1337 | // Parent returns this element's parent element. It returns nil if this 1338 | // element has no parent. 1339 | func (e *Element) Parent() *Element { 1340 | return e.parent 1341 | } 1342 | 1343 | // Index returns the index of this element within its parent element's 1344 | // list of child tokens. If this element has no parent, then the function 1345 | // returns -1. 1346 | func (e *Element) Index() int { 1347 | return e.index 1348 | } 1349 | 1350 | // WriteTo serializes the element to the writer w. 1351 | func (e *Element) WriteTo(w Writer, s *WriteSettings) { 1352 | w.WriteByte('<') 1353 | w.WriteString(e.FullTag()) 1354 | for _, a := range e.Attr { 1355 | w.WriteByte(' ') 1356 | a.WriteTo(w, s) 1357 | } 1358 | if len(e.Child) > 0 { 1359 | w.WriteByte('>') 1360 | for _, c := range e.Child { 1361 | c.WriteTo(w, s) 1362 | } 1363 | w.Write([]byte{'<', '/'}) 1364 | w.WriteString(e.FullTag()) 1365 | w.WriteByte('>') 1366 | } else { 1367 | if s.CanonicalEndTags { 1368 | w.Write([]byte{'>', '<', '/'}) 1369 | w.WriteString(e.FullTag()) 1370 | w.WriteByte('>') 1371 | } else { 1372 | w.Write([]byte{'/', '>'}) 1373 | } 1374 | } 1375 | } 1376 | 1377 | // setParent replaces this element token's parent. 
1378 | func (e *Element) setParent(parent *Element) { 1379 | e.parent = parent 1380 | } 1381 | 1382 | // setIndex sets this element token's index within its parent's Child slice. 1383 | func (e *Element) setIndex(index int) { 1384 | e.index = index 1385 | } 1386 | 1387 | // addChild adds a child token to the element e. 1388 | func (e *Element) addChild(t Token) { 1389 | t.setParent(e) 1390 | t.setIndex(len(e.Child)) 1391 | e.Child = append(e.Child, t) 1392 | } 1393 | 1394 | // CreateAttr creates an attribute with the specified 'key' and 'value' and 1395 | // adds it to this element. If an attribute with same key already exists on 1396 | // this element, then its value is replaced. The key may include a namespace 1397 | // prefix followed by a colon. 1398 | func (e *Element) CreateAttr(key, value string) *Attr { 1399 | space, skey := spaceDecompose(key) 1400 | 1401 | for i, a := range e.Attr { 1402 | if space == a.Space && skey == a.Key { 1403 | e.Attr[i].Value = value 1404 | return &e.Attr[i] 1405 | } 1406 | } 1407 | 1408 | i := e.addAttr(space, skey, value) 1409 | return &e.Attr[i] 1410 | } 1411 | 1412 | // addAttr is a helper function that adds an attribute to an element. Returns 1413 | // the index of the added attribute. 1414 | func (e *Element) addAttr(space, key, value string) int { 1415 | a := Attr{ 1416 | Space: space, 1417 | Key: key, 1418 | Value: value, 1419 | element: e, 1420 | } 1421 | e.Attr = append(e.Attr, a) 1422 | return len(e.Attr) - 1 1423 | } 1424 | 1425 | // RemoveAttr removes the first attribute of this element whose key matches 1426 | // 'key'. It returns a copy of the removed attribute if a match is found. If 1427 | // no match is found, it returns nil. The key may include a namespace prefix 1428 | // followed by a colon. 
1429 | func (e *Element) RemoveAttr(key string) *Attr { 1430 | space, skey := spaceDecompose(key) 1431 | for i, a := range e.Attr { 1432 | if space == a.Space && skey == a.Key { 1433 | e.Attr = append(e.Attr[0:i], e.Attr[i+1:]...) 1434 | return &Attr{ 1435 | Space: a.Space, 1436 | Key: a.Key, 1437 | Value: a.Value, 1438 | element: nil, 1439 | } 1440 | } 1441 | } 1442 | return nil 1443 | } 1444 | 1445 | // SortAttrs sorts this element's attributes lexicographically by key. 1446 | func (e *Element) SortAttrs() { 1447 | slices.SortFunc(e.Attr, func(a, b Attr) int { 1448 | if v := strings.Compare(a.Space, b.Space); v != 0 { 1449 | return v 1450 | } 1451 | return strings.Compare(a.Key, b.Key) 1452 | }) 1453 | } 1454 | 1455 | // FullKey returns this attribute's complete key, including namespace prefix 1456 | // if present. 1457 | func (a *Attr) FullKey() string { 1458 | if a.Space == "" { 1459 | return a.Key 1460 | } 1461 | return a.Space + ":" + a.Key 1462 | } 1463 | 1464 | // Element returns a pointer to the element containing this attribute. 1465 | func (a *Attr) Element() *Element { 1466 | return a.element 1467 | } 1468 | 1469 | // NamespaceURI returns the XML namespace URI associated with this attribute. 1470 | // The function returns the empty string if the attribute is unprefixed or 1471 | // if the attribute is part of the XML default namespace. 1472 | func (a *Attr) NamespaceURI() string { 1473 | if a.Space == "" { 1474 | return "" 1475 | } 1476 | return a.element.findLocalNamespaceURI(a.Space) 1477 | } 1478 | 1479 | // WriteTo serializes the attribute to the writer. 
1480 | func (a *Attr) WriteTo(w Writer, s *WriteSettings) { 1481 | w.WriteString(a.FullKey()) 1482 | if s.AttrSingleQuote { 1483 | w.WriteString(`='`) 1484 | } else { 1485 | w.WriteString(`="`) 1486 | } 1487 | var m escapeMode 1488 | if s.CanonicalAttrVal && !s.AttrSingleQuote { 1489 | m = escapeCanonicalAttr 1490 | } else { 1491 | m = escapeNormal 1492 | } 1493 | escapeString(w, a.Value, m) 1494 | if s.AttrSingleQuote { 1495 | w.WriteByte('\'') 1496 | } else { 1497 | w.WriteByte('"') 1498 | } 1499 | } 1500 | 1501 | // NewText creates an unparented CharData token containing simple text data. 1502 | func NewText(text string) *CharData { 1503 | return newCharData(text, 0, nil) 1504 | } 1505 | 1506 | // NewCData creates an unparented XML character CDATA section with 'data' as 1507 | // its content. 1508 | func NewCData(data string) *CharData { 1509 | return newCharData(data, cdataFlag, nil) 1510 | } 1511 | 1512 | // NewCharData creates an unparented CharData token containing simple text 1513 | // data. 1514 | // 1515 | // Deprecated: NewCharData is deprecated. Instead, use NewText, which does the 1516 | // same thing. 1517 | func NewCharData(data string) *CharData { 1518 | return newCharData(data, 0, nil) 1519 | } 1520 | 1521 | // newCharData creates a character data token and binds it to a parent 1522 | // element. If parent is nil, the CharData token remains unbound. 1523 | func newCharData(data string, flags charDataFlags, parent *Element) *CharData { 1524 | c := &CharData{ 1525 | Data: data, 1526 | parent: nil, 1527 | index: -1, 1528 | flags: flags, 1529 | } 1530 | if parent != nil { 1531 | parent.addChild(c) 1532 | } 1533 | return c 1534 | } 1535 | 1536 | // CreateText creates a CharData token containing simple text data and adds it 1537 | // to the end of this element's list of child tokens. 
1538 | func (e *Element) CreateText(text string) *CharData { 1539 | return newCharData(text, 0, e) 1540 | } 1541 | 1542 | // CreateCData creates a CharData token containing a CDATA section with 'data' 1543 | // as its content and adds it to the end of this element's list of child 1544 | // tokens. 1545 | func (e *Element) CreateCData(data string) *CharData { 1546 | return newCharData(data, cdataFlag, e) 1547 | } 1548 | 1549 | // CreateCharData creates a CharData token containing simple text data and 1550 | // adds it to the end of this element's list of child tokens. 1551 | // 1552 | // Deprecated: CreateCharData is deprecated. Instead, use CreateText, which 1553 | // does the same thing. 1554 | func (e *Element) CreateCharData(data string) *CharData { 1555 | return e.CreateText(data) 1556 | } 1557 | 1558 | // SetData modifies the content of the CharData token. In the case of a 1559 | // CharData token containing simple text, the simple text is modified. In the 1560 | // case of a CharData token containing a CDATA section, the CDATA section's 1561 | // content is modified. 1562 | func (c *CharData) SetData(text string) { 1563 | c.Data = text 1564 | if isWhitespace(text) { 1565 | c.flags |= whitespaceFlag 1566 | } else { 1567 | c.flags &= ^whitespaceFlag 1568 | } 1569 | } 1570 | 1571 | // IsCData returns true if this CharData token is contains a CDATA section. It 1572 | // returns false if the CharData token contains simple text. 1573 | func (c *CharData) IsCData() bool { 1574 | return (c.flags & cdataFlag) != 0 1575 | } 1576 | 1577 | // IsWhitespace returns true if this CharData token contains only whitespace. 1578 | func (c *CharData) IsWhitespace() bool { 1579 | return (c.flags & whitespaceFlag) != 0 1580 | } 1581 | 1582 | // Parent returns this CharData token's parent element, or nil if it has no 1583 | // parent. 
1584 | func (c *CharData) Parent() *Element { 1585 | return c.parent 1586 | } 1587 | 1588 | // Index returns the index of this CharData token within its parent element's 1589 | // list of child tokens. If this CharData token has no parent, then the 1590 | // function returns -1. 1591 | func (c *CharData) Index() int { 1592 | return c.index 1593 | } 1594 | 1595 | // WriteTo serializes character data to the writer. 1596 | func (c *CharData) WriteTo(w Writer, s *WriteSettings) { 1597 | if c.IsCData() { 1598 | w.WriteString(``) 1601 | } else { 1602 | var m escapeMode 1603 | if s.CanonicalText { 1604 | m = escapeCanonicalText 1605 | } else { 1606 | m = escapeNormal 1607 | } 1608 | escapeString(w, c.Data, m) 1609 | } 1610 | } 1611 | 1612 | // dup duplicates the character data. 1613 | func (c *CharData) dup(parent *Element) Token { 1614 | return &CharData{ 1615 | Data: c.Data, 1616 | flags: c.flags, 1617 | parent: parent, 1618 | index: c.index, 1619 | } 1620 | } 1621 | 1622 | // setParent replaces the character data token's parent. 1623 | func (c *CharData) setParent(parent *Element) { 1624 | c.parent = parent 1625 | } 1626 | 1627 | // setIndex sets the CharData token's index within its parent element's Child 1628 | // slice. 1629 | func (c *CharData) setIndex(index int) { 1630 | c.index = index 1631 | } 1632 | 1633 | // NewComment creates an unparented comment token. 1634 | func NewComment(comment string) *Comment { 1635 | return newComment(comment, nil) 1636 | } 1637 | 1638 | // NewComment creates a comment token and sets its parent element to 'parent'. 1639 | func newComment(comment string, parent *Element) *Comment { 1640 | c := &Comment{ 1641 | Data: comment, 1642 | parent: nil, 1643 | index: -1, 1644 | } 1645 | if parent != nil { 1646 | parent.addChild(c) 1647 | } 1648 | return c 1649 | } 1650 | 1651 | // CreateComment creates a comment token using the specified 'comment' string 1652 | // and adds it as the last child token of this element. 
1653 | func (e *Element) CreateComment(comment string) *Comment { 1654 | return newComment(comment, e) 1655 | } 1656 | 1657 | // dup duplicates the comment. 1658 | func (c *Comment) dup(parent *Element) Token { 1659 | return &Comment{ 1660 | Data: c.Data, 1661 | parent: parent, 1662 | index: c.index, 1663 | } 1664 | } 1665 | 1666 | // Parent returns comment token's parent element, or nil if it has no parent. 1667 | func (c *Comment) Parent() *Element { 1668 | return c.parent 1669 | } 1670 | 1671 | // Index returns the index of this Comment token within its parent element's 1672 | // list of child tokens. If this Comment token has no parent, then the 1673 | // function returns -1. 1674 | func (c *Comment) Index() int { 1675 | return c.index 1676 | } 1677 | 1678 | // WriteTo serialies the comment to the writer. 1679 | func (c *Comment) WriteTo(w Writer, s *WriteSettings) { 1680 | w.WriteString("") 1683 | } 1684 | 1685 | // setParent replaces the comment token's parent. 1686 | func (c *Comment) setParent(parent *Element) { 1687 | c.parent = parent 1688 | } 1689 | 1690 | // setIndex sets the Comment token's index within its parent element's Child 1691 | // slice. 1692 | func (c *Comment) setIndex(index int) { 1693 | c.index = index 1694 | } 1695 | 1696 | // NewDirective creates an unparented XML directive token. 1697 | func NewDirective(data string) *Directive { 1698 | return newDirective(data, nil) 1699 | } 1700 | 1701 | // newDirective creates an XML directive and binds it to a parent element. If 1702 | // parent is nil, the Directive remains unbound. 1703 | func newDirective(data string, parent *Element) *Directive { 1704 | d := &Directive{ 1705 | Data: data, 1706 | parent: nil, 1707 | index: -1, 1708 | } 1709 | if parent != nil { 1710 | parent.addChild(d) 1711 | } 1712 | return d 1713 | } 1714 | 1715 | // CreateDirective creates an XML directive token with the specified 'data' 1716 | // value and adds it as the last child token of this element. 
1717 | func (e *Element) CreateDirective(data string) *Directive { 1718 | return newDirective(data, e) 1719 | } 1720 | 1721 | // dup duplicates the directive. 1722 | func (d *Directive) dup(parent *Element) Token { 1723 | return &Directive{ 1724 | Data: d.Data, 1725 | parent: parent, 1726 | index: d.index, 1727 | } 1728 | } 1729 | 1730 | // Parent returns directive token's parent element, or nil if it has no 1731 | // parent. 1732 | func (d *Directive) Parent() *Element { 1733 | return d.parent 1734 | } 1735 | 1736 | // Index returns the index of this Directive token within its parent element's 1737 | // list of child tokens. If this Directive token has no parent, then the 1738 | // function returns -1. 1739 | func (d *Directive) Index() int { 1740 | return d.index 1741 | } 1742 | 1743 | // WriteTo serializes the XML directive to the writer. 1744 | func (d *Directive) WriteTo(w Writer, s *WriteSettings) { 1745 | w.WriteString("") 1748 | } 1749 | 1750 | // setParent replaces the directive token's parent. 1751 | func (d *Directive) setParent(parent *Element) { 1752 | d.parent = parent 1753 | } 1754 | 1755 | // setIndex sets the Directive token's index within its parent element's Child 1756 | // slice. 1757 | func (d *Directive) setIndex(index int) { 1758 | d.index = index 1759 | } 1760 | 1761 | // NewProcInst creates an unparented XML processing instruction. 1762 | func NewProcInst(target, inst string) *ProcInst { 1763 | return newProcInst(target, inst, nil) 1764 | } 1765 | 1766 | // newProcInst creates an XML processing instruction and binds it to a parent 1767 | // element. If parent is nil, the ProcInst remains unbound. 
1768 | func newProcInst(target, inst string, parent *Element) *ProcInst { 1769 | p := &ProcInst{ 1770 | Target: target, 1771 | Inst: inst, 1772 | parent: nil, 1773 | index: -1, 1774 | } 1775 | if parent != nil { 1776 | parent.addChild(p) 1777 | } 1778 | return p 1779 | } 1780 | 1781 | // CreateProcInst creates an XML processing instruction token with the 1782 | // specified 'target' and instruction 'inst'. It is then added as the last 1783 | // child token of this element. 1784 | func (e *Element) CreateProcInst(target, inst string) *ProcInst { 1785 | return newProcInst(target, inst, e) 1786 | } 1787 | 1788 | // dup duplicates the procinst. 1789 | func (p *ProcInst) dup(parent *Element) Token { 1790 | return &ProcInst{ 1791 | Target: p.Target, 1792 | Inst: p.Inst, 1793 | parent: parent, 1794 | index: p.index, 1795 | } 1796 | } 1797 | 1798 | // Parent returns processing instruction token's parent element, or nil if it 1799 | // has no parent. 1800 | func (p *ProcInst) Parent() *Element { 1801 | return p.parent 1802 | } 1803 | 1804 | // Index returns the index of this ProcInst token within its parent element's 1805 | // list of child tokens. If this ProcInst token has no parent, then the 1806 | // function returns -1. 1807 | func (p *ProcInst) Index() int { 1808 | return p.index 1809 | } 1810 | 1811 | // WriteTo serializes the processing instruction to the writer. 1812 | func (p *ProcInst) WriteTo(w Writer, s *WriteSettings) { 1813 | w.WriteString("") 1820 | } 1821 | 1822 | // setParent replaces the processing instruction token's parent. 1823 | func (p *ProcInst) setParent(parent *Element) { 1824 | p.parent = parent 1825 | } 1826 | 1827 | // setIndex sets the processing instruction token's index within its parent 1828 | // element's Child slice. 
1829 | func (p *ProcInst) setIndex(index int) { 1830 | p.index = index 1831 | } 1832 | -------------------------------------------------------------------------------- /etree_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015-2019 Brett Vickers. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package etree 6 | 7 | import ( 8 | "bytes" 9 | "encoding/xml" 10 | "errors" 11 | "io" 12 | "io/fs" 13 | "math/rand" 14 | "os" 15 | "path" 16 | "strings" 17 | "testing" 18 | ) 19 | 20 | func newDocumentFromString(t *testing.T, s string) *Document { 21 | return newDocumentFromString2(t, s, ReadSettings{}) 22 | } 23 | 24 | func newDocumentFromString2(t *testing.T, s string, settings ReadSettings) *Document { 25 | t.Helper() 26 | doc := NewDocument() 27 | doc.ReadSettings = settings 28 | err := doc.ReadFromString(s) 29 | if err != nil { 30 | t.Fatal("etree: failed to parse document") 31 | } 32 | return doc 33 | } 34 | 35 | func checkStrEq(t *testing.T, got, want string) { 36 | t.Helper() 37 | if got != want { 38 | t.Errorf("etree: unexpected result.\nGot:\n%s\nWanted:\n%s\n", got, want) 39 | } 40 | } 41 | 42 | func checkStrBinaryEq(t *testing.T, got, want string) { 43 | t.Helper() 44 | if got != want { 45 | t.Errorf("etree: unexpected result.\nGot:\n%v\nWanted:\n%v\n", []byte(got), []byte(want)) 46 | } 47 | } 48 | 49 | func checkIntEq(t *testing.T, got, want int) { 50 | t.Helper() 51 | if got != want { 52 | t.Errorf("etree: unexpected integer. Got: %d. Wanted: %d\n", got, want) 53 | } 54 | } 55 | 56 | func checkBoolEq(t *testing.T, got, want bool) { 57 | t.Helper() 58 | if got != want { 59 | t.Errorf("etree: unexpected boolean. Got: %v. Wanted: %v\n", got, want) 60 | } 61 | } 62 | 63 | func checkElementEq(t *testing.T, got, want *Element) { 64 | t.Helper() 65 | if got != want { 66 | t.Errorf("etree: unexpected element. Got: %v. 
Wanted: %v.\n", got, want) 67 | } 68 | } 69 | 70 | func checkDocEq(t *testing.T, doc *Document, expected string) { 71 | t.Helper() 72 | doc.Indent(NoIndent) 73 | s, err := doc.WriteToString() 74 | if err != nil { 75 | t.Error("etree: failed to serialize document") 76 | } 77 | if s != expected { 78 | t.Errorf("etree: unexpected document.\nGot:\n%s\nWanted:\n%s\n", s, expected) 79 | } 80 | } 81 | 82 | func checkIndexes(t *testing.T, e *Element) { 83 | t.Helper() 84 | for i := 0; i < len(e.Child); i++ { 85 | c := e.Child[i] 86 | if c.Index() != i { 87 | t.Errorf("Child index mismatch. Got %d, expected %d.", c.Index(), i) 88 | } 89 | if ce, ok := c.(*Element); ok { 90 | checkIndexes(t, ce) 91 | } 92 | } 93 | } 94 | 95 | func TestDocument(t *testing.T) { 96 | // Create a document 97 | doc := NewDocument() 98 | doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`) 99 | doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`) 100 | store := doc.CreateElement("store") 101 | store.CreateAttr("xmlns:t", "urn:books-com:titles") 102 | store.CreateDirective("Directive") 103 | store.CreateComment("This is a comment") 104 | book := store.CreateElement("book") 105 | book.CreateAttr("lang", "fr") 106 | book.CreateAttr("lang", "en") 107 | title := book.CreateElement("t:title") 108 | title.SetText("Nicholas Nickleby") 109 | title.SetText("Great Expectations") 110 | author := book.CreateElement("author") 111 | author.CreateCharData("Charles Dickens") 112 | review := book.CreateElement("review") 113 | review.CreateCData("<<< Will be replaced") 114 | review.SetCData(">>> Excellent book") 115 | doc.IndentTabs() 116 | 117 | checkIndexes(t, &doc.Element) 118 | 119 | // Serialize the document to a string 120 | s, err := doc.WriteToString() 121 | if err != nil { 122 | t.Error("etree: failed to serialize document") 123 | } 124 | 125 | // Make sure the serialized XML matches expectation. 
126 | expected := ` 127 | 128 | 129 | 130 | 131 | 132 | Great Expectations 133 | Charles Dickens 134 | >> Excellent book]]> 135 | 136 | 137 | ` 138 | checkStrEq(t, s, expected) 139 | 140 | // Test the structure of the XML 141 | if doc.Root() != store { 142 | t.Error("etree: root mismatch") 143 | } 144 | if len(store.ChildElements()) != 1 || len(store.Child) != 7 { 145 | t.Error("etree: incorrect tree structure") 146 | } 147 | if len(book.ChildElements()) != 3 || len(book.Attr) != 1 || len(book.Child) != 7 { 148 | t.Error("etree: incorrect tree structure") 149 | } 150 | if len(title.ChildElements()) != 0 || len(title.Child) != 1 || len(title.Attr) != 0 { 151 | t.Error("etree: incorrect tree structure") 152 | } 153 | if len(author.ChildElements()) != 0 || len(author.Child) != 1 || len(author.Attr) != 0 { 154 | t.Error("etree: incorrect tree structure") 155 | } 156 | if len(review.ChildElements()) != 0 || len(review.Child) != 1 || len(review.Attr) != 0 { 157 | t.Error("etree: incorrect tree structure") 158 | } 159 | if book.parent != store || store.parent != &doc.Element || doc.parent != nil { 160 | t.Error("etree: incorrect tree structure") 161 | } 162 | if title.parent != book || author.parent != book { 163 | t.Error("etree: incorrect tree structure") 164 | } 165 | 166 | // Perform some basic queries on the document 167 | elements := doc.SelectElements("store") 168 | if len(elements) != 1 || elements[0] != store { 169 | t.Error("etree: incorrect SelectElements result") 170 | } 171 | element := doc.SelectElement("store") 172 | if element != store { 173 | t.Error("etree: incorrect SelectElement result") 174 | } 175 | elements = store.SelectElements("book") 176 | if len(elements) != 1 || elements[0] != book { 177 | t.Error("etree: incorrect SelectElements result") 178 | } 179 | element = store.SelectElement("book") 180 | if element != book { 181 | t.Error("etree: incorrect SelectElement result") 182 | } 183 | attr := book.SelectAttr("lang") 184 | if attr == nil || 
attr.Key != "lang" || attr.Value != "en" { 185 | t.Error("etree: incorrect SelectAttr result") 186 | } 187 | if book.SelectAttrValue("lang", "unknown") != "en" { 188 | t.Error("etree: incorrect SelectAttrValue result") 189 | } 190 | if book.SelectAttrValue("t:missing", "unknown") != "unknown" { 191 | t.Error("etree: incorrect SelectAttrValue result") 192 | } 193 | attr = book.RemoveAttr("lang") 194 | if attr.Value != "en" { 195 | t.Error("etree: incorrect RemoveAttr result") 196 | } 197 | book.CreateAttr("lang", "de") 198 | attr = book.RemoveAttr("lang") 199 | if attr.Value != "de" { 200 | t.Error("etree: incorrect RemoveAttr result") 201 | } 202 | element = book.SelectElement("t:title") 203 | if element != title || element.Text() != "Great Expectations" || len(element.Attr) != 0 { 204 | t.Error("etree: incorrect SelectElement result") 205 | } 206 | element = book.SelectElement("title") 207 | if element != title { 208 | t.Error("etree: incorrect SelectElement result") 209 | } 210 | element = book.SelectElement("p:title") 211 | if element != nil { 212 | t.Error("etree: incorrect SelectElement result") 213 | } 214 | element = book.RemoveChildAt(title.Index()).(*Element) 215 | if element != title { 216 | t.Error("etree: incorrect RemoveElement result") 217 | } 218 | element = book.SelectElement("title") 219 | if element != nil { 220 | t.Error("etree: incorrect SelectElement result") 221 | } 222 | element = book.SelectElement("review") 223 | if element != review || element.Text() != ">>> Excellent book" || len(element.Attr) != 0 { 224 | t.Error("etree: incorrect SelectElement result") 225 | } 226 | } 227 | 228 | func TestImbalancedXML(t *testing.T) { 229 | cases := []string{ 230 | ``, 231 | ``, 232 | ``, 233 | ``, 234 | ``, 235 | `malformed`, 236 | `malformed`, 237 | ``, 238 | ``, 239 | ``, 240 | ``, 241 | } 242 | for _, c := range cases { 243 | doc := NewDocument() 244 | err := doc.ReadFromString(c) 245 | if err == nil { 246 | t.Errorf("etree: imbalanced XML should 
have failed:\n%s", c) 247 | } 248 | } 249 | } 250 | 251 | func TestDocumentCharsetReader(t *testing.T) { 252 | s := ` 253 | 254 | 255 | Great Expectations 256 | Charles Dickens 257 | 258 | ` 259 | 260 | doc := newDocumentFromString2(t, s, ReadSettings{ 261 | CharsetReader: func(label string, input io.Reader) (io.Reader, error) { 262 | if label == "lowercase" { 263 | return &lowercaseCharsetReader{input}, nil 264 | } 265 | return nil, errors.New("unknown charset") 266 | }, 267 | }) 268 | 269 | cases := []struct { 270 | path string 271 | text string 272 | }{ 273 | {"/store/book/title", "great expectations"}, 274 | {"/store/book/author", "charles dickens"}, 275 | } 276 | for _, c := range cases { 277 | e := doc.FindElement(c.path) 278 | if e == nil { 279 | t.Errorf("etree: failed to find element '%s'", c.path) 280 | } else if e.Text() != c.text { 281 | t.Errorf("etree: expected path '%s' to contain '%s', got '%s'", c.path, c.text, e.Text()) 282 | } 283 | } 284 | } 285 | 286 | type lowercaseCharsetReader struct { 287 | r io.Reader 288 | } 289 | 290 | func (c *lowercaseCharsetReader) Read(p []byte) (n int, err error) { 291 | n, err = c.r.Read(p) 292 | if err != nil { 293 | return n, err 294 | } 295 | for i := 0; i < n; i++ { 296 | if p[i] >= 'A' && p[i] <= 'Z' { 297 | p[i] = p[i] - 'A' + 'a' 298 | } 299 | } 300 | return n, nil 301 | } 302 | 303 | func TestDocumentReadPermissive(t *testing.T) { 304 | s := "" 305 | 306 | doc := NewDocument() 307 | err := doc.ReadFromString(s) 308 | if err == nil { 309 | t.Fatal("etree: incorrect ReadFromString result") 310 | } 311 | 312 | doc.ReadSettings.Permissive = true 313 | err = doc.ReadFromString(s) 314 | if err != nil { 315 | t.Fatal("etree: incorrect ReadFromString result") 316 | } 317 | } 318 | 319 | func TestEmbeddedComment(t *testing.T) { 320 | s := `123456` 321 | 322 | doc := NewDocument() 323 | err := doc.ReadFromString(s) 324 | if err != nil { 325 | t.Fatal("etree: incorrect ReadFromString result") 326 | } 327 | 328 | a := 
doc.SelectElement("a") 329 | checkStrEq(t, a.Text(), "123456") 330 | } 331 | 332 | func TestDocumentReadHTMLEntities(t *testing.T) { 333 | s := ` 334 | 335 | → Great Expectations 336 | Charles Dickens 337 | 338 | ` 339 | 340 | doc := NewDocument() 341 | err := doc.ReadFromString(s) 342 | if err == nil { 343 | t.Fatal("etree: incorrect ReadFromString result") 344 | } 345 | 346 | doc.ReadSettings.Entity = xml.HTMLEntity 347 | err = doc.ReadFromString(s) 348 | if err != nil { 349 | t.Fatal("etree: incorrect ReadFromString result") 350 | } 351 | } 352 | 353 | func TestDocumentReadHTMLAutoClose(t *testing.T) { 354 | cases := []struct { 355 | name string 356 | input string 357 | want string 358 | }{ 359 | {"empty", ``, ``}, 360 | {"oneSelfClosing", `
`, `
`}, 361 | {"twoSelfClosingAndText", `
some text
`, `
some text
`}, 362 | { 363 | name: "largerExample", 364 | input: ` 365 |
366 | Author: Charles Dickens
367 | Book: Great Expectations
`, 368 | want: ` 369 |
370 | Author: Charles Dickens
371 | Book: Great Expectations
`}, 372 | } 373 | 374 | for _, c := range cases { 375 | t.Run(c.name, func(t *testing.T) { 376 | doc := NewDocument() 377 | doc.ReadSettings.Permissive = true 378 | doc.ReadSettings.AutoClose = xml.HTMLAutoClose 379 | err := doc.ReadFromString(c.input) 380 | if err != nil { 381 | t.Fatal("etree: ReadFromString() error = ", err) 382 | } 383 | s, err := doc.WriteToString() 384 | if err != nil { 385 | t.Fatal("etree: WriteToString() error = ", err) 386 | } 387 | checkStrEq(t, s, c.want) 388 | }) 389 | } 390 | } 391 | 392 | func TestEscapeCodes(t *testing.T) { 393 | cases := []struct { 394 | input string 395 | normal string 396 | attrCanonical string 397 | textCanonical string 398 | }{ 399 | { 400 | "&<>'\"\t\n\r", 401 | "&<>'"\t\n\r", 402 | "'" \">&<>'"\t\n\r", 403 | "&<>'\"\t\n ", 404 | }, 405 | { 406 | "\x00\x1f\x08\x09\x0a\x0d", 407 | "���\t\n\r", 408 | "���\t\n\r", 409 | "���\t\n ", 410 | }, 411 | } 412 | for _, c := range cases { 413 | doc := NewDocument() 414 | 415 | e := doc.CreateElement("e") 416 | e.SetText(c.input) 417 | e.CreateAttr("a", c.input) 418 | 419 | doc.WriteSettings.CanonicalText = false 420 | doc.WriteSettings.CanonicalAttrVal = false 421 | s, err := doc.WriteToString() 422 | if err != nil { 423 | t.Error("etree: Escape test produced inocrrect result.") 424 | } 425 | checkStrEq(t, s, c.normal) 426 | 427 | doc.WriteSettings.CanonicalText = false 428 | doc.WriteSettings.CanonicalAttrVal = true 429 | s, err = doc.WriteToString() 430 | if err != nil { 431 | t.Error("etree: Escape test produced inocrrect result.") 432 | } 433 | checkStrEq(t, s, c.attrCanonical) 434 | 435 | doc.WriteSettings.CanonicalText = true 436 | doc.WriteSettings.CanonicalAttrVal = false 437 | s, err = doc.WriteToString() 438 | if err != nil { 439 | t.Error("etree: Escape test produced inocrrect result.") 440 | } 441 | checkStrEq(t, s, c.textCanonical) 442 | } 443 | } 444 | 445 | func TestCanonical(t *testing.T) { 446 | BOM := "\xef\xbb\xbf" 447 | 448 | doc := NewDocument() 449 | 
doc.WriteSettings.CanonicalEndTags = true 450 | doc.WriteSettings.CanonicalText = true 451 | doc.WriteSettings.CanonicalAttrVal = true 452 | doc.CreateCharData(BOM) 453 | doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`) 454 | 455 | people := doc.CreateElement("People") 456 | people.CreateComment("These are all known people") 457 | 458 | jon := people.CreateElement("Person") 459 | jon.CreateAttr("name", "Jon O'Reilly") 460 | jon.SetText("\r<'\">&\u0004\u0005\u001f�") 461 | 462 | sally := people.CreateElement("Person") 463 | sally.CreateAttr("name", "Sally") 464 | sally.CreateAttr("escape", "\r\n\t<'\">&") 465 | 466 | doc.Indent(2) 467 | s, err := doc.WriteToString() 468 | if err != nil { 469 | t.Error("etree: WriteSettings WriteTo produced incorrect result.") 470 | } 471 | 472 | expected := BOM + ` 473 | 474 | 475 | <'">&���� 476 | 477 | 478 | ` 479 | checkStrEq(t, s, expected) 480 | } 481 | 482 | func TestCopy(t *testing.T) { 483 | s := ` 484 | 485 | Great Expectations 486 | Charles Dickens 487 | 488 | ` 489 | 490 | doc := newDocumentFromString(t, s) 491 | 492 | s1, err := doc.WriteToString() 493 | if err != nil { 494 | t.Error("etree: incorrect WriteToString result") 495 | } 496 | 497 | doc2 := doc.Copy() 498 | checkIndexes(t, &doc2.Element) 499 | s2, err := doc2.WriteToString() 500 | if err != nil { 501 | t.Error("etree: incorrect Copy result") 502 | } 503 | 504 | if s1 != s2 { 505 | t.Error("etree: mismatched Copy result") 506 | t.Error("wanted:\n" + s1) 507 | t.Error("got:\n" + s2) 508 | } 509 | 510 | e1 := doc.FindElement("./store/book/title") 511 | e2 := doc2.FindElement("./store/book/title") 512 | if e1 == nil || e2 == nil || e1.parent == nil || e1 == e2 { 513 | t.Error("etree: incorrect FindElement result") 514 | } 515 | 516 | e1.parent.RemoveChildAt(e1.Index()) 517 | s1, _ = doc.WriteToString() 518 | s2, _ = doc2.WriteToString() 519 | if s1 == s2 { 520 | t.Error("etree: incorrect result after RemoveElement") 521 | } 522 | } 523 | 
524 | func TestGetPath(t *testing.T) { 525 | s := ` 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | ` 538 | 539 | doc := newDocumentFromString(t, s) 540 | 541 | cases := []struct { 542 | from string 543 | to string 544 | relpath string 545 | topath string 546 | }{ 547 | {"a", ".", "..", "/"}, 548 | {".", "a", "./a", "/a"}, 549 | {"a/b1/c1/d1", ".", "../../../..", "/"}, 550 | {".", "a/b1/c1/d1", "./a/b1/c1/d1", "/a/b1/c1/d1"}, 551 | {"a", "a", ".", "/a"}, 552 | {"a/b1", "a/b1/c1", "./c1", "/a/b1/c1"}, 553 | {"a/b1/c1", "a/b1", "..", "/a/b1"}, 554 | {"a/b1/c1", "a/b1/c1", ".", "/a/b1/c1"}, 555 | {"a", "a/b1", "./b1", "/a/b1"}, 556 | {"a/b1", "a", "..", "/a"}, 557 | {"a", "a/b1/c1", "./b1/c1", "/a/b1/c1"}, 558 | {"a/b1/c1", "a", "../..", "/a"}, 559 | {"a/b1/c1/d1", "a", "../../..", "/a"}, 560 | {"a", "a/b1/c1/d1", "./b1/c1/d1", "/a/b1/c1/d1"}, 561 | {"a/b1", "a/b2", "../b2", "/a/b2"}, 562 | {"a/b2", "a/b1", "../b1", "/a/b1"}, 563 | {"a/b1/c1/d1", "a/b2/c2/d2", "../../../b2/c2/d2", "/a/b2/c2/d2"}, 564 | {"a/b2/c2/d2", "a/b1/c1/d1", "../../../b1/c1/d1", "/a/b1/c1/d1"}, 565 | {"a/b1/c1/d1", "a/b1/c1/d1a", "../d1a", "/a/b1/c1/d1a"}, 566 | } 567 | 568 | for _, c := range cases { 569 | fe := doc.FindElement(c.from) 570 | te := doc.FindElement(c.to) 571 | 572 | rp := te.GetRelativePath(fe) 573 | if rp != c.relpath { 574 | t.Errorf("GetRelativePath from '%s' to '%s'. Expected '%s', got '%s'.\n", c.from, c.to, c.relpath, rp) 575 | } 576 | 577 | p := te.GetPath() 578 | if p != c.topath { 579 | t.Errorf("GetPath for '%s'. 
Expected '%s', got '%s'.\n", c.to, c.topath, p) 580 | } 581 | } 582 | } 583 | 584 | func TestInsertChild(t *testing.T) { 585 | s := ` 586 | Great Expectations 587 | Charles Dickens 588 | 589 | ` 590 | 591 | doc := newDocumentFromString(t, s) 592 | 593 | year := NewElement("year") 594 | year.SetText("1861") 595 | 596 | book := doc.FindElement("//book") 597 | book.InsertChildAt(book.SelectElement("t:title").Index(), year) 598 | 599 | expected1 := ` 600 | 1861 601 | Great Expectations 602 | Charles Dickens 603 | 604 | ` 605 | doc.Indent(2) 606 | s1, _ := doc.WriteToString() 607 | checkStrEq(t, s1, expected1) 608 | 609 | book.RemoveChildAt(year.Index()) 610 | book.InsertChildAt(book.SelectElement("author").Index(), year) 611 | 612 | expected2 := ` 613 | Great Expectations 614 | 1861 615 | Charles Dickens 616 | 617 | ` 618 | doc.Indent(2) 619 | s2, _ := doc.WriteToString() 620 | checkStrEq(t, s2, expected2) 621 | 622 | book.RemoveChildAt(year.Index()) 623 | book.InsertChildAt(len(book.Child), year) 624 | 625 | expected3 := ` 626 | Great Expectations 627 | Charles Dickens 628 | 1861 629 | 630 | ` 631 | doc.Indent(2) 632 | s3, _ := doc.WriteToString() 633 | checkStrEq(t, s3, expected3) 634 | 635 | book.RemoveChildAt(year.Index()) 636 | book.InsertChildAt(999, year) 637 | 638 | expected4 := ` 639 | Great Expectations 640 | Charles Dickens 641 | 1861 642 | 643 | ` 644 | doc.Indent(2) 645 | s4, _ := doc.WriteToString() 646 | checkStrEq(t, s4, expected4) 647 | 648 | year = doc.FindElement("//book/year") 649 | book.InsertChildAt(0, year) 650 | 651 | expected5 := ` 652 | 1861 653 | Great Expectations 654 | Charles Dickens 655 | 656 | ` 657 | 658 | doc.Indent(2) 659 | s5, _ := doc.WriteToString() 660 | checkStrEq(t, s5, expected5) 661 | 662 | author := doc.FindElement("//book/author") 663 | year = doc.FindElement("//book/year") 664 | book.InsertChildAt(author.Index(), year) 665 | 666 | expected6 := ` 667 | Great Expectations 668 | 1861 669 | Charles Dickens 670 | 671 | ` 672 | 
doc.Indent(2) 673 | s6, _ := doc.WriteToString() 674 | checkStrEq(t, s6, expected6) 675 | } 676 | 677 | func TestCdata(t *testing.T) { 678 | var tests = []struct { 679 | in, out string 680 | }{ 681 | {`1234567`, "1234567"}, 682 | {``, "1234567"}, 683 | {`1357`, "1234567"}, 684 | {`13457`, "123"}, 685 | {`1457`, "1"}, 686 | {`457`, "1"}, 687 | } 688 | 689 | for _, test := range tests { 690 | doc := NewDocument() 691 | err := doc.ReadFromString(test.in) 692 | if err != nil { 693 | t.Fatal("etree ReadFromString: " + err.Error()) 694 | } 695 | 696 | tag := doc.FindElement("tag") 697 | if tag.Text() != test.out { 698 | t.Fatalf("etree invalid cdata. Expected: %v. Got: %v\n", test.out, tag.Text()) 699 | } 700 | } 701 | } 702 | 703 | func TestAddChild(t *testing.T) { 704 | s := ` 705 | Great Expectations 706 | Charles Dickens 707 | 708 | ` 709 | doc1 := newDocumentFromString(t, s) 710 | 711 | doc2 := NewDocument() 712 | root := doc2.CreateElement("root") 713 | 714 | for _, e := range doc1.FindElements("//book/*") { 715 | root.AddChild(e) 716 | } 717 | 718 | expected1 := ` 719 | ` 720 | doc1.Indent(2) 721 | s1, _ := doc1.WriteToString() 722 | checkStrEq(t, s1, expected1) 723 | 724 | expected2 := ` 725 | Great Expectations 726 | Charles Dickens 727 | 728 | ` 729 | doc2.Indent(2) 730 | s2, _ := doc2.WriteToString() 731 | checkStrEq(t, s2, expected2) 732 | } 733 | 734 | func TestSetRoot(t *testing.T) { 735 | s := ` 736 | 737 | Great Expectations 738 | Charles Dickens 739 | 740 | ` 741 | doc := newDocumentFromString(t, s) 742 | 743 | origroot := doc.Root() 744 | if origroot.Parent() != &doc.Element { 745 | t.Error("Root incorrect") 746 | } 747 | 748 | newroot := NewElement("root") 749 | doc.SetRoot(newroot) 750 | 751 | if doc.Root() != newroot { 752 | t.Error("doc.Root() != newroot") 753 | } 754 | if origroot.Parent() != nil { 755 | t.Error("origroot.Parent() != nil") 756 | } 757 | 758 | expected1 := ` 759 | 760 | ` 761 | doc.Indent(2) 762 | s1, _ := doc.WriteToString() 763 | 
checkStrEq(t, s1, expected1) 764 | 765 | doc.SetRoot(origroot) 766 | doc.Indent(2) 767 | expected2 := s 768 | s2, _ := doc.WriteToString() 769 | checkStrEq(t, s2, expected2) 770 | 771 | doc2 := NewDocument() 772 | doc2.CreateProcInst("test", `a="wow"`) 773 | doc2.SetRoot(NewElement("root")) 774 | doc2.Indent(2) 775 | expected3 := expected1 776 | s3, _ := doc2.WriteToString() 777 | checkStrEq(t, s3, expected3) 778 | 779 | doc2.SetRoot(doc.Root()) 780 | doc2.Indent(2) 781 | expected4 := s 782 | s4, _ := doc2.WriteToString() 783 | checkStrEq(t, s4, expected4) 784 | 785 | expected5 := ` 786 | ` 787 | doc.Indent(2) 788 | s5, _ := doc.WriteToString() 789 | checkStrEq(t, s5, expected5) 790 | } 791 | 792 | func TestSortAttrs(t *testing.T) { 793 | s := `` 794 | doc := newDocumentFromString(t, s) 795 | doc.Root().SortAttrs() 796 | doc.Indent(2) 797 | out, _ := doc.WriteToString() 798 | checkStrEq(t, out, ``+"\n") 799 | } 800 | 801 | func TestCharsetReaderDefaultSetting(t *testing.T) { 802 | // Test encodings where the default pass-through charset conversion 803 | // should work for common single-byte character encodings. 
804 | cases := []string{ 805 | ``, 806 | ``, 807 | ``, 808 | ``, 809 | ``, 810 | } 811 | 812 | for _, c := range cases { 813 | doc := NewDocument() 814 | if err := doc.ReadFromBytes([]byte(c)); err != nil { 815 | t.Error(err) 816 | } 817 | } 818 | } 819 | 820 | func TestCharData(t *testing.T) { 821 | doc := NewDocument() 822 | root := doc.CreateElement("root") 823 | root.CreateCharData("This ") 824 | root.CreateCData("is ") 825 | e1 := NewText("a ") 826 | e2 := NewCData("text ") 827 | root.AddChild(e1) 828 | root.AddChild(e2) 829 | root.CreateCharData("Element!!") 830 | 831 | s, err := doc.WriteToString() 832 | if err != nil { 833 | t.Error("etree: failed to serialize document") 834 | } 835 | 836 | checkStrEq(t, s, `This a Element!!`) 837 | 838 | // Check we can parse the output 839 | err = doc.ReadFromString(s) 840 | if err != nil { 841 | t.Fatal("etree: incorrect ReadFromString result") 842 | } 843 | if doc.Root().Text() != "This is a text Element!!" { 844 | t.Error("etree: invalid text") 845 | } 846 | } 847 | 848 | func TestIndentSimple(t *testing.T) { 849 | doc := NewDocument() 850 | root := doc.CreateElement("root") 851 | ch1 := root.CreateElement("child1") 852 | ch1.CreateElement("child2") 853 | 854 | // First test Unindent. 855 | doc.Unindent() 856 | s, err := doc.WriteToString() 857 | if err != nil { 858 | t.Error("etree: failed to serialize document") 859 | } 860 | expected := "" 861 | checkStrEq(t, s, expected) 862 | 863 | // Now test Indent with NoIndent (which should produce the same result 864 | // as Unindent). 865 | doc.Indent(NoIndent) 866 | s, err = doc.WriteToString() 867 | if err != nil { 868 | t.Error("etree: failed to serialize document") 869 | } 870 | checkStrEq(t, s, expected) 871 | 872 | // Run all indent test cases. 
873 | tests := []struct { 874 | useTabs, useCRLF bool 875 | ws, nl string 876 | }{ 877 | {false, false, " ", "\n"}, 878 | {false, true, " ", "\r\n"}, 879 | {true, false, "\t", "\n"}, 880 | {true, true, "\t", "\r\n"}, 881 | } 882 | 883 | for _, test := range tests { 884 | doc.WriteSettings.UseCRLF = test.useCRLF 885 | if test.useTabs { 886 | doc.IndentTabs() 887 | s, err := doc.WriteToString() 888 | if err != nil { 889 | t.Error("etree: failed to serialize document") 890 | } 891 | tab := test.ws 892 | expected := "" + test.nl + tab + "" + test.nl + 893 | tab + tab + "" + test.nl + tab + 894 | "" + test.nl + "" + test.nl 895 | checkStrEq(t, s, expected) 896 | } else { 897 | for i := 0; i < 256; i++ { 898 | doc.Indent(i) 899 | s, err := doc.WriteToString() 900 | if err != nil { 901 | t.Error("etree: failed to serialize document") 902 | } 903 | tab := strings.Repeat(test.ws, i) 904 | expected := "" + test.nl + tab + "" + test.nl + 905 | tab + tab + "" + test.nl + tab + 906 | "" + test.nl + "" + test.nl 907 | checkStrEq(t, s, expected) 908 | } 909 | } 910 | } 911 | } 912 | 913 | func TestIndentWithDefaultSettings(t *testing.T) { 914 | input := ` 915 | 916 | 917 | 918 | ` 919 | 920 | doc := NewDocument() 921 | err := doc.ReadFromString(input) 922 | if err != nil { 923 | t.Error("etree: failed to read string") 924 | } 925 | 926 | settings := NewIndentSettings() 927 | doc.IndentWithSettings(settings) 928 | s, err := doc.WriteToString() 929 | if err != nil { 930 | t.Error("etree: failed to serialize document") 931 | } 932 | expected := "\n \n \n \n\n" 933 | checkStrEq(t, s, expected) 934 | } 935 | 936 | func TestIndentWithSettings(t *testing.T) { 937 | doc := NewDocument() 938 | root := doc.CreateElement("root") 939 | ch1 := root.CreateElement("child1") 940 | ch1.CreateElement("child2") 941 | 942 | // First test with NoIndent. 
943 | settings := NewIndentSettings() 944 | settings.UseCRLF = false 945 | settings.UseTabs = false 946 | settings.Spaces = NoIndent 947 | doc.IndentWithSettings(settings) 948 | s, err := doc.WriteToString() 949 | if err != nil { 950 | t.Error("etree: failed to serialize document") 951 | } 952 | expected := "" 953 | checkStrEq(t, s, expected) 954 | 955 | // Run all indent test cases. 956 | tests := []struct { 957 | useTabs, useCRLF bool 958 | ws, nl string 959 | }{ 960 | {false, false, " ", "\n"}, 961 | {false, true, " ", "\r\n"}, 962 | {true, false, "\t", "\n"}, 963 | {true, true, "\t", "\r\n"}, 964 | } 965 | 966 | for _, test := range tests { 967 | if test.useTabs { 968 | settings := NewIndentSettings() 969 | settings.UseTabs = true 970 | settings.UseCRLF = test.useCRLF 971 | doc.IndentWithSettings(settings) 972 | s, err := doc.WriteToString() 973 | if err != nil { 974 | t.Error("etree: failed to serialize document") 975 | } 976 | tab := test.ws 977 | expected := "" + test.nl + tab + "" + test.nl + 978 | tab + tab + "" + test.nl + tab + 979 | "" + test.nl + "" + test.nl 980 | checkStrEq(t, s, expected) 981 | } else { 982 | for i := 0; i < 256; i++ { 983 | settings := NewIndentSettings() 984 | settings.Spaces = i 985 | settings.UseTabs = false 986 | settings.UseCRLF = test.useCRLF 987 | doc.IndentWithSettings(settings) 988 | s, err := doc.WriteToString() 989 | if err != nil { 990 | t.Error("etree: failed to serialize document") 991 | } 992 | tab := strings.Repeat(test.ws, i) 993 | expected := "" + test.nl + tab + "" + test.nl + 994 | tab + tab + "" + test.nl + tab + 995 | "" + test.nl + "" + test.nl 996 | checkStrEq(t, s, expected) 997 | } 998 | } 999 | } 1000 | } 1001 | 1002 | func TestIndentPreserveWhitespace(t *testing.T) { 1003 | tests := []struct { 1004 | input string 1005 | expected string 1006 | }{ 1007 | {"", ""}, 1008 | {" ", " "}, 1009 | {"\t", "\t"}, 1010 | {"\t\n \t", "\t\n \t"}, 1011 | {"", " "}, 1012 | {" ", ""}, 1013 | {" ", "\n \n"}, 1014 | } 1015 
| 1016 | for _, test := range tests { 1017 | doc := NewDocument() 1018 | err := doc.ReadFromString(test.input) 1019 | if err != nil { 1020 | t.Error("etree: failed to read string") 1021 | } 1022 | 1023 | s := NewIndentSettings() 1024 | s.Spaces = 2 1025 | s.PreserveLeafWhitespace = true 1026 | s.SuppressTrailingWhitespace = true 1027 | doc.IndentWithSettings(s) 1028 | 1029 | output, err := doc.WriteToString() 1030 | if err != nil { 1031 | t.Error("etree: failed to read string") 1032 | } 1033 | checkStrEq(t, output, test.expected) 1034 | } 1035 | } 1036 | 1037 | func TestPreserveCData(t *testing.T) { 1038 | tests := []struct { 1039 | input string 1040 | expectedWithPreserve string 1041 | expectedWithoutPreserve string 1042 | }{ 1043 | { 1044 | "", 1045 | "", 1046 | "x", 1047 | }, 1048 | { 1049 | "foo]]>", 1050 | "foo]]>", 1051 | "x <b>foo</b>", 1052 | }, 1053 | { 1054 | " name ", 1055 | " name ", 1056 | "My name is", 1057 | }, 1058 | } 1059 | 1060 | for _, test := range tests { 1061 | doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: true}) 1062 | output, _ := doc.WriteToString() 1063 | checkStrEq(t, output, test.expectedWithPreserve) 1064 | } 1065 | 1066 | for _, test := range tests { 1067 | doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: false}) 1068 | output, _ := doc.WriteToString() 1069 | checkStrEq(t, output, test.expectedWithoutPreserve) 1070 | } 1071 | } 1072 | 1073 | func TestTokenIndexing(t *testing.T) { 1074 | s := ` 1075 | 1076 | 1077 | 1078 | 1079 | 1080 | Great Expectations 1081 | Charles Dickens 1082 | 1083 | 1084 | ` 1085 | 1086 | doc := newDocumentFromString(t, s) 1087 | review := doc.FindElement("/store/book/review") 1088 | review.SetText("Excellent") 1089 | 1090 | checkIndexes(t, &doc.Element) 1091 | 1092 | doc.Indent(4) 1093 | checkIndexes(t, &doc.Element) 1094 | 1095 | doc.Indent(NoIndent) 1096 | checkIndexes(t, &doc.Element) 1097 | 1098 | e := NewElement("foo") 1099 | store := 
doc.SelectElement("store") 1100 | store.InsertChildAt(0, e) 1101 | checkIndexes(t, &doc.Element) 1102 | 1103 | store.RemoveChildAt(0) 1104 | checkIndexes(t, &doc.Element) 1105 | } 1106 | 1107 | func TestSetText(t *testing.T) { 1108 | doc := NewDocument() 1109 | root := doc.CreateElement("root") 1110 | 1111 | checkDocEq(t, doc, ``) 1112 | checkStrEq(t, root.Text(), "") 1113 | checkIntEq(t, len(root.Child), 0) 1114 | 1115 | root.SetText("foo") 1116 | checkDocEq(t, doc, `foo`) 1117 | checkStrEq(t, root.Text(), "foo") 1118 | checkIntEq(t, len(root.Child), 1) 1119 | 1120 | root.SetText("bar") 1121 | checkDocEq(t, doc, `bar`) 1122 | checkStrEq(t, root.Text(), "bar") 1123 | checkIntEq(t, len(root.Child), 1) 1124 | 1125 | root.CreateCData("cdata") 1126 | checkDocEq(t, doc, `bar`) 1127 | checkStrEq(t, root.Text(), "barcdata") 1128 | checkIntEq(t, len(root.Child), 2) 1129 | 1130 | root.SetText("qux") 1131 | checkDocEq(t, doc, `qux`) 1132 | checkStrEq(t, root.Text(), "qux") 1133 | checkIntEq(t, len(root.Child), 1) 1134 | 1135 | root.CreateCData("cdata") 1136 | checkDocEq(t, doc, `qux`) 1137 | checkStrEq(t, root.Text(), "quxcdata") 1138 | checkIntEq(t, len(root.Child), 2) 1139 | 1140 | root.SetCData("baz") 1141 | checkDocEq(t, doc, ``) 1142 | checkStrEq(t, root.Text(), "baz") 1143 | checkIntEq(t, len(root.Child), 1) 1144 | 1145 | root.CreateText("corge") 1146 | root.CreateCData("grault") 1147 | root.CreateText("waldo") 1148 | root.CreateCData("fred") 1149 | root.CreateElement("child") 1150 | checkDocEq(t, doc, `corgewaldo`) 1151 | checkStrEq(t, root.Text(), "bazcorgegraultwaldofred") 1152 | checkIntEq(t, len(root.Child), 6) 1153 | 1154 | root.SetText("plugh") 1155 | checkDocEq(t, doc, `plugh`) 1156 | checkStrEq(t, root.Text(), "plugh") 1157 | checkIntEq(t, len(root.Child), 2) 1158 | 1159 | root.SetText("") 1160 | checkDocEq(t, doc, ``) 1161 | checkStrEq(t, root.Text(), "") 1162 | checkIntEq(t, len(root.Child), 1) 1163 | 1164 | root.SetText("") 1165 | checkDocEq(t, doc, ``) 
1166 | checkStrEq(t, root.Text(), "") 1167 | checkIntEq(t, len(root.Child), 1) 1168 | 1169 | root.RemoveChildAt(0) 1170 | root.CreateText("corge") 1171 | root.CreateCData("grault") 1172 | root.CreateText("waldo") 1173 | root.CreateCData("fred") 1174 | root.CreateElement("child") 1175 | checkDocEq(t, doc, `corgewaldo`) 1176 | checkStrEq(t, root.Text(), "corgegraultwaldofred") 1177 | checkIntEq(t, len(root.Child), 5) 1178 | 1179 | root.SetText("") 1180 | checkDocEq(t, doc, ``) 1181 | checkStrEq(t, root.Text(), "") 1182 | checkIntEq(t, len(root.Child), 1) 1183 | } 1184 | 1185 | func TestSetTail(t *testing.T) { 1186 | doc := NewDocument() 1187 | root := doc.CreateElement("root") 1188 | child := root.CreateElement("child") 1189 | root.CreateText("\n\t") 1190 | child.SetText("foo") 1191 | checkDocEq(t, doc, "foo\n\t") 1192 | checkStrEq(t, child.Tail(), "\n\t") 1193 | checkIntEq(t, len(root.Child), 2) 1194 | checkIntEq(t, len(child.Child), 1) 1195 | 1196 | root.CreateCData(" ") 1197 | checkDocEq(t, doc, "foo\n\t") 1198 | checkStrEq(t, child.Tail(), "\n\t ") 1199 | checkIntEq(t, len(root.Child), 3) 1200 | checkIntEq(t, len(child.Child), 1) 1201 | 1202 | child.SetTail("") 1203 | checkDocEq(t, doc, "foo") 1204 | checkStrEq(t, child.Tail(), "") 1205 | checkIntEq(t, len(root.Child), 1) 1206 | checkIntEq(t, len(child.Child), 1) 1207 | 1208 | child.SetTail("\t\t\t") 1209 | checkDocEq(t, doc, "foo\t\t\t") 1210 | checkStrEq(t, child.Tail(), "\t\t\t") 1211 | checkIntEq(t, len(root.Child), 2) 1212 | checkIntEq(t, len(child.Child), 1) 1213 | 1214 | child.SetTail("\t\n\n\t") 1215 | checkDocEq(t, doc, "foo\t\n\n\t") 1216 | checkStrEq(t, child.Tail(), "\t\n\n\t") 1217 | checkIntEq(t, len(root.Child), 2) 1218 | checkIntEq(t, len(child.Child), 1) 1219 | 1220 | child.SetTail("") 1221 | checkDocEq(t, doc, "foo") 1222 | checkStrEq(t, child.Tail(), "") 1223 | checkIntEq(t, len(root.Child), 1) 1224 | checkIntEq(t, len(child.Child), 1) 1225 | } 1226 | 1227 | func TestAttrParent(t *testing.T) { 
1228 | doc := NewDocument() 1229 | root := doc.CreateElement("root") 1230 | attr1 := root.CreateAttr("bar", "1") 1231 | attr2 := root.CreateAttr("qux", "2") 1232 | 1233 | checkIntEq(t, len(root.Attr), 2) 1234 | checkElementEq(t, attr1.Element(), root) 1235 | checkElementEq(t, attr2.Element(), root) 1236 | 1237 | attr1 = root.RemoveAttr("bar") 1238 | attr2 = root.RemoveAttr("qux") 1239 | checkElementEq(t, attr1.Element(), nil) 1240 | checkElementEq(t, attr2.Element(), nil) 1241 | 1242 | s := `` 1243 | err := doc.ReadFromString(s) 1244 | if err != nil { 1245 | t.Error("etree: failed to parse document") 1246 | } 1247 | 1248 | root = doc.SelectElement("root") 1249 | for i := range root.Attr { 1250 | checkElementEq(t, root.Attr[i].Element(), root) 1251 | } 1252 | } 1253 | 1254 | func TestDefaultNamespaceURI(t *testing.T) { 1255 | s := ` 1256 | 1257 | 1258 | 1259 | 1260 | 1261 | 1262 | 1263 | 1264 | 1265 | ` 1266 | 1267 | doc := newDocumentFromString(t, s) 1268 | root := doc.SelectElement("root") 1269 | child1 := root.SelectElement("child1") 1270 | child2 := root.SelectElement("child2") 1271 | grandchild1 := child1.SelectElement("grandchild1") 1272 | grandchild2 := child1.SelectElement("grandchild2") 1273 | greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1") 1274 | 1275 | checkStrEq(t, doc.NamespaceURI(), "") 1276 | checkStrEq(t, root.NamespaceURI(), "https://root.example.com") 1277 | checkStrEq(t, child1.NamespaceURI(), "https://child.example.com") 1278 | checkStrEq(t, child2.NamespaceURI(), "https://root.example.com") 1279 | checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com") 1280 | checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com") 1281 | checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://child.example.com") 1282 | 1283 | checkStrEq(t, root.Attr[0].NamespaceURI(), "") 1284 | checkStrEq(t, root.Attr[1].NamespaceURI(), "") 1285 | checkStrEq(t, root.Attr[2].NamespaceURI(), "https://attrib.example.com") 1286 | 
checkStrEq(t, root.Attr[3].NamespaceURI(), "") 1287 | checkStrEq(t, child1.Attr[0].NamespaceURI(), "") 1288 | checkStrEq(t, child1.Attr[1].NamespaceURI(), "https://attrib.example.com") 1289 | checkStrEq(t, child2.Attr[0].NamespaceURI(), "") 1290 | checkStrEq(t, grandchild1.Attr[0].NamespaceURI(), "") 1291 | checkStrEq(t, grandchild1.Attr[1].NamespaceURI(), "") 1292 | checkStrEq(t, grandchild2.Attr[0].NamespaceURI(), "") 1293 | checkStrEq(t, greatgrandchild1.Attr[0].NamespaceURI(), "https://attrib.example.com") 1294 | 1295 | f := doc.FindElements("//*[namespace-uri()='https://root.example.com']") 1296 | if len(f) != 2 || f[0] != root || f[1] != child2 { 1297 | t.Error("etree: failed namespace-uri test") 1298 | } 1299 | 1300 | f = doc.FindElements("//*[namespace-uri()='https://child.example.com']") 1301 | if len(f) != 3 || f[0] != child1 || f[1] != grandchild2 || f[2] != greatgrandchild1 { 1302 | t.Error("etree: failed namespace-uri test") 1303 | } 1304 | 1305 | f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']") 1306 | if len(f) != 1 || f[0] != grandchild1 { 1307 | t.Error("etree: failed namespace-uri test") 1308 | } 1309 | 1310 | f = doc.FindElements("//*[namespace-uri()='']") 1311 | if len(f) != 0 { 1312 | t.Error("etree: failed namespace-uri test") 1313 | } 1314 | 1315 | f = doc.FindElements("//*[namespace-uri()='foo']") 1316 | if len(f) != 0 { 1317 | t.Error("etree: failed namespace-uri test") 1318 | } 1319 | } 1320 | 1321 | func TestLocalNamespaceURI(t *testing.T) { 1322 | s := ` 1323 | 1324 | 1325 | 1326 | 1327 | 1328 | 1329 | 1330 | 1331 | 1332 | 1333 | 1334 | 1335 | 1336 | ` 1337 | 1338 | doc := newDocumentFromString(t, s) 1339 | root := doc.SelectElement("root") 1340 | child1 := root.SelectElement("child1") 1341 | child2 := root.SelectElement("child2") 1342 | child3 := root.SelectElement("child3") 1343 | grandchild1 := child1.SelectElement("grandchild1") 1344 | grandchild2 := child1.SelectElement("grandchild2") 1345 | grandchild3 
:= child1.SelectElement("grandchild3") 1346 | grandchild4 := child1.SelectElement("grandchild4") 1347 | greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1") 1348 | 1349 | checkStrEq(t, doc.NamespaceURI(), "") 1350 | checkStrEq(t, root.NamespaceURI(), "https://root.example.com") 1351 | checkStrEq(t, child1.NamespaceURI(), "https://child.example.com") 1352 | checkStrEq(t, child2.NamespaceURI(), "https://root.example.com") 1353 | checkStrEq(t, child3.NamespaceURI(), "") 1354 | checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com") 1355 | checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com") 1356 | checkStrEq(t, grandchild3.NamespaceURI(), "https://root.example.com") 1357 | checkStrEq(t, grandchild4.NamespaceURI(), "") 1358 | checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://root.example.com") 1359 | 1360 | f := doc.FindElements("//*[namespace-uri()='https://root.example.com']") 1361 | if len(f) != 4 || f[0] != root || f[1] != child2 || f[2] != grandchild3 || f[3] != greatgrandchild1 { 1362 | t.Error("etree: failed namespace-uri test") 1363 | } 1364 | 1365 | f = doc.FindElements("//*[namespace-uri()='https://child.example.com']") 1366 | if len(f) != 2 || f[0] != child1 || f[1] != grandchild2 { 1367 | t.Error("etree: failed namespace-uri test") 1368 | } 1369 | 1370 | f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']") 1371 | if len(f) != 1 || f[0] != grandchild1 { 1372 | t.Error("etree: failed namespace-uri test") 1373 | } 1374 | 1375 | f = doc.FindElements("//*[namespace-uri()='']") 1376 | if len(f) != 2 || f[0] != child3 || f[1] != grandchild4 { 1377 | t.Error("etree: failed namespace-uri test") 1378 | } 1379 | 1380 | f = doc.FindElements("//*[namespace-uri()='foo']") 1381 | if len(f) != 0 { 1382 | t.Error("etree: failed namespace-uri test") 1383 | } 1384 | } 1385 | 1386 | func TestWhitespace(t *testing.T) { 1387 | s := "\n\t\n\t\t x\n \n" 1388 | 1389 | doc := newDocumentFromString(t, 
s) 1390 | root := doc.Root() 1391 | checkIntEq(t, len(root.Child), 3) 1392 | 1393 | cd := root.Child[0].(*CharData) 1394 | checkBoolEq(t, cd.IsWhitespace(), true) 1395 | checkStrBinaryEq(t, cd.Data, "\n\t") 1396 | 1397 | cd = root.Child[2].(*CharData) 1398 | checkBoolEq(t, cd.IsWhitespace(), true) 1399 | checkStrBinaryEq(t, cd.Data, "\n") 1400 | 1401 | child := root.SelectElement("child") 1402 | checkIntEq(t, len(child.Child), 3) 1403 | 1404 | cd = child.Child[0].(*CharData) 1405 | checkBoolEq(t, cd.IsWhitespace(), true) 1406 | checkStrBinaryEq(t, cd.Data, "\n\t\t") 1407 | 1408 | cd = child.Child[2].(*CharData) 1409 | checkBoolEq(t, cd.IsWhitespace(), true) 1410 | checkStrBinaryEq(t, cd.Data, "\n ") 1411 | 1412 | grandchild := child.SelectElement("grandchild") 1413 | checkIntEq(t, len(grandchild.Child), 1) 1414 | 1415 | cd = grandchild.Child[0].(*CharData) 1416 | checkBoolEq(t, cd.IsWhitespace(), false) 1417 | 1418 | cd.SetData(" ") 1419 | checkBoolEq(t, cd.IsWhitespace(), true) 1420 | 1421 | cd.SetData(" x") 1422 | checkBoolEq(t, cd.IsWhitespace(), false) 1423 | 1424 | cd.SetData("\t\n\r ") 1425 | checkBoolEq(t, cd.IsWhitespace(), true) 1426 | 1427 | cd.SetData("\uFFFD") 1428 | checkBoolEq(t, cd.IsWhitespace(), false) 1429 | 1430 | cd.SetData("") 1431 | checkBoolEq(t, cd.IsWhitespace(), true) 1432 | } 1433 | 1434 | func TestTokenWriteTo(t *testing.T) { 1435 | s := ` 1436 | 1437 | 1438 | Great Expectations 1439 | 1440 | ` 1441 | doc := newDocumentFromString(t, s) 1442 | 1443 | writeSettings := WriteSettings{} 1444 | indentSettings := IndentSettings{UseTabs: true} 1445 | 1446 | tests := []struct { 1447 | path string 1448 | expected string 1449 | }{ 1450 | {"//store", "\n\t\n\t\n\t\tGreat Expectations\n\t\n"}, 1451 | {"//store/book", "\n\tGreat Expectations\n"}, 1452 | {"//store/book/title", "Great Expectations"}, 1453 | } 1454 | for _, test := range tests { 1455 | var buffer bytes.Buffer 1456 | 1457 | c := doc.FindElement(test.path) 1458 | 
c.IndentWithSettings(&indentSettings) 1459 | c.WriteTo(&buffer, &writeSettings) 1460 | checkStrEq(t, buffer.String(), test.expected) 1461 | } 1462 | } 1463 | 1464 | func TestReindexChildren(t *testing.T) { 1465 | s := ` 1466 | 1467 | 1468 | 1469 | 1470 | 1471 | ` 1472 | doc := newDocumentFromString(t, s) 1473 | doc.Unindent() 1474 | 1475 | root := doc.Root() 1476 | if root == nil || root.Tag != "root" || len(root.Child) != 5 { 1477 | t.Error("etree: expected root element not found") 1478 | } 1479 | 1480 | for i := 0; i < len(root.Child); i++ { 1481 | if root.Child[i].Index() != i { 1482 | t.Error("etree: incorrect child index found in root element child") 1483 | } 1484 | } 1485 | 1486 | rand.Shuffle(len(root.Child), func(i, j int) { 1487 | root.Child[i], root.Child[j] = root.Child[j], root.Child[i] 1488 | }) 1489 | 1490 | root.ReindexChildren() 1491 | 1492 | for i := 0; i < len(root.Child); i++ { 1493 | if root.Child[i].Index() != i { 1494 | t.Error("etree: incorrect child index found in root element child") 1495 | } 1496 | } 1497 | } 1498 | 1499 | func TestPreserveDuplicateAttrs(t *testing.T) { 1500 | s := `` 1501 | 1502 | checkAttrCount := func(e *Element, n int) { 1503 | if len(e.Attr) != n { 1504 | t.Errorf("etree: expected %d attributes, got %d", n, len(e.Attr)) 1505 | } 1506 | } 1507 | checkAttr := func(e *Element, i int, key, value string) { 1508 | if i >= len(e.Attr) { 1509 | t.Errorf("etree: attr[%d] out of bounds", i) 1510 | return 1511 | } 1512 | if e.Attr[i].Key != key { 1513 | t.Errorf("etree: attr[%d] expected key %s, got %s", i, key, e.Attr[i].Key) 1514 | } 1515 | if e.Attr[i].Value != value { 1516 | t.Errorf("etree: attr[%d] expected value %s, got %s", i, value, e.Attr[i].Value) 1517 | } 1518 | } 1519 | 1520 | t.Run("enabled", func(t *testing.T) { 1521 | doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: true}) 1522 | e := doc.FindElement("element") 1523 | checkAttrCount(e, 5) 1524 | checkAttr(e, 0, "x", "value1") 1525 | 
checkAttr(e, 1, "y", "value2") 1526 | checkAttr(e, 2, "x", "value3") 1527 | checkAttr(e, 3, "x", "value4") 1528 | checkAttr(e, 4, "y", "value5") 1529 | }) 1530 | 1531 | t.Run("disabled", func(t *testing.T) { 1532 | doc := newDocumentFromString2(t, s, ReadSettings{}) 1533 | e := doc.FindElement("element") 1534 | checkAttrCount(e, 2) 1535 | checkAttr(e, 0, "x", "value4") 1536 | checkAttr(e, 1, "y", "value5") 1537 | }) 1538 | } 1539 | 1540 | func TestNotNil(t *testing.T) { 1541 | s := `true` 1542 | 1543 | doc := newDocumentFromString(t, s) 1544 | doc.SelectElement("enabled").NotNil().SetText("false") 1545 | doc.SelectElement("visible").NotNil().SetText("true") 1546 | 1547 | want := `false` 1548 | got, err := doc.WriteToString() 1549 | if err != nil { 1550 | t.Fatal("etree: failed to write document to string") 1551 | } 1552 | if got != want { 1553 | t.Error("etree: unexpected NotNil result") 1554 | t.Error("wanted:\n" + want) 1555 | t.Error("got:\n" + got) 1556 | } 1557 | } 1558 | 1559 | func TestValidateInput(t *testing.T) { 1560 | tests := []struct { 1561 | s string 1562 | err string 1563 | }{ 1564 | {`x`, ""}, 1565 | {``, ""}, 1566 | {`x`, `XML syntax error on line 1: unexpected EOF`}, 1567 | {``, `XML syntax error on line 1: unexpected end element `}, 1568 | {`<>`, `XML syntax error on line 1: expected element name after <`}, 1569 | {`xtrailing`, "etree: invalid XML format"}, 1570 | {`x<`, "etree: invalid XML format"}, 1571 | {`x`, `XML syntax error on line 1: element closed by `}, 1572 | } 1573 | 1574 | type readFunc func(doc *Document, s string) error 1575 | runTests := func(t *testing.T, read readFunc) { 1576 | for i, test := range tests { 1577 | doc := NewDocument() 1578 | doc.ReadSettings.ValidateInput = true 1579 | err := read(doc, test.s) 1580 | if err == nil { 1581 | if test.err != "" { 1582 | t.Errorf("etree: test #%d:\nExpected error:\n %s\nReceived error:\n nil", i, test.err) 1583 | } 1584 | root := doc.Root() 1585 | if root == nil || root.Tag != "root" 
{ 1586 | t.Errorf("etree: test #%d: failed to read document after input validation", i) 1587 | } 1588 | } else { 1589 | te := err.Error() 1590 | if te != test.err { 1591 | t.Errorf("etree: test #%d:\nExpected error;\n %s\nReceived error:\n %s", i, test.err, te) 1592 | } 1593 | } 1594 | } 1595 | } 1596 | 1597 | readFromString := func(doc *Document, s string) error { 1598 | return doc.ReadFromString(s) 1599 | } 1600 | t.Run("ReadFromString", func(t *testing.T) { runTests(t, readFromString) }) 1601 | 1602 | readFromBytes := func(doc *Document, s string) error { 1603 | return doc.ReadFromBytes([]byte(s)) 1604 | } 1605 | t.Run("ReadFromBytes", func(t *testing.T) { runTests(t, readFromBytes) }) 1606 | 1607 | readFromFile := func(doc *Document, s string) error { 1608 | pathtmp := path.Join(t.TempDir(), "etree-test") 1609 | err := os.WriteFile(pathtmp, []byte(s), fs.ModePerm) 1610 | if err != nil { 1611 | return errors.New("unable to write tmp file for input validation") 1612 | } 1613 | return doc.ReadFromFile(pathtmp) 1614 | } 1615 | t.Run("ReadFromFile", func(t *testing.T) { runTests(t, readFromFile) }) 1616 | } 1617 | 1618 | func TestSiblingElement(t *testing.T) { 1619 | doc := newDocumentFromString(t, ` `) 1620 | 1621 | root := doc.SelectElement("root") 1622 | a := root.SelectElement("a") 1623 | b := root.SelectElement("b") 1624 | c := root.SelectElement("c") 1625 | b1 := b.SelectElement("b1") 1626 | 1627 | tests := []struct { 1628 | e *Element 1629 | next *Element 1630 | prev *Element 1631 | }{ 1632 | {root, nil, nil}, 1633 | {a, b, nil}, 1634 | {b, c, a}, 1635 | {c, nil, b}, 1636 | {b1, nil, nil}, 1637 | } 1638 | 1639 | toString := func(e *Element) string { 1640 | if e == nil { 1641 | return "nil" 1642 | } 1643 | return e.Tag 1644 | } 1645 | 1646 | for i, test := range tests { 1647 | next := test.e.NextSibling() 1648 | if next != test.next { 1649 | t.Errorf("etree: test #%d unexpected NextSibling result.\n Expected: %s\n Received: %s\n", 1650 | i, toString(next), 
toString(test.next)) 1651 | } 1652 | 1653 | prev := test.e.PrevSibling() 1654 | if prev != test.prev { 1655 | t.Errorf("etree: test #%d unexpected PrevSibling result.\n Expected: %s\n Received: %s\n", 1656 | i, toString(prev), toString(test.prev)) 1657 | } 1658 | } 1659 | } 1660 | 1661 | func TestContinuations(t *testing.T) { 1662 | doc := NewDocument() 1663 | root := doc.CreateChild("root", func(e *Element) { 1664 | e.CreateChild("child1", func(e *Element) { 1665 | e.CreateComment("Grandchildren of child #1") 1666 | e.CreateChild("grandchild1", func(e *Element) { 1667 | e.CreateAttr("attr1", "1") 1668 | e.CreateAttr("attr2", "2") 1669 | }) 1670 | e.CreateChild("grandchild2", func(e *Element) { 1671 | e.CreateAttr("attr1", "3") 1672 | e.CreateAttr("attr2", "4") 1673 | }) 1674 | }) 1675 | e.CreateChild("child2", func(e *Element) { 1676 | e.CreateComment("Grandchildren of child #2") 1677 | e.CreateChild("grandchild1", func(e *Element) { 1678 | e.CreateAttr("attr1", "5") 1679 | e.CreateAttr("attr2", "6") 1680 | }) 1681 | e.CreateChild("grandchild2", func(e *Element) { 1682 | e.CreateAttr("attr1", "7") 1683 | e.CreateAttr("attr2", "8") 1684 | }) 1685 | }) 1686 | }) 1687 | checkStrEq(t, root.Tag, "root") 1688 | 1689 | // Serialize the document to a string 1690 | doc.IndentTabs() 1691 | s, err := doc.WriteToString() 1692 | if err != nil { 1693 | t.Error("etree: failed to serialize document") 1694 | } 1695 | 1696 | // Make sure the serialized XML matches expectation. 1697 | expected := ` 1698 | 1699 | 1700 | 1701 | 1702 | 1703 | 1704 | 1705 | 1706 | 1707 | 1708 | 1709 | ` 1710 | 1711 | checkStrEq(t, s, expected) 1712 | } 1713 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015-2019 Brett Vickers. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package etree 6 | 7 | import "os" 8 | 9 | // Create an etree Document, add XML entities to it, and serialize it 10 | // to stdout. 11 | func ExampleDocument_creating() { 12 | doc := NewDocument() 13 | doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`) 14 | doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`) 15 | 16 | people := doc.CreateElement("People") 17 | people.CreateComment("These are all known people") 18 | 19 | jon := people.CreateElement("Person") 20 | jon.CreateAttr("name", "Jon O'Reilly") 21 | 22 | sally := people.CreateElement("Person") 23 | sally.CreateAttr("name", "Sally") 24 | 25 | doc.Indent(2) 26 | doc.WriteTo(os.Stdout) 27 | // Output: 28 | // 29 | // 30 | // 31 | // 32 | // 33 | // 34 | // 35 | } 36 | 37 | func ExampleDocument_reading() { 38 | doc := NewDocument() 39 | if err := doc.ReadFromFile("document.xml"); err != nil { 40 | panic(err) 41 | } 42 | } 43 | 44 | func ExamplePath() { 45 | xml := ` 46 | 47 | 48 | Great Expectations 49 | Charles Dickens 50 | 51 | 52 | Ulysses 53 | James Joyce 54 | 55 | ` 56 | 57 | doc := NewDocument() 58 | doc.ReadFromString(xml) 59 | for _, e := range doc.FindElements(".//book[author='Charles Dickens']") { 60 | doc := NewDocumentWithRoot(e.Copy()) 61 | doc.Indent(2) 62 | doc.WriteTo(os.Stdout) 63 | } 64 | // Output: 65 | // 66 | // Great Expectations 67 | // Charles Dickens 68 | // 69 | } 70 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/beevik/etree 2 | 3 | go 1.21.0 4 | -------------------------------------------------------------------------------- /helpers.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015-2019 Brett Vickers. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package etree 6 | 7 | import ( 8 | "io" 9 | "strings" 10 | "unicode/utf8" 11 | ) 12 | 13 | type stack[E any] struct { 14 | data []E 15 | } 16 | 17 | func (s *stack[E]) empty() bool { 18 | return len(s.data) == 0 19 | } 20 | 21 | func (s *stack[E]) push(value E) { 22 | s.data = append(s.data, value) 23 | } 24 | 25 | func (s *stack[E]) pop() E { 26 | value := s.data[len(s.data)-1] 27 | var empty E 28 | s.data[len(s.data)-1] = empty 29 | s.data = s.data[:len(s.data)-1] 30 | return value 31 | } 32 | 33 | func (s *stack[E]) peek() E { 34 | return s.data[len(s.data)-1] 35 | } 36 | 37 | type queue[E any] struct { 38 | data []E 39 | head, tail int 40 | } 41 | 42 | func (f *queue[E]) add(value E) { 43 | if f.len()+1 >= len(f.data) { 44 | f.grow() 45 | } 46 | f.data[f.tail] = value 47 | if f.tail++; f.tail == len(f.data) { 48 | f.tail = 0 49 | } 50 | } 51 | 52 | func (f *queue[E]) remove() E { 53 | value := f.data[f.head] 54 | var empty E 55 | f.data[f.head] = empty 56 | if f.head++; f.head == len(f.data) { 57 | f.head = 0 58 | } 59 | return value 60 | } 61 | 62 | func (f *queue[E]) len() int { 63 | if f.tail >= f.head { 64 | return f.tail - f.head 65 | } 66 | return len(f.data) - f.head + f.tail 67 | } 68 | 69 | func (f *queue[E]) grow() { 70 | c := len(f.data) * 2 71 | if c == 0 { 72 | c = 4 73 | } 74 | buf, count := make([]E, c), f.len() 75 | if f.tail >= f.head { 76 | copy(buf[:count], f.data[f.head:f.tail]) 77 | } else { 78 | hindex := len(f.data) - f.head 79 | copy(buf[:hindex], f.data[f.head:]) 80 | copy(buf[hindex:count], f.data[:f.tail]) 81 | } 82 | f.data, f.head, f.tail = buf, 0, count 83 | } 84 | 85 | // xmlReader provides the interface by which an XML byte stream is 86 | // processed and decoded. 87 | type xmlReader interface { 88 | Bytes() int64 89 | Read(p []byte) (n int, err error) 90 | } 91 | 92 | // xmlSimpleReader implements a proxy reader that counts the number of 93 | // bytes read from its encapsulated reader. 
94 | type xmlSimpleReader struct { 95 | r io.Reader 96 | bytes int64 97 | } 98 | 99 | func newXmlSimpleReader(r io.Reader) xmlReader { 100 | return &xmlSimpleReader{r, 0} 101 | } 102 | 103 | func (xr *xmlSimpleReader) Bytes() int64 { 104 | return xr.bytes 105 | } 106 | 107 | func (xr *xmlSimpleReader) Read(p []byte) (n int, err error) { 108 | n, err = xr.r.Read(p) 109 | xr.bytes += int64(n) 110 | return n, err 111 | } 112 | 113 | // xmlPeekReader implements a proxy reader that counts the number of 114 | // bytes read from its encapsulated reader. It also allows the caller to 115 | // "peek" at the previous portions of the buffer after they have been 116 | // parsed. 117 | type xmlPeekReader struct { 118 | r io.Reader 119 | bytes int64 // total bytes read by the Read function 120 | buf []byte // internal read buffer 121 | bufSize int // total bytes used in the read buffer 122 | bufOffset int64 // total bytes read when buf was last filled 123 | window []byte // current read buffer window 124 | peekBuf []byte // buffer used to store data to be peeked at later 125 | peekOffset int64 // total read offset of the start of the peek buffer 126 | } 127 | 128 | func newXmlPeekReader(r io.Reader) *xmlPeekReader { 129 | buf := make([]byte, 4096) 130 | return &xmlPeekReader{ 131 | r: r, 132 | bytes: 0, 133 | buf: buf, 134 | bufSize: 0, 135 | bufOffset: 0, 136 | window: buf[0:0], 137 | peekBuf: make([]byte, 0), 138 | peekOffset: -1, 139 | } 140 | } 141 | 142 | func (xr *xmlPeekReader) Bytes() int64 { 143 | return xr.bytes 144 | } 145 | 146 | func (xr *xmlPeekReader) Read(p []byte) (n int, err error) { 147 | if len(xr.window) == 0 { 148 | err = xr.fill() 149 | if err != nil { 150 | return 0, err 151 | } 152 | if len(xr.window) == 0 { 153 | return 0, nil 154 | } 155 | } 156 | 157 | if len(xr.window) < len(p) { 158 | n = len(xr.window) 159 | } else { 160 | n = len(p) 161 | } 162 | 163 | copy(p, xr.window) 164 | xr.window = xr.window[n:] 165 | xr.bytes += int64(n) 166 | 167 | return 
n, err 168 | } 169 | 170 | func (xr *xmlPeekReader) PeekPrepare(offset int64, maxLen int) { 171 | if maxLen > cap(xr.peekBuf) { 172 | xr.peekBuf = make([]byte, 0, maxLen) 173 | } 174 | xr.peekBuf = xr.peekBuf[0:0] 175 | xr.peekOffset = offset 176 | xr.updatePeekBuf() 177 | } 178 | 179 | func (xr *xmlPeekReader) PeekFinalize() []byte { 180 | xr.updatePeekBuf() 181 | return xr.peekBuf 182 | } 183 | 184 | func (xr *xmlPeekReader) fill() error { 185 | xr.bufOffset = xr.bytes 186 | xr.bufSize = 0 187 | n, err := xr.r.Read(xr.buf) 188 | if err != nil { 189 | xr.window, xr.bufSize = xr.buf[0:0], 0 190 | return err 191 | } 192 | xr.window, xr.bufSize = xr.buf[:n], n 193 | xr.updatePeekBuf() 194 | return nil 195 | } 196 | 197 | func (xr *xmlPeekReader) updatePeekBuf() { 198 | peekRemain := cap(xr.peekBuf) - len(xr.peekBuf) 199 | if xr.peekOffset >= 0 && peekRemain > 0 { 200 | rangeMin := xr.peekOffset 201 | rangeMax := xr.peekOffset + int64(cap(xr.peekBuf)) 202 | bufMin := xr.bufOffset 203 | bufMax := xr.bufOffset + int64(xr.bufSize) 204 | if rangeMin < bufMin { 205 | rangeMin = bufMin 206 | } 207 | if rangeMax > bufMax { 208 | rangeMax = bufMax 209 | } 210 | if rangeMax > rangeMin { 211 | rangeMin -= xr.bufOffset 212 | rangeMax -= xr.bufOffset 213 | if int(rangeMax-rangeMin) > peekRemain { 214 | rangeMax = rangeMin + int64(peekRemain) 215 | } 216 | xr.peekBuf = append(xr.peekBuf, xr.buf[rangeMin:rangeMax]...) 217 | } 218 | } 219 | } 220 | 221 | // xmlWriter implements a proxy writer that counts the number of 222 | // bytes written by its encapsulated writer. 
type xmlWriter struct {
	w     io.Writer // wrapped writer
	bytes int64     // total bytes the wrapped writer reported as written
}

// newXmlWriter wraps w in a byte-counting proxy writer.
func newXmlWriter(w io.Writer) *xmlWriter {
	return &xmlWriter{w: w}
}

// Write forwards p to the wrapped writer and adds the number of bytes it
// reported as written to the running total.
func (xw *xmlWriter) Write(p []byte) (n int, err error) {
	n, err = xw.w.Write(p)
	xw.bytes += int64(n)
	return n, err
}

// isWhitespace returns true if the string contains only whitespace
// characters (space, tab, newline, carriage return). The empty string
// is considered whitespace.
func isWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n', '\r':
		default:
			return false
		}
	}
	return true
}

// spaceMatch returns true if namespace a is the empty string
// or if namespace a equals namespace b.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}

// spaceDecompose breaks a namespace:tag identifier at the first ':'
// and returns the two parts. If there is no ':', the namespace is empty
// and the whole string is returned as the key.
func spaceDecompose(str string) (space, key string) {
	if before, after, found := strings.Cut(str, ":"); found {
		return before, after
	}
	return "", str
}

// Strings used by indentCRLF and indentLF. Each holds a CRLF followed by
// sixteen indent characters, so that indents of up to sixteen characters
// are produced by slicing rather than by allocating a new string.
const (
	indentSpaces = "\r\n                "
	indentTabs   = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)

// indentCRLF returns a CRLF newline followed by n copies of the first
// non-CRLF character in the source string. A negative n yields just the
// CRLF. The source must start with "\r\n" followed by at least one
// indent character.
func indentCRLF(n int, source string) string {
	switch {
	case n < 0:
		return source[:2]
	case n < len(source)-1:
		// Enough indent characters are cached in source; slice them out.
		return source[:n+2]
	default:
		// Extend the cached indent with the missing characters.
		return source + strings.Repeat(source[2:3], n-len(source)+2)
	}
}

// indentLF returns a LF newline followed by n copies of the first non-LF
// character in the source string.
func indentLF(n int, source string) string {
	switch {
	case n < 0:
		return source[1:2]
	case n < len(source)-1:
		// Enough indent characters are already present: slice them out,
		// skipping the leading '\r'.
		return source[1 : n+2]
	default:
		// Extend the source with the extra indent characters required.
		return source[1:] + strings.Repeat(source[2:3], n-len(source)+2)
	}
}

// nextIndex returns the index of the next occurrence of byte ch in s,
// starting from offset. It returns -1 if the byte is not found or if
// offset lies outside the string.
func nextIndex(s string, ch byte, offset int) int {
	if offset < 0 || offset > len(s) {
		return -1
	}
	if i := strings.IndexByte(s[offset:], ch); i >= 0 {
		return offset + i
	}
	return -1
}

// isInteger returns true if the string s contains an integer: an optional
// leading '-' followed by one or more decimal digits. The empty string and
// a lone "-" are not integers.
func isInteger(s string) bool {
	digits := strings.TrimPrefix(s, "-")
	if digits == "" {
		return false
	}
	for i := 0; i < len(digits); i++ {
		if digits[i] < '0' || digits[i] > '9' {
			return false
		}
	}
	return true
}

// An escapeMode selects the set of characters that escapeString replaces.
type escapeMode byte

const (
	escapeNormal escapeMode = iota
	escapeCanonicalText
	escapeCanonicalAttr
)

// escapeString writes an escaped version of a string to the writer.
332 | func escapeString(w Writer, s string, m escapeMode) { 333 | var esc []byte 334 | last := 0 335 | for i := 0; i < len(s); { 336 | r, width := utf8.DecodeRuneInString(s[i:]) 337 | i += width 338 | switch r { 339 | case '&': 340 | esc = []byte("&") 341 | case '<': 342 | esc = []byte("<") 343 | case '>': 344 | if m == escapeCanonicalAttr { 345 | continue 346 | } 347 | esc = []byte(">") 348 | case '\'': 349 | if m != escapeNormal { 350 | continue 351 | } 352 | esc = []byte("'") 353 | case '"': 354 | if m == escapeCanonicalText { 355 | continue 356 | } 357 | esc = []byte(""") 358 | case '\t': 359 | if m != escapeCanonicalAttr { 360 | continue 361 | } 362 | esc = []byte(" ") 363 | case '\n': 364 | if m != escapeCanonicalAttr { 365 | continue 366 | } 367 | esc = []byte(" ") 368 | case '\r': 369 | if m == escapeNormal { 370 | continue 371 | } 372 | esc = []byte(" ") 373 | default: 374 | if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { 375 | esc = []byte("\uFFFD") 376 | break 377 | } 378 | continue 379 | } 380 | w.WriteString(s[last : i-width]) 381 | w.Write(esc) 382 | last = i 383 | } 384 | w.WriteString(s[last:]) 385 | } 386 | 387 | func isInCharacterRange(r rune) bool { 388 | return r == 0x09 || 389 | r == 0x0A || 390 | r == 0x0D || 391 | r >= 0x20 && r <= 0xD7FF || 392 | r >= 0xE000 && r <= 0xFFFD || 393 | r >= 0x10000 && r <= 0x10FFFF 394 | } 395 | -------------------------------------------------------------------------------- /path.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015-2019 Brett Vickers. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package etree 6 | 7 | import ( 8 | "strconv" 9 | "strings" 10 | ) 11 | 12 | /* 13 | A Path is a string that represents a search path through an etree starting 14 | from the document root or an arbitrary element. 
Paths are used with the 15 | Element object's Find* methods to locate and return desired elements. 16 | 17 | A Path consists of a series of slash-separated "selectors", each of which may 18 | be modified by one or more bracket-enclosed "filters". Selectors are used to 19 | traverse the etree from element to element, while filters are used to narrow 20 | the list of candidate elements at each node. 21 | 22 | Although etree Path strings are structurally and behaviorally similar to XPath 23 | strings (https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more 24 | limited set of selectors and filtering options. 25 | 26 | The following selectors are supported by etree paths: 27 | 28 | . Select the current element. 29 | .. Select the parent of the current element. 30 | * Select all child elements of the current element. 31 | / Select the root element when used at the start of a path. 32 | // Select all descendants of the current element. 33 | tag Select all child elements with a name matching the tag. 34 | 35 | The following basic filters are supported: 36 | 37 | [@attrib] Keep elements with an attribute named attrib. 38 | [@attrib='val'] Keep elements with an attribute named attrib and value matching val. 39 | [tag] Keep elements with a child element named tag. 40 | [tag='val'] Keep elements with a child element named tag and text matching val. 41 | [n] Keep the n-th element, where n is a numeric index starting from 1. 42 | 43 | The following function-based filters are supported: 44 | 45 | [text()] Keep elements with non-empty text. 46 | [text()='val'] Keep elements whose text matches val. 47 | [local-name()='val'] Keep elements whose un-prefixed tag matches val. 48 | [name()='val'] Keep elements whose full tag exactly matches val. 49 | [namespace-prefix()] Keep elements with non-empty namespace prefixes. 50 | [namespace-prefix()='val'] Keep elements whose namespace prefix matches val. 51 | [namespace-uri()] Keep elements with non-empty namespace URIs. 
52 | [namespace-uri()='val'] Keep elements whose namespace URI matches val. 53 | 54 | Below are some examples of etree path strings. 55 | 56 | Select the bookstore child element of the root element: 57 | 58 | /bookstore 59 | 60 | Beginning from the root element, select the title elements of all descendant 61 | book elements having a 'category' attribute of 'WEB': 62 | 63 | //book[@category='WEB']/title 64 | 65 | Beginning from the current element, select the first descendant book element 66 | with a title child element containing the text 'Great Expectations': 67 | 68 | .//book[title='Great Expectations'][1] 69 | 70 | Beginning from the current element, select all child elements of book elements 71 | with an attribute 'language' set to 'english': 72 | 73 | ./book/*[@language='english'] 74 | 75 | Beginning from the current element, select all child elements of book elements 76 | containing the text 'special': 77 | 78 | ./book/*[text()='special'] 79 | 80 | Beginning from the current element, select all descendant book elements whose 81 | title child element has a 'language' attribute of 'french': 82 | 83 | .//book/title[@language='french']/.. 84 | 85 | Beginning from the current element, select all descendant book elements 86 | belonging to the http://www.w3.org/TR/html4/ namespace: 87 | 88 | .//book[namespace-uri()='http://www.w3.org/TR/html4/'] 89 | */ 90 | type Path struct { 91 | segments []segment 92 | } 93 | 94 | // ErrPath is returned by path functions when an invalid etree path is provided. 95 | type ErrPath string 96 | 97 | // Error returns the string describing a path error. 98 | func (err ErrPath) Error() string { 99 | return "etree: " + string(err) 100 | } 101 | 102 | // CompilePath creates an optimized version of an XPath-like string that 103 | // can be used to query elements in an element tree. 
104 | func CompilePath(path string) (Path, error) { 105 | var comp compiler 106 | segments := comp.parsePath(path) 107 | if comp.err != ErrPath("") { 108 | return Path{nil}, comp.err 109 | } 110 | return Path{segments}, nil 111 | } 112 | 113 | // MustCompilePath creates an optimized version of an XPath-like string that 114 | // can be used to query elements in an element tree. Panics if an error 115 | // occurs. Use this function to create Paths when you know the path is 116 | // valid (i.e., if it's hard-coded). 117 | func MustCompilePath(path string) Path { 118 | p, err := CompilePath(path) 119 | if err != nil { 120 | panic(err) 121 | } 122 | return p 123 | } 124 | 125 | // A segment is a portion of a path between "/" characters. 126 | // It contains one selector and zero or more [filters]. 127 | type segment struct { 128 | sel selector 129 | filters []filter 130 | } 131 | 132 | func (seg *segment) apply(e *Element, p *pather) { 133 | seg.sel.apply(e, p) 134 | for _, f := range seg.filters { 135 | f.apply(p) 136 | } 137 | } 138 | 139 | // A selector selects XML elements for consideration by the 140 | // path traversal. 141 | type selector interface { 142 | apply(e *Element, p *pather) 143 | } 144 | 145 | // A filter pares down a list of candidate XML elements based 146 | // on a path filter in [brackets]. 147 | type filter interface { 148 | apply(p *pather) 149 | } 150 | 151 | // A pather is helper object that traverses an element tree using 152 | // a Path object. It collects and deduplicates all elements matching 153 | // the path query. 154 | type pather struct { 155 | queue queue[node] 156 | results []*Element 157 | inResults map[*Element]bool 158 | candidates []*Element 159 | scratch []*Element // used by filters 160 | } 161 | 162 | // A node represents an element and the remaining path segments that 163 | // should be applied against it by the pather. 
164 | type node struct { 165 | e *Element 166 | segments []segment 167 | } 168 | 169 | func newPather() *pather { 170 | return &pather{ 171 | results: make([]*Element, 0), 172 | inResults: make(map[*Element]bool), 173 | candidates: make([]*Element, 0), 174 | scratch: make([]*Element, 0), 175 | } 176 | } 177 | 178 | // traverse follows the path from the element e, collecting 179 | // and then returning all elements that match the path's selectors 180 | // and filters. 181 | func (p *pather) traverse(e *Element, path Path) []*Element { 182 | for p.queue.add(node{e, path.segments}); p.queue.len() > 0; { 183 | p.eval(p.queue.remove()) 184 | } 185 | return p.results 186 | } 187 | 188 | // eval evaluates the current path node by applying the remaining 189 | // path's selector rules against the node's element. 190 | func (p *pather) eval(n node) { 191 | p.candidates = p.candidates[0:0] 192 | seg, remain := n.segments[0], n.segments[1:] 193 | seg.apply(n.e, p) 194 | 195 | if len(remain) == 0 { 196 | for _, c := range p.candidates { 197 | if in := p.inResults[c]; !in { 198 | p.inResults[c] = true 199 | p.results = append(p.results, c) 200 | } 201 | } 202 | } else { 203 | for _, c := range p.candidates { 204 | p.queue.add(node{c, remain}) 205 | } 206 | } 207 | } 208 | 209 | // A compiler generates a compiled path from a path string. 210 | type compiler struct { 211 | err ErrPath 212 | } 213 | 214 | // parsePath parses an XPath-like string describing a path 215 | // through an element tree and returns a slice of segment 216 | // descriptors. 
217 | func (c *compiler) parsePath(path string) []segment { 218 | // If path ends with //, fix it 219 | if strings.HasSuffix(path, "//") { 220 | path += "*" 221 | } 222 | 223 | var segments []segment 224 | 225 | // Check for an absolute path 226 | if strings.HasPrefix(path, "/") { 227 | segments = append(segments, segment{new(selectRoot), []filter{}}) 228 | path = path[1:] 229 | } 230 | 231 | // Split path into segments 232 | for _, s := range splitPath(path) { 233 | segments = append(segments, c.parseSegment(s)) 234 | if c.err != ErrPath("") { 235 | break 236 | } 237 | } 238 | return segments 239 | } 240 | 241 | func splitPath(path string) []string { 242 | var pieces []string 243 | start := 0 244 | inquote := false 245 | var quote byte 246 | for i := 0; i+1 <= len(path); i++ { 247 | if !inquote { 248 | if path[i] == '\'' || path[i] == '"' { 249 | inquote, quote = true, path[i] 250 | } else if path[i] == '/' { 251 | pieces = append(pieces, path[start:i]) 252 | start = i + 1 253 | } 254 | } else if path[i] == quote { 255 | inquote = false 256 | } 257 | } 258 | return append(pieces, path[start:]) 259 | } 260 | 261 | // parseSegment parses a path segment between / characters. 262 | func (c *compiler) parseSegment(path string) segment { 263 | pieces := strings.Split(path, "[") 264 | seg := segment{ 265 | sel: c.parseSelector(pieces[0]), 266 | filters: []filter{}, 267 | } 268 | for i := 1; i < len(pieces); i++ { 269 | fpath := pieces[i] 270 | if len(fpath) == 0 || fpath[len(fpath)-1] != ']' { 271 | c.err = ErrPath("path has invalid filter [brackets].") 272 | break 273 | } 274 | seg.filters = append(seg.filters, c.parseFilter(fpath[:len(fpath)-1])) 275 | } 276 | return seg 277 | } 278 | 279 | // parseSelector parses a selector at the start of a path segment. 
280 | func (c *compiler) parseSelector(path string) selector { 281 | switch path { 282 | case ".": 283 | return new(selectSelf) 284 | case "..": 285 | return new(selectParent) 286 | case "*": 287 | return new(selectChildren) 288 | case "": 289 | return new(selectDescendants) 290 | default: 291 | return newSelectChildrenByTag(path) 292 | } 293 | } 294 | 295 | var fnTable = map[string]func(e *Element) string{ 296 | "local-name": (*Element).name, 297 | "name": (*Element).FullTag, 298 | "namespace-prefix": (*Element).namespacePrefix, 299 | "namespace-uri": (*Element).NamespaceURI, 300 | "text": (*Element).Text, 301 | } 302 | 303 | // parseFilter parses a path filter contained within [brackets]. 304 | func (c *compiler) parseFilter(path string) filter { 305 | if len(path) == 0 { 306 | c.err = ErrPath("path contains an empty filter expression.") 307 | return nil 308 | } 309 | 310 | // Filter contains [@attr='val'], [@attr="val"], [fn()='val'], 311 | // [fn()="val"], [tag='val'] or [tag="val"]? 
312 | eqindex := strings.IndexByte(path, '=') 313 | if eqindex >= 0 && eqindex+1 < len(path) { 314 | quote := path[eqindex+1] 315 | if quote == '\'' || quote == '"' { 316 | rindex := nextIndex(path, quote, eqindex+2) 317 | if rindex != len(path)-1 { 318 | c.err = ErrPath("path has mismatched filter quotes.") 319 | return nil 320 | } 321 | 322 | key := path[:eqindex] 323 | value := path[eqindex+2 : rindex] 324 | 325 | switch { 326 | case key[0] == '@': 327 | return newFilterAttrVal(key[1:], value) 328 | case strings.HasSuffix(key, "()"): 329 | name := key[:len(key)-2] 330 | if fn, ok := fnTable[name]; ok { 331 | return newFilterFuncVal(fn, value) 332 | } 333 | c.err = ErrPath("path has unknown function " + name) 334 | return nil 335 | default: 336 | return newFilterChildText(key, value) 337 | } 338 | } 339 | } 340 | 341 | // Filter contains [@attr], [N], [tag] or [fn()] 342 | switch { 343 | case path[0] == '@': 344 | return newFilterAttr(path[1:]) 345 | case strings.HasSuffix(path, "()"): 346 | name := path[:len(path)-2] 347 | if fn, ok := fnTable[name]; ok { 348 | return newFilterFunc(fn) 349 | } 350 | c.err = ErrPath("path has unknown function " + name) 351 | return nil 352 | case isInteger(path): 353 | pos, _ := strconv.Atoi(path) 354 | switch { 355 | case pos > 0: 356 | return newFilterPos(pos - 1) 357 | default: 358 | return newFilterPos(pos) 359 | } 360 | default: 361 | return newFilterChild(path) 362 | } 363 | } 364 | 365 | // selectSelf selects the current element into the candidate list. 366 | type selectSelf struct{} 367 | 368 | func (s *selectSelf) apply(e *Element, p *pather) { 369 | p.candidates = append(p.candidates, e) 370 | } 371 | 372 | // selectRoot selects the element's root node. 
373 | type selectRoot struct{} 374 | 375 | func (s *selectRoot) apply(e *Element, p *pather) { 376 | root := e 377 | for root.parent != nil { 378 | root = root.parent 379 | } 380 | p.candidates = append(p.candidates, root) 381 | } 382 | 383 | // selectParent selects the element's parent into the candidate list. 384 | type selectParent struct{} 385 | 386 | func (s *selectParent) apply(e *Element, p *pather) { 387 | if e.parent != nil { 388 | p.candidates = append(p.candidates, e.parent) 389 | } 390 | } 391 | 392 | // selectChildren selects the element's child elements into the 393 | // candidate list. 394 | type selectChildren struct{} 395 | 396 | func (s *selectChildren) apply(e *Element, p *pather) { 397 | for _, c := range e.Child { 398 | if c, ok := c.(*Element); ok { 399 | p.candidates = append(p.candidates, c) 400 | } 401 | } 402 | } 403 | 404 | // selectDescendants selects all descendant child elements 405 | // of the element into the candidate list. 406 | type selectDescendants struct{} 407 | 408 | func (s *selectDescendants) apply(e *Element, p *pather) { 409 | var queue queue[*Element] 410 | for queue.add(e); queue.len() > 0; { 411 | e := queue.remove() 412 | p.candidates = append(p.candidates, e) 413 | for _, c := range e.Child { 414 | if c, ok := c.(*Element); ok { 415 | queue.add(c) 416 | } 417 | } 418 | } 419 | } 420 | 421 | // selectChildrenByTag selects into the candidate list all child 422 | // elements of the element having the specified tag. 
423 | type selectChildrenByTag struct { 424 | space, tag string 425 | } 426 | 427 | func newSelectChildrenByTag(path string) *selectChildrenByTag { 428 | s, l := spaceDecompose(path) 429 | return &selectChildrenByTag{s, l} 430 | } 431 | 432 | func (s *selectChildrenByTag) apply(e *Element, p *pather) { 433 | for _, c := range e.Child { 434 | if c, ok := c.(*Element); ok && spaceMatch(s.space, c.Space) && s.tag == c.Tag { 435 | p.candidates = append(p.candidates, c) 436 | } 437 | } 438 | } 439 | 440 | // filterPos filters the candidate list, keeping only the 441 | // candidate at the specified index. 442 | type filterPos struct { 443 | index int 444 | } 445 | 446 | func newFilterPos(pos int) *filterPos { 447 | return &filterPos{pos} 448 | } 449 | 450 | func (f *filterPos) apply(p *pather) { 451 | if f.index >= 0 { 452 | if f.index < len(p.candidates) { 453 | p.scratch = append(p.scratch, p.candidates[f.index]) 454 | } 455 | } else { 456 | if -f.index <= len(p.candidates) { 457 | p.scratch = append(p.scratch, p.candidates[len(p.candidates)+f.index]) 458 | } 459 | } 460 | p.candidates, p.scratch = p.scratch, p.candidates[0:0] 461 | } 462 | 463 | // filterAttr filters the candidate list for elements having 464 | // the specified attribute. 465 | type filterAttr struct { 466 | space, key string 467 | } 468 | 469 | func newFilterAttr(str string) *filterAttr { 470 | s, l := spaceDecompose(str) 471 | return &filterAttr{s, l} 472 | } 473 | 474 | func (f *filterAttr) apply(p *pather) { 475 | for _, c := range p.candidates { 476 | for _, a := range c.Attr { 477 | if spaceMatch(f.space, a.Space) && f.key == a.Key { 478 | p.scratch = append(p.scratch, c) 479 | break 480 | } 481 | } 482 | } 483 | p.candidates, p.scratch = p.scratch, p.candidates[0:0] 484 | } 485 | 486 | // filterAttrVal filters the candidate list for elements having 487 | // the specified attribute with the specified value. 
488 | type filterAttrVal struct { 489 | space, key, val string 490 | } 491 | 492 | func newFilterAttrVal(str, value string) *filterAttrVal { 493 | s, l := spaceDecompose(str) 494 | return &filterAttrVal{s, l, value} 495 | } 496 | 497 | func (f *filterAttrVal) apply(p *pather) { 498 | for _, c := range p.candidates { 499 | for _, a := range c.Attr { 500 | if spaceMatch(f.space, a.Space) && f.key == a.Key && f.val == a.Value { 501 | p.scratch = append(p.scratch, c) 502 | break 503 | } 504 | } 505 | } 506 | p.candidates, p.scratch = p.scratch, p.candidates[0:0] 507 | } 508 | 509 | // filterFunc filters the candidate list for elements satisfying a custom 510 | // boolean function. 511 | type filterFunc struct { 512 | fn func(e *Element) string 513 | } 514 | 515 | func newFilterFunc(fn func(e *Element) string) *filterFunc { 516 | return &filterFunc{fn} 517 | } 518 | 519 | func (f *filterFunc) apply(p *pather) { 520 | for _, c := range p.candidates { 521 | if f.fn(c) != "" { 522 | p.scratch = append(p.scratch, c) 523 | } 524 | } 525 | p.candidates, p.scratch = p.scratch, p.candidates[0:0] 526 | } 527 | 528 | // filterFuncVal filters the candidate list for elements containing a value 529 | // matching the result of a custom function. 530 | type filterFuncVal struct { 531 | fn func(e *Element) string 532 | val string 533 | } 534 | 535 | func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal { 536 | return &filterFuncVal{fn, value} 537 | } 538 | 539 | func (f *filterFuncVal) apply(p *pather) { 540 | for _, c := range p.candidates { 541 | if f.fn(c) == f.val { 542 | p.scratch = append(p.scratch, c) 543 | } 544 | } 545 | p.candidates, p.scratch = p.scratch, p.candidates[0:0] 546 | } 547 | 548 | // filterChild filters the candidate list for elements having 549 | // a child element with the specified tag. 
550 | type filterChild struct { 551 | space, tag string 552 | } 553 | 554 | func newFilterChild(str string) *filterChild { 555 | s, l := spaceDecompose(str) 556 | return &filterChild{s, l} 557 | } 558 | 559 | func (f *filterChild) apply(p *pather) { 560 | for _, c := range p.candidates { 561 | for _, cc := range c.Child { 562 | if cc, ok := cc.(*Element); ok && 563 | spaceMatch(f.space, cc.Space) && 564 | f.tag == cc.Tag { 565 | p.scratch = append(p.scratch, c) 566 | } 567 | } 568 | } 569 | p.candidates, p.scratch = p.scratch, p.candidates[0:0] 570 | } 571 | 572 | // filterChildText filters the candidate list for elements having 573 | // a child element with the specified tag and text. 574 | type filterChildText struct { 575 | space, tag, text string 576 | } 577 | 578 | func newFilterChildText(str, text string) *filterChildText { 579 | s, l := spaceDecompose(str) 580 | return &filterChildText{s, l, text} 581 | } 582 | 583 | func (f *filterChildText) apply(p *pather) { 584 | for _, c := range p.candidates { 585 | for _, cc := range c.Child { 586 | if cc, ok := cc.(*Element); ok && 587 | spaceMatch(f.space, cc.Space) && 588 | f.tag == cc.Tag && 589 | f.text == cc.Text() { 590 | p.scratch = append(p.scratch, c) 591 | } 592 | } 593 | } 594 | p.candidates, p.scratch = p.scratch, p.candidates[0:0] 595 | } 596 | -------------------------------------------------------------------------------- /path_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015-2019 Brett Vickers. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package etree 6 | 7 | import "testing" 8 | 9 | var testXML = ` 10 | 11 | 12 | 13 | 14 | 15 | 16 | Everyday Italian 17 | Giada De Laurentiis 18 | 2005 19 | 30.00 20 | Clarkson Potter 21 | 22 | 23 | 24 | Harry Potter 25 | J K. 
Rowling 26 | 2005 27 | 29.99 28 | 29 | 30 | 31 | 32 | 33 | XQuery Kick Start 34 | James McGovern 35 | Per Bothner 36 | Kurt Cagle 37 | James Linn 38 | Vaidyanathan Nagarajan 39 | 2003 40 | 49.99 41 | 42 | 43 | 44 | 45 | 46 | 47 | Learning XML 48 | Erik T. Ray 49 | 2003 50 | 39.95 51 | 52 | 53 | 54 | ` 55 | 56 | type test struct { 57 | path string 58 | result interface{} 59 | } 60 | 61 | type errorResult string 62 | 63 | var tests = []test{ 64 | // basic queries 65 | {"./bookstore/book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, 66 | {"./bookstore/book/author", []string{"Giada De Laurentiis", "J K. Rowling", "James McGovern", "Per Bothner", "Kurt Cagle", "James Linn", "Vaidyanathan Nagarajan", "Erik T. Ray"}}, 67 | {"./bookstore/book/year", []string{"2005", "2005", "2003", "2003"}}, 68 | {"./bookstore/book/p:price", []string{"30.00", "29.99", "39.95"}}, 69 | {"./bookstore/book/isbn", nil}, 70 | 71 | // descendant queries 72 | {"//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, 73 | {"//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, 74 | {".//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, 75 | {".//bookstore//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, 76 | {".//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, 77 | {".//p:price/.", []string{"30.00", "29.99", "39.95"}}, 78 | {".//price", []string{"30.00", "29.99", "49.99", "39.95"}}, 79 | 80 | // positional queries 81 | {"./bookstore/book[1]/title", "Everyday Italian"}, 82 | {"./bookstore/book[4]/title", "Learning XML"}, 83 | {"./bookstore/book[5]/title", nil}, 84 | {"./bookstore/book[3]/author[0]", "James McGovern"}, 85 | {"./bookstore/book[3]/author[1]", "James McGovern"}, 86 | {"./bookstore/book[3]/author[3]/./.", "Kurt Cagle"}, 87 | 
{"./bookstore/book[3]/author[6]", nil}, 88 | {"./bookstore/book[-1]/title", "Learning XML"}, 89 | {"./bookstore/book[-4]/title", "Everyday Italian"}, 90 | {"./bookstore/book[-5]/title", nil}, 91 | 92 | // text function queries 93 | {"./bookstore/book[author='James McGovern']/title", "XQuery Kick Start"}, 94 | {"./bookstore/book[author='Per Bothner']/title", "XQuery Kick Start"}, 95 | {"./bookstore/book[author='Kurt Cagle']/title", "XQuery Kick Start"}, 96 | {"./bookstore/book[author='James Linn']/title", "XQuery Kick Start"}, 97 | {"./bookstore/book[author='Vaidyanathan Nagarajan']/title", "XQuery Kick Start"}, 98 | {"//book[p:price='29.99']/title", "Harry Potter"}, 99 | {"//book[price='29.99']/title", "Harry Potter"}, 100 | {"//book/price[text()='29.99']", "29.99"}, 101 | {"//book/author[text()='Kurt Cagle']", "Kurt Cagle"}, 102 | {"//book/editor[text()]", []string{"Clarkson Potter", "\n\t\t"}}, 103 | 104 | // namespace function queries 105 | {"//*[namespace-uri()]", []string{"30.00", "29.99", "39.95"}}, 106 | {"//*[namespace-uri()='urn:books-com:prices']", []string{"30.00", "29.99", "39.95"}}, 107 | {"//*[namespace-uri()='foo']", nil}, 108 | {"//*[namespace-prefix()]", []string{"30.00", "29.99", "39.95"}}, 109 | {"//*[namespace-prefix()='p']", []string{"30.00", "29.99", "39.95"}}, 110 | {"//*[name()='p:price']", []string{"30.00", "29.99", "39.95"}}, 111 | {"//*[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}}, 112 | {"//price[namespace-uri()='']", []string{"49.99"}}, 113 | {"//price[namespace-prefix()='']", []string{"49.99"}}, 114 | {"//price[name()='price']", []string{"49.99"}}, 115 | {"//price[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}}, 116 | 117 | // attribute queries 118 | {"./bookstore/book[@category='WEB']/title", []string{"XQuery Kick Start", "Learning XML"}}, 119 | {"./bookstore/book[@path='/books/xml']/title", []string{"Learning XML"}}, 120 | {"./bookstore/book[@category='COOKING']/title[@lang='en']", 
"Everyday Italian"}, 121 | {`./bookstore/book[@category="COOKING"]/title[@lang="en"]`, "Everyday Italian"}, 122 | {"./bookstore/book/title[@lang='en'][@sku='150']", "Harry Potter"}, 123 | {"./bookstore/book/title[@lang='fr']", nil}, 124 | {"//p:price[@p:tax='1.99']", []string{"29.99"}}, 125 | {"//p:price[@tax='1.99']", []string{"29.99"}}, 126 | {"//p:price[@p:tax]", []string{"29.99"}}, 127 | {"//p:price[@tax]", []string{"29.99"}}, 128 | 129 | // parent queries 130 | {"./bookstore/book[@category='COOKING']/title/../../book[4]/title", "Learning XML"}, 131 | 132 | // root queries 133 | {"/bookstore/book[1]/title", "Everyday Italian"}, 134 | {"/bookstore/book[4]/title", "Learning XML"}, 135 | {"/bookstore/book[5]/title", nil}, 136 | {"/bookstore/book[3]/author[0]", "James McGovern"}, 137 | {"/bookstore/book[3]/author[1]", "James McGovern"}, 138 | {"/bookstore/book[3]/author[3]/./.", "Kurt Cagle"}, 139 | {"/bookstore/book[3]/author[6]", nil}, 140 | {"/bookstore/book[-1]/title", "Learning XML"}, 141 | {"/bookstore/book[-4]/title", "Everyday Italian"}, 142 | {"/bookstore/book[-5]/title", nil}, 143 | 144 | // bad paths 145 | {"./bookstore/book[]", errorResult("etree: path contains an empty filter expression.")}, 146 | {"./bookstore/book[@category='WEB'", errorResult("etree: path has invalid filter [brackets].")}, 147 | {"./bookstore/book[@category='WEB]", errorResult("etree: path has mismatched filter quotes.")}, 148 | {`./bookstore/book[@category='WEB"]`, errorResult("etree: path has mismatched filter quotes.")}, 149 | {`./bookstore/book[@category="WEB']`, errorResult("etree: path has mismatched filter quotes.")}, 150 | {"./bookstore/book[author]a", errorResult("etree: path has invalid filter [brackets].")}, 151 | {"/][", errorResult("etree: path has invalid filter [brackets].")}, 152 | } 153 | 154 | func TestPath(t *testing.T) { 155 | doc := NewDocument() 156 | err := doc.ReadFromString(testXML) 157 | if err != nil { 158 | t.Error(err) 159 | } 160 | 161 | for _, test := 
range tests { 162 | path, err := CompilePath(test.path) 163 | if err != nil { 164 | if r, ok := test.result.(errorResult); !ok || err.Error() != string(r) { 165 | fail(t, test) 166 | } 167 | continue 168 | } 169 | 170 | // Test both FindElementsPath and FindElementPath 171 | element := doc.FindElementPath(path) 172 | elements := doc.FindElementsPath(path) 173 | 174 | switch s := test.result.(type) { 175 | case errorResult: 176 | fail(t, test) 177 | case nil: 178 | if element != nil || len(elements) != 0 { 179 | fail(t, test) 180 | } 181 | case string: 182 | if element == nil || element.Text() != s || 183 | len(elements) != 1 || elements[0].Text() != s { 184 | fail(t, test) 185 | } 186 | case []string: 187 | if element == nil || element.Text() != s[0] || len(elements) != len(s) { 188 | fail(t, test) 189 | continue 190 | } 191 | for i := 0; i < len(elements); i++ { 192 | if elements[i].Text() != s[i] { 193 | fail(t, test) 194 | break 195 | } 196 | } 197 | } 198 | 199 | } 200 | } 201 | 202 | func fail(t *testing.T, test test) { 203 | t.Helper() 204 | t.Errorf("etree: failed test '%s'\n", test.path) 205 | } 206 | 207 | func TestAbsolutePath(t *testing.T) { 208 | doc := NewDocument() 209 | err := doc.ReadFromString(testXML) 210 | if err != nil { 211 | t.Error(err) 212 | } 213 | 214 | elements := doc.FindElements("//book/author") 215 | for _, e := range elements { 216 | title := e.FindElement("/bookstore/book[1]/title") 217 | if title == nil || title.Text() != "Everyday Italian" { 218 | t.Errorf("etree: absolute path test failed") 219 | } 220 | 221 | title = e.FindElement("//book[p:price='29.99']/title") 222 | if title == nil || title.Text() != "Harry Potter" { 223 | t.Errorf("etree: absolute path test failed") 224 | } 225 | } 226 | } 227 | --------------------------------------------------------------------------------