The Iliad and The Odyssey

├── LICENSE ├── README.md ├── element.go ├── error.xml ├── go.mod ├── go.sum ├── query.go ├── sample.xml ├── xmlparser.go └── xmlparser_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, Tamer Gur 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the copyright holder nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xml stream parser 2 | 3 | xml-stream-parser is an XML parser for Go. It parses large XML data efficiently in a streaming fashion. 4 | 5 | ## Usage 6 | 7 | ```xml 8 | <?xml version="1.0" encoding="UTF-8"?> 9 | <bookstore> 10 | <book> 11 | <title>The Iliad and The Odyssey</title> 12 | <price>12.95</price> 13 | <comments> 14 | <userComment rating="4">Best translation I've read.</userComment> 15 | <userComment rating="2">I like other versions better.</userComment> 16 | </comments> 17 | </book> 18 | <book> 19 | <title>Anthology of World Literature</title> 20 | <price>24.95</price> 21 | <comments> 22 | <userComment rating="3">Needs more modern literature.</userComment> 23 | <userComment rating="4">Excellent overview of world literature.</userComment> 24 | </comments> 25 | </book> 26 | <journal> 27 | <title>Journal of XML parsing</title> 28 | <issue>1</issue> 29 | </journal> 30 | </bookstore> 31 | ``` 32 | 33 | **Stream** over books and journals 34 | 35 | ```go 36 | f, _ := os.Open("input.xml") 37 | br := bufio.NewReaderSize(f, 65536) 38 | parser := xmlparser.NewXMLParser(br, "book", "journal") 39 | 40 | for xml := range parser.Stream() { 41 | fmt.Println(xml.Childs["title"][0].InnerText) 42 | if xml.Name == "book" { 43 | fmt.Println(xml.Childs["comments"][0].Childs["userComment"][0].Attrs["rating"]) 44 | fmt.Println(xml.Childs["comments"][0].Childs["userComment"][0].InnerText) 45 | } 46 | } 47 | ``` 48 | 49 | **Skip** tags for speed 50 | 51 | ```go 52 | parser := xmlparser.NewXMLParser(br, "book").SkipElements([]string{"price", "comments"}) 53 | ``` 54 | 55 | **Attributes** only 56 | 57 | ```go 58 | parser := xmlparser.NewXMLParser(br, "bookstore", "book").ParseAttributesOnly("bookstore") 59 | ``` 60 | 61 | **Error** handling 62 | 63 | ```go 64 | for xml := range parser.Stream() { 65 | if xml.Err != nil { 66 | // handle error 67 | } 68 | } 69 | ``` 70 | 71 | **Progress** of parsing 72 | 73 | ```go 74 | // total bytes read so far; use it to calculate the progress of parsing 75 | parser.TotalReadSize 76 | ``` 77 | 78 | **XPath** queries provide an alternative to the default fast access for different use cases 79 | ```go 80 | 81 | parser := xmlparser.NewXMLParser(br, "bookstore").EnableXpath() 82 | 83 | for xml 
:= range parser.Stream() { 84 | // select books 85 | xml.SelectElements("//book") 86 | xml.SelectElements("./book") 87 | xml.SelectElements("book") 88 | // select titles 89 | xml.SelectElements("./book/title") 90 | // select book with price condition 91 | xml.SelectElements("//book[price>=20.95]") 92 | // comments with rating 4 93 | xml.SelectElements("//book/comments/userComment[@rating='4']") 94 | } 95 | // to evaluate a function or reuse an existing xpath expression 96 | // sum of all the book prices 97 | expr, err := parser.CompileXpath("sum(//book/price)") 98 | price := expr.Evaluate(parser.CreateXPathNavigator(xml)).(float64) 99 | 100 | ``` 101 | XPath functionality is implemented via the [xpath](https://github.com/antchfx/xpath) library; see its documentation for more 102 | examples. 103 | 104 | If you are interested, also check out the [json parser](https://github.com/tamerh/jsparser), which works similarly. 105 | -------------------------------------------------------------------------------- /element.go: -------------------------------------------------------------------------------- 1 | package xmlparser 2 | 3 | type XMLElement struct { 4 | Name string 5 | Attrs map[string]string 6 | InnerText string 7 | Childs map[string][]XMLElement 8 | Err error 9 | // filled only when xpath is enabled 10 | childs []*XMLElement 11 | parent *XMLElement 12 | attrs []*xmlAttr 13 | localName string 14 | prefix string 15 | } 16 | 17 | type xmlAttr struct { 18 | name string 19 | value string 20 | } 21 | 22 | // SelectElements finds all child elements matching the specified xpath expression. 23 | func (n *XMLElement) SelectElements(exp string) ([]*XMLElement, error) { 24 | return find(n, exp) 25 | } 26 | 27 | // SelectElement finds the first child element matching the specified xpath expression. 
28 | func (n *XMLElement) SelectElement(exp string) (*XMLElement, error) { 29 | return findOne(n, exp) 30 | } 31 | 32 | // FirstChild returns the first element of n's xpath child list, or nil if the list is empty. 33 | func (n *XMLElement) FirstChild() *XMLElement { 34 | if len(n.childs) > 0 { 35 | return n.childs[0] 36 | } 37 | return nil 38 | } 39 | 40 | // LastChild returns the last element of n's xpath child list, or nil if the list is empty. 41 | func (n *XMLElement) LastChild() *XMLElement { 42 | if l := len(n.childs); l > 0 { 43 | return n.childs[l-1] 44 | } 45 | return nil 46 | } 47 | 48 | // PrevSibling returns the element before n in its parent's child list, or nil if n has no parent or is the first child. 49 | func (n *XMLElement) PrevSibling() *XMLElement { 50 | if n.parent != nil { 51 | for i, c := range n.parent.childs { 52 | if c == n { 53 | if i > 0 { // i == 0 is the first child: there is no childs[-1], so fall through to nil 54 | return n.parent.childs[i-1] 55 | } 56 | return nil 57 | } 58 | } 59 | } 60 | return nil 61 | } 62 | 63 | // NextSibling returns the element after n in its parent's child list, or nil if n has no parent or is the last child. 64 | func (n *XMLElement) NextSibling() *XMLElement { 65 | if n.parent != nil { 66 | for i, c := range n.parent.childs { 67 | if c == n { 68 | if i+1 < len(n.parent.childs) { 69 | return n.parent.childs[i+1] 70 | } 71 | return nil 72 | } 73 | } 74 | } 75 | return nil 76 | } 77 | -------------------------------------------------------------------------------- /error.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | InnerText110 4 | InnerText111 5 | 6 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tamerh/xml-stream-parser 2 | 3 | go 1.12 4 | 5 | require github.com/tamerh/xpath v1.0.0 // indirect 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/tamerh/xpath v1.0.0 h1:NccMES/Ej8slPCFDff73Kf6V1xu9hdbuKf2RyDsxf5Q= 2 | github.com/tamerh/xpath v1.0.0/go.mod h1:t0wnh72FQlOVEO20f2Dl3EoVxso9GnLREh1WTpvNmJQ= 3 | -------------------------------------------------------------------------------- /query.go: -------------------------------------------------------------------------------- 1 | package 
xmlparser 2 | 3 | import ( 4 | "github.com/tamerh/xpath" 5 | ) 6 | 7 | // CreateXPathNavigator creates a new xpath.NodeNavigator rooted at the specified XMLElement. 8 | func (x *XMLParser) CreateXPathNavigator(top *XMLElement) *XmlNodeNavigator { 9 | return &XmlNodeNavigator{curr: top, root: top, attr: -1} 10 | } 11 | 12 | // CompileXpath compiles the given xpath expression so it can be evaluated or reused. 13 | func (x *XMLParser) CompileXpath(expr string) (*xpath.Expr, error) { 14 | 15 | exp, err := xpath.Compile(expr) 16 | if err != nil { 17 | return nil, err 18 | } 19 | return exp, nil 20 | 21 | } 22 | 23 | // createXPathNavigator creates a new xpath.NodeNavigator rooted at the specified XMLElement. 24 | func createXPathNavigator(top *XMLElement) *XmlNodeNavigator { 25 | return &XmlNodeNavigator{curr: top, root: top, attr: -1} 26 | } 27 | 28 | type XmlNodeNavigator struct { 29 | root, curr *XMLElement // root is the element navigation started from; curr is the current position 30 | attr int // index into curr.attrs of the selected attribute, or -1 when positioned on the element itself 31 | } 32 | 33 | // find compiles expr and returns every element under top that the XPath expression selects. 34 | func find(top *XMLElement, expr string) ([]*XMLElement, error) { 35 | exp, err := xpath.Compile(expr) 36 | if err != nil { 37 | return []*XMLElement{}, err 38 | } 39 | t := exp.Select(createXPathNavigator(top)) 40 | var elems []*XMLElement 41 | for t.MoveNext() { 42 | elems = append(elems, t.Current().(*XmlNodeNavigator).curr) 43 | } 44 | return elems, nil 45 | } 46 | 47 | // findOne compiles expr and searches under top for elements the XPath expression selects; 48 | // it returns the first match, or nil if nothing matches. 
49 | func findOne(top *XMLElement, expr string) (*XMLElement, error) { 50 | exp, err := xpath.Compile(expr) 51 | if err != nil { 52 | return nil, err 53 | } 54 | t := exp.Select(createXPathNavigator(top)) 55 | var elem *XMLElement 56 | if t.MoveNext() { 57 | elem = t.Current().(*XmlNodeNavigator).curr //getCurrentNode(t) 58 | } 59 | return elem, nil 60 | } 61 | 62 | func (x *XmlNodeNavigator) Current() *XMLElement { 63 | return x.curr 64 | } 65 | 66 | func (x *XmlNodeNavigator) NodeType() xpath.NodeType { 67 | 68 | if x.curr == x.root { 69 | return xpath.RootNode 70 | } 71 | if x.attr != -1 { 72 | return xpath.AttributeNode 73 | } 74 | return xpath.ElementNode 75 | } 76 | 77 | func (x *XmlNodeNavigator) LocalName() string { 78 | if x.attr != -1 { 79 | return x.curr.attrs[x.attr].name 80 | } 81 | 82 | return x.curr.localName 83 | 84 | } 85 | 86 | func (x *XmlNodeNavigator) Prefix() string { 87 | 88 | return x.curr.prefix 89 | 90 | } 91 | 92 | func (x *XmlNodeNavigator) Value() string { 93 | 94 | if x.attr != -1 { 95 | return x.curr.attrs[x.attr].value 96 | } 97 | return x.curr.InnerText 98 | 99 | } 100 | 101 | func (x *XmlNodeNavigator) Copy() xpath.NodeNavigator { 102 | n := *x 103 | return &n 104 | } 105 | 106 | func (x *XmlNodeNavigator) MoveToRoot() { 107 | x.curr = x.root 108 | } 109 | 110 | func (x *XmlNodeNavigator) MoveToParent() bool { 111 | if x.attr != -1 { 112 | x.attr = -1 113 | return true 114 | } else if node := x.curr.parent; node != nil { 115 | x.curr = node 116 | return true 117 | } 118 | return false 119 | } 120 | 121 | func (x *XmlNodeNavigator) MoveToNextAttribute() bool { 122 | if x.attr >= len(x.curr.attrs)-1 { 123 | return false 124 | } 125 | x.attr++ 126 | return true 127 | } 128 | 129 | func (x *XmlNodeNavigator) MoveToChild() bool { 130 | if node := x.curr.FirstChild(); node != nil { 131 | x.curr = node 132 | return true 133 | } 134 | return false 135 | } 136 | 137 | func (x *XmlNodeNavigator) MoveToFirst() bool { 138 | if x.curr.parent != 
nil { 139 | node := x.curr.parent.FirstChild() 140 | if node != nil { 141 | x.curr = node 142 | return true 143 | } 144 | } 145 | return false 146 | } 147 | 148 | func (x *XmlNodeNavigator) MoveToPrevious() bool { 149 | node := x.curr.PrevSibling() 150 | if node != nil { 151 | x.curr = node 152 | return true 153 | } 154 | return false 155 | } 156 | 157 | func (x *XmlNodeNavigator) MoveToNext() bool { 158 | node := x.curr.NextSibling() 159 | if node != nil { 160 | x.curr = node 161 | return true 162 | } 163 | return false 164 | } 165 | 166 | func (x *XmlNodeNavigator) String() string { 167 | return x.Value() 168 | } 169 | 170 | func (x *XmlNodeNavigator) MoveTo(other xpath.NodeNavigator) bool { 171 | node, ok := other.(*XmlNodeNavigator) 172 | if !ok || node.root != x.root { 173 | return false 174 | } 175 | 176 | x.curr = node.curr 177 | x.attr = node.attr 178 | return true 179 | } 180 | -------------------------------------------------------------------------------- /sample.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | ]> 9 | 10 | 11 | 12 | Hello 13 | InnerText111 14 | 15 | InnerText13 16 | 17 | 18 | 19 | 20 | 21 | InnerText2 22 | 24 | InnerText213 25 | 26 | 27 | 28 | 29 | tag31 30 | tag32 31 | 32 | SkipTag 33 | 34 | 35 | InnerText0 36 | 37 | 38 | SkipTag 39 | 40 | 41 | -------------------------------------------------------------------------------- /xmlparser.go: -------------------------------------------------------------------------------- 1 | package xmlparser 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "strings" 7 | "unicode/utf8" 8 | ) 9 | 10 | type XMLParser struct { 11 | reader *bufio.Reader 12 | loopElements map[string]bool 13 | resultChannel chan *XMLElement 14 | skipElements map[string]bool 15 | attrOnlyElements map[string]bool 16 | skipOuterElements bool 17 | xpathEnabled bool 18 | scratch *scratch 19 | scratch2 *scratch 20 | TotalReadSize uint64 21 | } 22 | 23 | func 
NewXMLParser(reader *bufio.Reader, loopElements ...string) *XMLParser { 24 | 25 | x := &XMLParser{ 26 | reader: reader, 27 | loopElements: map[string]bool{}, 28 | attrOnlyElements: map[string]bool{}, 29 | resultChannel: make(chan *XMLElement, 256), 30 | skipElements: map[string]bool{}, 31 | scratch: &scratch{data: make([]byte, 1024)}, 32 | scratch2: &scratch{data: make([]byte, 1024)}, 33 | } 34 | 35 | // Register loop elements 36 | for _, e := range loopElements { 37 | x.loopElements[e] = true 38 | } 39 | 40 | return x 41 | } 42 | 43 | func (x *XMLParser) SkipElements(skipElements []string) *XMLParser { 44 | 45 | if len(skipElements) > 0 { 46 | for _, s := range skipElements { 47 | x.skipElements[s] = true 48 | } 49 | } 50 | return x 51 | 52 | } 53 | 54 | func (x *XMLParser) ParseAttributesOnly(loopElements ...string) *XMLParser { 55 | for _, e := range loopElements { 56 | x.attrOnlyElements[e] = true 57 | } 58 | return x 59 | } 60 | 61 | // by default skip elements works for stream elements childs 62 | // if this method called parser skip also outer elements 63 | func (x *XMLParser) SkipOuterElements() *XMLParser { 64 | 65 | x.skipOuterElements = true 66 | return x 67 | 68 | } 69 | 70 | func (x *XMLParser) EnableXpath() *XMLParser { 71 | 72 | x.xpathEnabled = true 73 | return x 74 | 75 | } 76 | 77 | func (x *XMLParser) Stream() chan *XMLElement { 78 | 79 | go x.parse() 80 | 81 | return x.resultChannel 82 | 83 | } 84 | 85 | func (x *XMLParser) parse() { 86 | 87 | defer close(x.resultChannel) 88 | var element *XMLElement 89 | var tagClosed bool 90 | var err error 91 | var b byte 92 | var iscomment bool 93 | 94 | err = x.skipDeclerations() 95 | 96 | if err != nil { 97 | x.sendError() 98 | return 99 | } 100 | 101 | for { 102 | b, err = x.readByte() 103 | 104 | if err != nil { 105 | return 106 | } 107 | 108 | if x.isWS(b) { 109 | continue 110 | } 111 | 112 | if b == '<' { 113 | 114 | iscdata, _, err := x.isCDATA() 115 | 116 | if err != nil { 117 | x.sendError() 118 | 
return 119 | } 120 | if iscdata { 121 | continue 122 | } 123 | 124 | iscomment, err = x.isComment() 125 | 126 | if err != nil { 127 | x.sendError() 128 | return 129 | } 130 | 131 | if iscomment { 132 | continue 133 | } 134 | 135 | element, tagClosed, err = x.startElement() 136 | 137 | if err != nil { 138 | x.sendError() 139 | return 140 | } 141 | 142 | if _, found := x.loopElements[element.Name]; found { 143 | if tagClosed { 144 | x.resultChannel <- element 145 | continue 146 | } 147 | 148 | if _, ok := x.attrOnlyElements[element.Name]; !ok { 149 | element = x.getElementTree(element) 150 | } 151 | x.resultChannel <- element 152 | if element.Err != nil { 153 | return 154 | } 155 | } else if x.skipOuterElements { 156 | 157 | if _, ok := x.skipElements[element.Name]; ok && !tagClosed { 158 | 159 | err = x.skipElement(element.Name) 160 | if err != nil { 161 | x.sendError() 162 | return 163 | } 164 | continue 165 | 166 | } 167 | 168 | } 169 | 170 | } 171 | } 172 | 173 | } 174 | 175 | func (x *XMLParser) getElementTree(result *XMLElement) *XMLElement { 176 | 177 | if result.Err != nil { 178 | return result 179 | } 180 | 181 | var cur byte 182 | var next byte 183 | var err error 184 | var element *XMLElement 185 | var tagClosed bool 186 | x.scratch2.reset() // this hold the inner text 187 | var iscomment bool 188 | 189 | for { 190 | 191 | cur, err = x.readByte() 192 | 193 | if err != nil { 194 | result.Err = err 195 | return result 196 | } 197 | 198 | if cur == '<' { 199 | 200 | iscdata, cddata, err := x.isCDATA() 201 | 202 | if err != nil { 203 | result.Err = err 204 | return result 205 | } 206 | if iscdata { 207 | for _, cd := range cddata { 208 | x.scratch2.add(cd) 209 | } 210 | continue 211 | } 212 | 213 | iscomment, err = x.isComment() 214 | 215 | if err != nil { 216 | result.Err = err 217 | return result 218 | } 219 | 220 | if iscomment { 221 | continue 222 | } 223 | 224 | next, err = x.readByte() 225 | 226 | if err != nil { 227 | result.Err = err 228 | return 
result 229 | } 230 | 231 | if next == '/' { // close tag 232 | tag, err := x.closeTagName() 233 | 234 | if err != nil { 235 | result.Err = err 236 | return result 237 | } 238 | 239 | if tag == result.Name { 240 | if len(result.Childs) == 0 { 241 | result.InnerText = string(x.scratch2.bytes()) 242 | } 243 | return result 244 | } 245 | } else { 246 | x.unreadByte() 247 | } 248 | 249 | element, tagClosed, err = x.startElement() 250 | 251 | if err != nil { 252 | result.Err = err 253 | return result 254 | } 255 | 256 | if _, ok := x.skipElements[element.Name]; ok && !tagClosed { 257 | err = x.skipElement(element.Name) 258 | if err != nil { 259 | result.Err = err 260 | return result 261 | } 262 | continue 263 | } 264 | if !tagClosed { 265 | element = x.getElementTree(element) 266 | } 267 | 268 | if x.xpathEnabled { 269 | element.parent = result 270 | } 271 | 272 | if _, ok := result.Childs[element.Name]; ok { 273 | result.Childs[element.Name] = append(result.Childs[element.Name], *element) 274 | if x.xpathEnabled { 275 | result.childs = append(result.childs, element) 276 | } 277 | } else { 278 | var childs []XMLElement 279 | childs = append(childs, *element) 280 | if result.Childs == nil { 281 | result.Childs = map[string][]XMLElement{} 282 | } 283 | result.Childs[element.Name] = childs 284 | 285 | if x.xpathEnabled { 286 | result.childs = append(result.childs, element) 287 | } 288 | 289 | } 290 | 291 | } else { 292 | x.scratch2.add(cur) 293 | } 294 | 295 | } 296 | } 297 | 298 | func (x *XMLParser) skipElement(elname string) error { 299 | 300 | var c byte 301 | var next byte 302 | var err error 303 | var curname string 304 | for { 305 | 306 | c, err = x.readByte() 307 | 308 | if err != nil { 309 | return err 310 | } 311 | if c == '<' { 312 | 313 | next, err = x.readByte() 314 | 315 | if err != nil { 316 | return err 317 | } 318 | 319 | if next == '/' { 320 | curname, err = x.closeTagName() 321 | if err != nil { 322 | return err 323 | } 324 | if curname == elname { 325 | 
return nil 326 | } 327 | } 328 | 329 | } 330 | 331 | } 332 | } 333 | 334 | func (x *XMLParser) startElement() (*XMLElement, bool, error) { 335 | 336 | x.scratch.reset() 337 | 338 | var cur byte 339 | var prev byte 340 | var err error 341 | var result = &XMLElement{} 342 | // a tag have 3 forms * ** *** 343 | var attr string 344 | var attrVal string 345 | for { 346 | 347 | cur, err = x.readByte() 348 | 349 | if err != nil { 350 | return nil, false, x.defaultError() 351 | } 352 | 353 | if x.isWS(cur) { 354 | result.Name = string(x.scratch.bytes()) 355 | 356 | if x.xpathEnabled { 357 | names := strings.Split(result.Name, ":") 358 | if len(names) > 1 { 359 | result.prefix = names[0] 360 | result.localName = names[1] 361 | } else { 362 | result.localName = names[0] 363 | } 364 | } 365 | 366 | x.scratch.reset() 367 | goto search_close_tag 368 | } 369 | 370 | if cur == '>' { 371 | if prev == '/' { 372 | result.Name = string(x.scratch.bytes()[:len(x.scratch.bytes())-1]) 373 | 374 | if x.xpathEnabled { 375 | names := strings.Split(result.Name, ":") 376 | if len(names) > 1 { 377 | result.prefix = names[0] 378 | result.localName = names[1] 379 | } else { 380 | result.localName = names[0] 381 | } 382 | } 383 | 384 | return result, true, nil 385 | } 386 | result.Name = string(x.scratch.bytes()) 387 | 388 | if x.xpathEnabled { 389 | names := strings.Split(result.Name, ":") 390 | if len(names) > 1 { 391 | result.prefix = names[0] 392 | result.localName = names[1] 393 | } else { 394 | result.localName = names[0] 395 | } 396 | } 397 | 398 | return result, false, nil 399 | } 400 | x.scratch.add(cur) 401 | prev = cur 402 | } 403 | 404 | search_close_tag: 405 | for { 406 | 407 | cur, err = x.readByte() 408 | 409 | if err != nil { 410 | return nil, false, x.defaultError() 411 | } 412 | 413 | if x.isWS(cur) { 414 | continue 415 | } 416 | 417 | if cur == '=' { 418 | if result.Attrs == nil { 419 | result.Attrs = map[string]string{} 420 | } 421 | 422 | cur, err = x.readByte() 423 | 424 | 
if err != nil { 425 | return nil, false, x.defaultError() 426 | } 427 | 428 | for x.isWS(cur) { 429 | cur, err = x.readByte() 430 | if err != nil { 431 | return nil, false, x.defaultError() 432 | } 433 | } 434 | 435 | if !(cur == '"' || cur == '\'') { 436 | return nil, false, x.defaultError() 437 | } 438 | 439 | attr = string(x.scratch.bytes()) 440 | attrVal, err = x.string(cur) 441 | if err != nil { 442 | return nil, false, x.defaultError() 443 | } 444 | result.Attrs[attr] = attrVal 445 | if x.xpathEnabled { 446 | result.attrs = append(result.attrs, &xmlAttr{name: attr, value: attrVal}) 447 | } 448 | x.scratch.reset() 449 | continue 450 | } 451 | 452 | if cur == '>' { //if tag name not found 453 | if prev == '/' { //tag special close 454 | return result, true, nil 455 | } 456 | return result, false, nil 457 | } 458 | 459 | x.scratch.add(cur) 460 | prev = cur 461 | 462 | } 463 | 464 | } 465 | 466 | func (x *XMLParser) isComment() (bool, error) { 467 | 468 | var c byte 469 | var err error 470 | 471 | c, err = x.readByte() 472 | 473 | if err != nil { 474 | return false, err 475 | } 476 | 477 | if c != '!' 
{ 478 | x.unreadByte() 479 | return false, nil 480 | } 481 | 482 | var d, e byte 483 | 484 | d, err = x.readByte() 485 | 486 | if err != nil { 487 | return false, err 488 | } 489 | 490 | e, err = x.readByte() 491 | 492 | if err != nil { 493 | return false, err 494 | } 495 | 496 | if d != '-' || e != '-' { 497 | err = x.defaultError() 498 | return false, err 499 | } 500 | 501 | // skip part 502 | x.scratch.reset() 503 | for { 504 | 505 | c, err = x.readByte() 506 | 507 | if err != nil { 508 | return false, err 509 | } 510 | 511 | if c == '>' && len(x.scratch.bytes()) > 1 && x.scratch.bytes()[len(x.scratch.bytes())-1] == '-' && x.scratch.bytes()[len(x.scratch.bytes())-2] == '-' { 512 | return true, nil 513 | } 514 | 515 | x.scratch.add(c) 516 | 517 | } 518 | 519 | } 520 | 521 | func (x *XMLParser) isCDATA() (bool, []byte, error) { 522 | 523 | var c byte 524 | var b []byte 525 | var err error 526 | 527 | b, err = x.reader.Peek(2) 528 | 529 | if err != nil { 530 | return false, nil, err 531 | } 532 | 533 | if b[0] != '!' 
{ 534 | return false, nil, nil 535 | } 536 | 537 | if err != nil { 538 | return false, nil, err 539 | } 540 | 541 | if b[1] != '[' { 542 | // this means this is not CDDATA either comment or or invalid xml which will be check during isComment 543 | return false, nil, nil 544 | } 545 | 546 | // read peaked byte 547 | _, err = x.readByte() 548 | 549 | if err != nil { 550 | return false, nil, err 551 | } 552 | 553 | _, err = x.readByte() 554 | 555 | if err != nil { 556 | return false, nil, err 557 | } 558 | 559 | c, err = x.readByte() 560 | 561 | if err != nil { 562 | return false, nil, err 563 | } 564 | 565 | if c != 'C' { 566 | err = x.defaultError() 567 | return false, nil, err 568 | } 569 | 570 | c, err = x.readByte() 571 | 572 | if err != nil { 573 | return false, nil, err 574 | } 575 | 576 | if c != 'D' { 577 | err = x.defaultError() 578 | return false, nil, err 579 | } 580 | 581 | c, err = x.readByte() 582 | 583 | if err != nil { 584 | return false, nil, err 585 | } 586 | 587 | if c != 'A' { 588 | err = x.defaultError() 589 | return false, nil, err 590 | } 591 | 592 | c, err = x.readByte() 593 | 594 | if err != nil { 595 | return false, nil, err 596 | } 597 | 598 | if c != 'T' { 599 | err = x.defaultError() 600 | return false, nil, err 601 | } 602 | 603 | c, err = x.readByte() 604 | 605 | if err != nil { 606 | return false, nil, err 607 | } 608 | 609 | if c != 'A' { 610 | err = x.defaultError() 611 | return false, nil, err 612 | } 613 | 614 | c, err = x.readByte() 615 | 616 | if err != nil { 617 | return false, nil, err 618 | } 619 | 620 | if c != '[' { 621 | err = x.defaultError() 622 | return false, nil, err 623 | } 624 | 625 | // this is possibly cdata // ]]> 626 | x.scratch.reset() 627 | for { 628 | 629 | c, err = x.readByte() 630 | 631 | if err != nil { 632 | return false, nil, err 633 | } 634 | 635 | if c == '>' && len(x.scratch.bytes()) > 1 && x.scratch.bytes()[len(x.scratch.bytes())-1] == ']' && x.scratch.bytes()[len(x.scratch.bytes())-2] == ']' { 636 | 
return true, x.scratch.bytes()[:len(x.scratch.bytes())-2], nil 637 | } 638 | 639 | x.scratch.add(c) 640 | 641 | } 642 | 643 | } 644 | 645 | func (x *XMLParser) skipDeclerations() error { 646 | 647 | var a, b []byte 648 | var c, d byte 649 | var err error 650 | 651 | scan_declartions: 652 | for { 653 | 654 | // when identifying a xml declaration we need to know 2 bytes ahead. Unread works 1 byte at a time so we use Peek and read together. 655 | a, err = x.reader.Peek(1) 656 | 657 | if err != nil { 658 | return err 659 | } 660 | 661 | if a[0] == '<' { 662 | 663 | b, err = x.reader.Peek(2) 664 | 665 | if err != nil { 666 | return err 667 | } 668 | 669 | if b[1] == '!' || b[1] == '?' { // either comment or decleration 670 | 671 | // read 2 peaked byte 672 | _, err = x.readByte() 673 | 674 | if err != nil { 675 | return err 676 | } 677 | 678 | _, err = x.readByte() 679 | if err != nil { 680 | return err 681 | } 682 | 683 | c, err = x.readByte() 684 | 685 | if err != nil { 686 | return err 687 | } 688 | 689 | d, err = x.readByte() 690 | 691 | if err != nil { 692 | return err 693 | } 694 | 695 | if c == '-' && d == '-' { 696 | goto skipComment 697 | } else { 698 | goto skipDecleration 699 | } 700 | 701 | } else { // declerations ends. 
702 | 703 | return nil 704 | 705 | } 706 | 707 | } 708 | 709 | // read peeked byte 710 | _, err = x.readByte() 711 | 712 | if err != nil { 713 | return err 714 | } 715 | 716 | } 717 | 718 | skipComment: 719 | x.scratch.reset() 720 | for { 721 | 722 | c, err = x.readByte() 723 | 724 | if err != nil { 725 | return err 726 | } 727 | 728 | if c == '>' && len(x.scratch.bytes()) > 1 && x.scratch.bytes()[len(x.scratch.bytes())-1] == '-' && x.scratch.bytes()[len(x.scratch.bytes())-2] == '-' { 729 | goto scan_declartions 730 | } 731 | 732 | x.scratch.add(c) 733 | 734 | } 735 | 736 | skipDecleration: 737 | depth := 1 738 | for { 739 | 740 | c, err = x.readByte() 741 | 742 | if err != nil { 743 | return err 744 | } 745 | 746 | if c == '>' { 747 | depth-- 748 | if depth == 0 { 749 | goto scan_declartions 750 | } 751 | continue 752 | } 753 | if c == '<' { 754 | depth++ 755 | } 756 | 757 | } 758 | 759 | } 760 | 761 | func (x *XMLParser) closeTagName() (string, error) { 762 | 763 | x.scratch.reset() 764 | var c byte 765 | var err error 766 | for { 767 | c, err = x.readByte() 768 | 769 | if err != nil { 770 | return "", err 771 | } 772 | 773 | if c == '>' { 774 | return string(x.scratch.bytes()), nil 775 | } 776 | if !x.isWS(c) { 777 | x.scratch.add(c) 778 | } 779 | } 780 | } 781 | 782 | func (x *XMLParser) readByte() (byte, error) { 783 | // readByte reads one byte from the underlying reader; a failed read (e.g. end of input) must not advance TotalReadSize. 784 | by, err := x.reader.ReadByte() 785 | 786 | if err != nil { 787 | return 0, err 788 | } 789 | 790 | x.TotalReadSize++ // count only bytes that were actually consumed 791 | return by, nil 792 | 793 | } 794 | 795 | func (x *XMLParser) unreadByte() error { 796 | 797 | err := x.reader.UnreadByte() 798 | if err != nil { 799 | return err 800 | } 801 | x.TotalReadSize = x.TotalReadSize - 1 802 | return nil 803 | 804 | } 805 | 806 | func (x *XMLParser) isWS(in byte) bool { 807 | 808 | if in == ' ' || in == '\n' || in == '\t' || in == '\r' { 809 | return true 810 | } 811 | 812 | return false 813 | 814 | } 815 | 816 | func (x *XMLParser) sendError() { 817 | err := fmt.Errorf("Invalid xml") 818 | 
x.resultChannel <- &XMLElement{Err: err} 819 | } 820 | 821 | func (x *XMLParser) defaultError() error { 822 | err := fmt.Errorf("Invalid xml") 823 | return err 824 | } 825 | 826 | func (x *XMLParser) string(start byte) (string, error) { 827 | 828 | x.scratch.reset() 829 | 830 | var err error 831 | var c byte 832 | for { 833 | 834 | c, err = x.readByte() 835 | if err != nil { 836 | if err != nil { 837 | return "", err 838 | } 839 | } 840 | 841 | if c == start { 842 | return string(x.scratch.bytes()), nil 843 | } 844 | 845 | x.scratch.add(c) 846 | 847 | } 848 | 849 | } 850 | 851 | // scratch taken from 852 | // https://github.com/bcicen/jstream 853 | type scratch struct { 854 | data []byte 855 | fill int 856 | } 857 | 858 | // reset scratch buffer 859 | func (s *scratch) reset() { s.fill = 0 } 860 | 861 | // bytes returns the written contents of scratch buffer 862 | func (s *scratch) bytes() []byte { return s.data[0:s.fill] } 863 | 864 | // grow scratch buffer 865 | func (s *scratch) grow() { 866 | ndata := make([]byte, cap(s.data)*2) 867 | copy(ndata, s.data[:]) 868 | s.data = ndata 869 | } 870 | 871 | // append single byte to scratch buffer 872 | func (s *scratch) add(c byte) { 873 | if s.fill+1 >= cap(s.data) { 874 | s.grow() 875 | } 876 | 877 | s.data[s.fill] = c 878 | s.fill++ 879 | } 880 | 881 | // append encoded rune to scratch buffer 882 | func (s *scratch) addRune(r rune) int { 883 | if s.fill+utf8.UTFMax >= cap(s.data) { 884 | s.grow() 885 | } 886 | 887 | n := utf8.EncodeRune(s.data[s.fill:], r) 888 | s.fill += n 889 | return n 890 | } 891 | -------------------------------------------------------------------------------- /xmlparser_test.go: -------------------------------------------------------------------------------- 1 | package xmlparser 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "os" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | func getparser(prop ...string) *XMLParser { 13 | 14 | return getparserFile("sample.xml", prop...) 
15 | } 16 | 17 | func getparserFile(filename string, prop ...string) *XMLParser { 18 | 19 | file, _ := os.Open(filename) 20 | 21 | br := bufio.NewReader(file) 22 | 23 | p := NewXMLParser(br, prop...) 24 | 25 | return p 26 | 27 | } 28 | 29 | func TestBasics(t *testing.T) { 30 | 31 | p := getparser("tag1") 32 | 33 | var results []*XMLElement 34 | for xml := range p.Stream() { 35 | results = append(results, xml) 36 | } 37 | if len(results) != 2 { 38 | panic("Test failed result must be 2") 39 | } 40 | 41 | if len(results[0].Childs) != 4 || len(results[1].Childs) != 4 { 42 | panic("Test failed") 43 | } 44 | // result 1 45 | if results[0].Attrs["att1"] != "" || results[0].Attrs["att2"] != "att0" { 46 | panic("Test failed") 47 | } 48 | 49 | if results[0].Childs["tag11"][0].Attrs["att1"] != "att0" { 50 | panic("Test failed") 51 | } 52 | 53 | if results[0].Childs["tag11"][0].InnerText != "Hello 你好 Gür" { 54 | panic("Test failed") 55 | } 56 | 57 | if results[0].Childs["tag11"][1].InnerText != "InnerText111" { 58 | panic("Test failed") 59 | } 60 | 61 | if results[0].Childs["tag12"][0].Attrs["att1"] != "att0" { 62 | panic("Test failed") 63 | } 64 | 65 | if results[0].Childs["tag12"][0].InnerText != "" { 66 | panic("Test failed") 67 | } 68 | 69 | if results[0].Childs["tag13"][0].Attrs != nil && results[0].Childs["tag13"][0].InnerText != "InnerText13" { 70 | panic("Test failed") 71 | } 72 | 73 | if results[0].Childs["tag14"][0].Attrs != nil && results[0].Childs["tag14"][0].InnerText != "" { 74 | panic("Test failed") 75 | } 76 | 77 | //result 2 78 | if results[1].Attrs["att1"] != "" || results[1].Attrs["att2"] != "att1" { 79 | panic("Test failed") 80 | } 81 | 82 | if results[1].Childs["tag11"][0].Attrs["att1"] != "att1" { 83 | panic("Test failed") 84 | } 85 | 86 | if results[1].Childs["tag11"][0].InnerText != "InnerText2" { 87 | panic("Test failed") 88 | } 89 | 90 | if results[1].Childs["tag12"][0].Attrs["att1"] != "att1" { 91 | panic("Test failed") 92 | } 93 | 94 | if 
results[1].Childs["tag12"][0].InnerText != "" { 95 | panic("Test failed") 96 | } 97 | if results[1].Childs["tag13"][0].Attrs != nil && results[1].Childs["tag13"][0].InnerText != "InnerText213" { 98 | panic("Test failed") 99 | } 100 | 101 | if results[1].Childs["tag14"][0].Attrs != nil && results[1].Childs["tag14"][0].InnerText != "" { 102 | panic("Test failed") 103 | } 104 | 105 | } 106 | 107 | func TestTagWithNoChild(t *testing.T) { 108 | 109 | p := getparser("tag2") 110 | 111 | var results []*XMLElement 112 | for xml := range p.Stream() { 113 | results = append(results, xml) 114 | } 115 | if len(results) != 2 { 116 | panic("Test failed") 117 | } 118 | if results[0].Childs != nil || results[1].Childs != nil { 119 | panic("Test failed") 120 | } 121 | if results[0].Attrs["att1"] != "testattr<" || results[1].Attrs["att1"] != "testattr<2" { 122 | panic("Test failed") 123 | } 124 | // with inner text 125 | p = getparser("tag3") 126 | 127 | results = results[:0] 128 | for xml := range p.Stream() { 129 | results = append(results, xml) 130 | } 131 | 132 | if len(results) != 2 { 133 | panic("Test failed") 134 | } 135 | if results[0].Childs != nil || results[1].Childs != nil { 136 | panic("Test failed") 137 | } 138 | 139 | if results[0].Attrs != nil || results[0].InnerText != "tag31" { 140 | panic("Test failed") 141 | } 142 | 143 | if results[1].Attrs["att1"] != "testattr<2" || results[1].InnerText != "tag32 " { 144 | panic("Test failed") 145 | } 146 | 147 | } 148 | 149 | func TestTagWithSpaceAndSkipOutElement(t *testing.T) { 150 | 151 | p := getparser("tag4").SkipElements([]string{"skipOutsideTag"}).SkipOuterElements() 152 | 153 | var results []*XMLElement 154 | for xml := range p.Stream() { 155 | results = append(results, xml) 156 | } 157 | 158 | if len(results) != 1 { 159 | panic("Test failed") 160 | } 161 | 162 | if results[0].Childs["tag11"][0].Attrs["att1"] != "att0 " { 163 | panic("Test failed") 164 | } 165 | 166 | if results[0].Childs["tag11"][0].InnerText != 
"InnerText0 " { 167 | panic("Test failed") 168 | } 169 | 170 | } 171 | 172 | func TestQuote(t *testing.T) { 173 | 174 | p := getparser("quotetest") 175 | 176 | var results []*XMLElement 177 | for xml := range p.Stream() { 178 | results = append(results, xml) 179 | } 180 | 181 | if len(results) != 1 { 182 | panic("Test failed") 183 | } 184 | 185 | if results[0].Attrs["att1"] != "test" || results[0].Attrs["att2"] != "test\"" || results[0].Attrs["att3"] != "test'" { 186 | panic("Test failed") 187 | } 188 | 189 | } 190 | 191 | func TestSkip(t *testing.T) { 192 | 193 | p := getparser("tag1").SkipElements([]string{"tag11", "tag13"}) 194 | 195 | var results []*XMLElement 196 | for xml := range p.Stream() { 197 | results = append(results, xml) 198 | } 199 | 200 | if len(results[0].Childs) != 2 { 201 | panic("Test failed") 202 | } 203 | 204 | if len(results[1].Childs) != 2 { 205 | panic("Test failed") 206 | } 207 | 208 | if results[0].Childs["tag11"] != nil { 209 | panic("Test failed") 210 | } 211 | 212 | if results[0].Childs["tag13"] != nil { 213 | panic("Test failed") 214 | } 215 | 216 | if results[1].Childs["tag11"] != nil { 217 | panic("Test failed") 218 | } 219 | 220 | if results[1].Childs["tag13"] != nil { 221 | panic("Test failed") 222 | } 223 | 224 | } 225 | 226 | func TestError(t *testing.T) { 227 | 228 | p := getparserFile("error.xml", "tag1") 229 | 230 | for xml := range p.Stream() { 231 | if xml.Err == nil { 232 | panic("It must give error") 233 | } 234 | } 235 | 236 | } 237 | 238 | func TestMultipleTags(t *testing.T) { 239 | p := getparser("tag1", "tag2") 240 | 241 | tagCount := map[string]int{} 242 | for xml := range p.Stream() { 243 | if xml.Name != "tag1" && xml.Name != "tag2" { 244 | t.Errorf("Only 'tag1' and 'tag2' expected, but '%s' returned", xml.Name) 245 | } 246 | tagCount[xml.Name]++ 247 | } 248 | 249 | if tagCount["tag1"] != 2 { 250 | t.Errorf("There should be 2 parsed 'tag1', but %d found", tagCount["tag1"]) 251 | } 252 | if tagCount["tag2"] != 2 { 
253 | t.Errorf("There should be 2 parsed 'tag2', but %d found", tagCount["tag2"]) 254 | } 255 | } 256 | 257 | func TestMultipleTagsNested(t *testing.T) { 258 | p := getparser("tag1", "tag11") 259 | 260 | tagCount := map[string]int{} 261 | for xml := range p.Stream() { 262 | if xml.Name != "tag1" && xml.Name != "tag11" { 263 | t.Errorf("Only 'tag1' and 'tag11' expected, but '%s' returned", xml.Name) 264 | } 265 | tagCount[xml.Name]++ 266 | } 267 | 268 | if tagCount["tag1"] != 2 { 269 | t.Errorf("There should be 2 parsed 'tag1', but %d found", tagCount["tag1"]) 270 | } 271 | if tagCount["tag11"] != 1 { 272 | if tagCount["tag11"] == 4 { 273 | t.Errorf("There should be only 1 parsed 'tag11', but 'tag11' nested under 'tag1' were parsed too") 274 | } 275 | t.Errorf("There should be 1 parsed 'tag11', but %d found", tagCount["tag11"]) 276 | } 277 | } 278 | 279 | func TestXpath(t *testing.T) { 280 | xmlDoc := ` 281 | 282 | 283 | The Iliad and The Odyssey 284 | 12.95 285 | 286 | Best translation I've read. 287 | I like other versions better. 288 | 289 | 290 | 291 | Anthology of World Literature 292 | 24.95 293 | 294 | Needs more modern literature. 295 | Excellent overview of world literature. 
296 | 297 | 298 | 299 | Journal of XML parsing 300 | 1 301 | 302 | ` 303 | 304 | sreader := strings.NewReader(xmlDoc) 305 | 306 | bufreader := bufio.NewReader(sreader) 307 | 308 | p := NewXMLParser(bufreader, "bookstore").EnableXpath() 309 | 310 | for xml := range p.Stream() { 311 | 312 | if list, err := xml.SelectElements("//book"); len(list) != 2 || err != nil { 313 | t.Fatal("//book != 2") 314 | } 315 | 316 | if list, err := xml.SelectElements("./book"); len(list) != 2 || err != nil { 317 | t.Fatal("./book != 2") 318 | } 319 | 320 | if list, err := xml.SelectElements("book"); len(list) != 2 || err != nil { 321 | t.Fatal("book != 2") 322 | } 323 | 324 | list, err := xml.SelectElements("./book/title") 325 | if len(list) != 2 || err != nil { 326 | t.Fatal("book != 2") 327 | } 328 | 329 | title, err := xml.SelectElement("./book/title") 330 | if err != nil && title.InnerText != "The Iliad and The Odyssey" { 331 | t.Fatal("./book/title") 332 | } 333 | 334 | el, err := xml.SelectElement("//book[@id='bk101']") 335 | if el == nil || err != nil { 336 | t.Fatal("//book[@id='bk101] is not found") 337 | } 338 | list, err = xml.SelectElements("//book[price>=10.95]") 339 | if list == nil || err != nil || len(list) != 2 { 340 | t.Fatal("//book[price>=10.95]") 341 | } 342 | 343 | list, err = xml.SelectElements("//book/comments/userComment[@rating='2']") 344 | if len(list) != 1 || err != nil { 345 | t.Fatal("//book/comments/userComment[@rating='2']") 346 | } 347 | 348 | // all books total price 349 | expr, err := p.CompileXpath("sum(//book/price)") 350 | if err != nil { 351 | t.Fatal("sum(//book/price) xpath expression compile error") 352 | } 353 | price := expr.Evaluate(p.CreateXPathNavigator(xml)).(float64) 354 | 355 | if fmt.Sprintf("%.2f", price) != "37.90" { 356 | t.Fatal("invalid total price->", price) 357 | } 358 | 359 | } 360 | } 361 | 362 | func TestXpathNS(t *testing.T) { 363 | 364 | br := bufio.NewReader(bytes.NewReader([]byte(` 365 | 366 | 367 | 368 | 369 | 370 | 371 
| 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | `))) 382 | 383 | str := NewXMLParser(br, "soap:Envelope").EnableXpath() 384 | for xml := range str.Stream() { 385 | 386 | if list, err := xml.SelectElements("soap:Body"); len(list) != 2 || err != nil { 387 | t.Fatal("soap:Body != 2") 388 | } 389 | 390 | if list, err := xml.SelectElements("./soap:Body/soap:BodyNest1"); len(list) != 2 || err != nil { 391 | t.Fatal("/soap:Body/soap:BodyNest1 != 2") 392 | } 393 | 394 | if list, err := xml.SelectElements("./soap:Body/soap:BodyNest1/soap:BodyNest2"); len(list) != 1 || err != nil { 395 | t.Fatal("/soap:Body/soap:BodyNest1/soap:BodyNest2 != 1") 396 | } 397 | 398 | list, err := xml.SelectElements("./soap:Body/soap:BodyNest1/soap:BodyNest3") 399 | if len(list) != 1 || err != nil { 400 | t.Fatal("/soap:Body/soap:BodyNest1/soap:BodyNest3 != 1") 401 | } 402 | 403 | if list[0].Attrs["nestatt3"] != "nestatt3val" { 404 | t.Fatal("nestatt3 attiribute test failed") 405 | } 406 | 407 | } 408 | 409 | } 410 | 411 | func TestAttrOnly(t *testing.T) { 412 | p := getparser("examples", "tag1").ParseAttributesOnly("examples") 413 | for xml := range p.Stream() { 414 | if xml.Err != nil { 415 | t.Fatal(xml.Err) 416 | } 417 | if xml.Name == "examples" { 418 | if len(xml.Childs) != 0 { 419 | t.Fatal("Childs not empty for ParseAttributesOnly tags") 420 | } 421 | fmt.Printf("Name: \t%s\n", xml.Name) 422 | fmt.Printf("Attrs: \t%v\n\n", xml.Attrs) 423 | } 424 | if xml.Name == "tag1" { 425 | if len(xml.Childs) == 0 { 426 | t.Fatal("Childs not empty for ParseAttributesOnly tags") 427 | } 428 | fmt.Printf("Name: \t%s\n", xml.Name) 429 | fmt.Printf("Attrs: \t%v\n", xml.Attrs) 430 | fmt.Printf("Childs: %v\n", xml.Childs) 431 | } 432 | } 433 | } 434 | 435 | func Benchmark1(b *testing.B) { 436 | 437 | for n := 0; n < b.N; n++ { 438 | p := getparser("tag4").SkipElements([]string{"skipOutsideTag"}).SkipOuterElements() 439 | for xml := range p.Stream() { 440 | nothing(xml) 441 | } 442 | } 443 | } 
444 | 445 | func Benchmark2(b *testing.B) { 446 | 447 | for n := 0; n < b.N; n++ { 448 | p := getparser("tag4") 449 | for xml := range p.Stream() { 450 | nothing(xml) 451 | } 452 | } 453 | 454 | } 455 | 456 | func Benchmark3(b *testing.B) { 457 | 458 | for n := 0; n < b.N; n++ { 459 | p := getparser("tag4").EnableXpath() 460 | for xml := range p.Stream() { 461 | nothing(xml) 462 | } 463 | } 464 | 465 | } 466 | 467 | func nothing(...interface{}) { 468 | } 469 | --------------------------------------------------------------------------------