├── VERSION
├── AUTHOR
├── xml
    ├── tests
    │   ├── document
    │   │   ├── empty
    │   │   │   ├── input.txt
    │   │   │   └── output.txt
    │   │   └── basic
    │   │   │   ├── input.txt
    │   │   │   └── output.txt
    │   └── node
    │   │   ├── attributes
    │   │       ├── output.txt~
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── add_child
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── inner
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── add_next_sibling
    │   │       ├── input.txt
    │   │       ├── input.txt~
    │   │       └── output.txt
    │   │   ├── inner_with_attributes
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── replace
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── set_content
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── set_namespace
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── set_ns_attr
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── add_previous_sibling
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── add_previous_sibling2
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── declare_namespace
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── set_children
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── set_default_namespace
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   ├── add_ancestor
    │   │       ├── input.txt
    │   │       └── output.txt
    │   │   └── search
    │   │       ├── input.txt
    │   │       └── output.txt
    ├── comment.go
    ├── element.go
    ├── pi.go
    ├── cdata.go
    ├── text.go
    ├── attribute.go
    ├── nodeset.go
    ├── helper.h
    ├── fragment.go
    ├── fragment_test.go
    ├── attribute_test.go
    ├── search_test.go
    ├── utils_test.go
    ├── helper.c
    ├── document_test.go
    ├── node_test.go
    ├── document.go
    └── node.go
├── html
    ├── tests
    │   └── document
    │   │   ├── html_fragment_encoding
    │   │       ├── output.txt
    │   │       └── input.txt
    │   │   └── encoding
    │   │       └── input.html
    ├── helper.h
    ├── crash_test.go
    ├── xpath_test.go
    ├── helper.c
    ├── encoding_test.go
    ├── utils_test.go
    ├── fragment.go
    ├── document_test.go
    ├── document.go
    ├── fragment_test.go
    └── node_test.go
├── util
    ├── util_test.go
    └── util.go
├── help
    ├── help_test.go
    ├── util_test.go
    └── help.go
├── .gitignore
├── mem
    ├── libxml.h
    ├── mem.go
    ├── mem_test.go
    └── libxml.c
├── css
    ├── test
    │   ├── inputs
    │   ├── outputs-global
    │   └── outputs-local
    ├── css_test.go
    ├── notes.txt
    └── css.go
├── xpath
    ├── util_test.go
    ├── xpath_test.go
    ├── expression.go
    ├── util.go
    └── xpath.go
├── LICENSE
├── Readme.md
├── gokogiri.go
└── gokogiri_test.go


/VERSION:
--------------------------------------------------------------------------------
1 | 1.0
2 | 


--------------------------------------------------------------------------------
/AUTHOR:
--------------------------------------------------------------------------------
1 | Zhigang Chen
2 | Hampton Catlin


--------------------------------------------------------------------------------
/xml/tests/document/empty/input.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/xml/tests/node/attributes/output.txt~:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/xml/tests/document/basic/input.txt:
--------------------------------------------------------------------------------
1 | <foo></foo>


--------------------------------------------------------------------------------
/xml/tests/node/add_child/input.txt:
--------------------------------------------------------------------------------
1 | <foo></foo>


--------------------------------------------------------------------------------
/xml/tests/node/inner/input.txt:
--------------------------------------------------------------------------------
1 | <foo></foo>
2 | 


--------------------------------------------------------------------------------
/xml/tests/node/add_next_sibling/input.txt:
--------------------------------------------------------------------------------
1 | <foo></foo>
2 | 


--------------------------------------------------------------------------------
/xml/tests/node/inner_with_attributes/input.txt:
--------------------------------------------------------------------------------
1 | <foo></foo>


--------------------------------------------------------------------------------
/xml/tests/node/replace/input.txt:
--------------------------------------------------------------------------------
1 | <foo><bar/></foo>
2 | 


--------------------------------------------------------------------------------
/xml/tests/node/set_content/input.txt:
--------------------------------------------------------------------------------
1 | <foo><bar/></foo>
2 | 


--------------------------------------------------------------------------------
/xml/tests/node/add_next_sibling/input.txt~:
--------------------------------------------------------------------------------
1 | "<foo></foo>"
2 | 


--------------------------------------------------------------------------------
/xml/tests/node/set_namespace/input.txt:
--------------------------------------------------------------------------------
1 | <foo><bar1/><bar2/></foo>


--------------------------------------------------------------------------------
/xml/tests/node/set_ns_attr/input.txt:
--------------------------------------------------------------------------------
1 | <foo><bar1/><bar2/></foo>


--------------------------------------------------------------------------------
/xml/tests/node/add_previous_sibling/input.txt:
--------------------------------------------------------------------------------
1 | <foo>fun</foo>
2 | 


--------------------------------------------------------------------------------
/xml/tests/node/add_previous_sibling2/input.txt:
--------------------------------------------------------------------------------
1 | <foo>fun</foo>
2 | 


--------------------------------------------------------------------------------
/xml/tests/node/declare_namespace/input.txt:
--------------------------------------------------------------------------------
1 | <foo><bar1/><bar2/></foo>


--------------------------------------------------------------------------------
/xml/tests/node/set_children/input.txt:
--------------------------------------------------------------------------------
1 | <foo><bar1/><bar2/></foo>
2 | 


--------------------------------------------------------------------------------
/xml/tests/node/set_default_namespace/input.txt:
--------------------------------------------------------------------------------
1 | <foo><bar1/><bar2/></foo>


--------------------------------------------------------------------------------
/xml/tests/document/empty/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | 


--------------------------------------------------------------------------------
/xml/tests/node/add_ancestor/input.txt:
--------------------------------------------------------------------------------
1 | <foo><bar><cat><fun/></cat></bar></foo>


--------------------------------------------------------------------------------
/xml/comment.go:
--------------------------------------------------------------------------------
1 | package xml
2 | 
3 | type CommentNode struct {
4 | 	*XmlNode
5 | }
6 | 


--------------------------------------------------------------------------------
/xml/element.go:
--------------------------------------------------------------------------------
1 | package xml
2 | 
3 | type ElementNode struct {
4 | 	*XmlNode
5 | }
6 | 


--------------------------------------------------------------------------------
/html/tests/document/html_fragment_encoding/output.txt:
--------------------------------------------------------------------------------
1 | <span>CHUCK&nbsp;FREAKINNORRIS</span>


--------------------------------------------------------------------------------
/xml/tests/document/basic/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo/>
3 | 


--------------------------------------------------------------------------------
/xml/tests/node/add_child/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo/>
3 | 


--------------------------------------------------------------------------------
/xml/tests/node/attributes/input.txt:
--------------------------------------------------------------------------------
1 | <foo id="a" myname="ff"><bar class="shine"/></foo>
2 | 


--------------------------------------------------------------------------------
/html/tests/document/html_fragment_encoding/input.txt:
--------------------------------------------------------------------------------
1 | <span>CHUCK&nbsp;FREAKINNORRIS</span>
2 | 


--------------------------------------------------------------------------------
/xml/pi.go:
--------------------------------------------------------------------------------
1 | package xml
2 | 
3 | type ProcessingInstructionNode struct {
4 | 	*XmlNode
5 | }
6 | 


--------------------------------------------------------------------------------
/xml/tests/node/replace/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <fun/>
3 | <cool/>
4 | 


--------------------------------------------------------------------------------
/xml/tests/node/search/input.txt:
--------------------------------------------------------------------------------
1 | <foo id="a" class="shine"><bar class="shine"/><vic class="dim"></foo>
2 | 


--------------------------------------------------------------------------------
/util/util_test.go:
--------------------------------------------------------------------------------
1 | package util
2 | 
3 | //please check the search tests in gokogiri/xml and gokogiri/html
4 | 


--------------------------------------------------------------------------------
/xml/tests/node/add_previous_sibling2/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo>COOLfun</foo>
3 | 


--------------------------------------------------------------------------------
/xml/tests/node/add_next_sibling/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo/>
3 | <bar/>
4 | <baz/>
5 | 


--------------------------------------------------------------------------------
/xml/tests/node/set_children/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo>
3 |   <fun/>
4 | </foo>
5 | 


--------------------------------------------------------------------------------
/xml/tests/node/inner/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo>
3 |   <bar/>
4 |   <baz/>
5 | </foo>
6 | 


--------------------------------------------------------------------------------
/xml/tests/node/set_content/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo>&lt;fun&gt;&lt;/fun&gt;</foo>
3 | 


--------------------------------------------------------------------------------
/html/tests/document/encoding/input.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moovweb/gokogiri/HEAD/html/tests/document/encoding/input.html


--------------------------------------------------------------------------------
/xml/tests/node/add_previous_sibling/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <bar/>
3 | <cat/>
4 | <foo>fun</foo>
5 | 


--------------------------------------------------------------------------------
/help/help_test.go:
--------------------------------------------------------------------------------
1 | package help
2 | 
3 | import "testing"
4 | 
5 | func TestCheckMemoryLeaks(t *testing.T) {
6 | 	CheckXmlMemoryLeaks(t)
7 | }


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.out
 2 | build/*
 3 | _*
 4 | *.6
 5 | *.o
 6 | libxml/test/ctest/test
 7 | .DS_Store
 8 | test_output.txt
 9 | .jank
10 | jank.yml


--------------------------------------------------------------------------------
/xml/tests/node/attributes/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo id="a" myname="ff">
3 |   <bar class="shine"/>
4 | </foo>
5 | 


--------------------------------------------------------------------------------
/xml/tests/node/set_default_namespace/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo xmlns="bar">
3 |   <bar1/>
4 |   <bar2/>
5 | </foo>
6 | 


--------------------------------------------------------------------------------
/xml/tests/node/declare_namespace/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo xmlns:foo="bar">
3 |   <foo:bar1/>
4 |   <bar2/>
5 | </foo>
6 | 


--------------------------------------------------------------------------------
/xml/tests/node/search/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo id="a" class="shine"><bar class="shine"/><vic class="dim"/>
3 | </foo>
4 | 


--------------------------------------------------------------------------------
/xml/tests/node/set_namespace/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo:foo xmlns:foo="bar">
3 |   <bar1/>
4 |   <bar2/>
5 | </foo:foo>
6 | 


--------------------------------------------------------------------------------
/mem/libxml.h:
--------------------------------------------------------------------------------
#ifndef _GOKOGIRI_LIBXML_H
#define _GOKOGIRI_LIBXML_H

/*
 * C helpers called from the Go side of the mem package via cgo.
 * Both take no arguments; "(void)" makes that an actual prototype instead of
 * an unspecified parameter list.
 */

/* Value surfaced to Go as mem.AllocSize(). Presumably the amount of memory
 * libxml2 currently has allocated -- confirm against the implementation. */
unsigned long libxmlGoAllocSize(void);

/* Invoked from the Go package's init() function. */
void libxmlGoInit(void);

#endif
8 | 


--------------------------------------------------------------------------------
/xml/tests/node/inner_with_attributes/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo>
3 |   <bar give="me" something="good" to="eat"/>
4 | </foo>
5 | 


--------------------------------------------------------------------------------
/xml/tests/node/set_ns_attr/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo xmlns:foo="bar" foo:hello="world">
3 |   <bar1/>
4 |   <bar2/>
5 | </foo>
6 | 


--------------------------------------------------------------------------------
/xml/tests/node/add_ancestor/output.txt:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <foo>
3 |   <bar>
4 |     <cat>
5 |       <fun/>
6 |     </cat>
7 |   </bar>
8 | </foo>
9 | 


--------------------------------------------------------------------------------
/mem/mem.go:
--------------------------------------------------------------------------------
 1 | package mem
 2 | 
 3 | /*
 4 | #cgo pkg-config: libxml-2.0
 5 | 
 6 | #include <libxml/xmlversion.h>
 7 | #include "libxml.h"
 8 | */
 9 | import "C"
10 | 
11 | const LIBXML_VERSION = C.LIBXML_DOTTED_VERSION
12 | 
13 | func init() {
14 | 	C.libxmlGoInit()
15 | }
16 | 
17 | func AllocSize() int {
18 | 	return int(C.libxmlGoAllocSize())
19 | }
20 | 


--------------------------------------------------------------------------------
/mem/mem_test.go:
--------------------------------------------------------------------------------
 1 | package mem
 2 | 
 3 | import "testing"
 4 | 
 5 | const EXPECTED_VERSION = "2.7.8"
 6 | 
 7 | func TestLibxml(t *testing.T) {
 8 | 	if LIBXML_VERSION != EXPECTED_VERSION {
 9 | 		t.Fatal("Invalid libxml version got:", LIBXML_VERSION, "expected", EXPECTED_VERSION)
10 | 	}
11 | 	if AllocSize() != 0 {
12 | 		t.Fatal(AllocSize(), "remaining allocations")
13 | 	}
14 | }
15 | 


--------------------------------------------------------------------------------
/util/util.go:
--------------------------------------------------------------------------------
 1 | package util
 2 | 
 3 | var EmptyStringBytes = []byte{0}
 4 | 
 5 | func AppendCStringTerminator(b []byte) []byte {
 6 | 	if num := len(b); num > 0 {
 7 | 		if b[num-1] != 0 {
 8 | 			return append(b, 0)
 9 | 		}
10 | 	}
11 | 	return b
12 | }
13 | 
14 | func GetCString(b []byte) []byte {
15 | 	b = AppendCStringTerminator(b)
16 | 	if len(b) == 0 {
17 | 		return EmptyStringBytes
18 | 	}
19 | 	return b
20 | }
21 | 


--------------------------------------------------------------------------------
/css/test/inputs:
--------------------------------------------------------------------------------
 1 | div
 2 | > div
 3 | div, > span
 4 | div.foo
 5 | div.foo.bar
 6 | div#foo
 7 | div#foo.bar#hux
 8 | > div#foo.bar#hux
 9 | .bar
10 | :first-child
11 | div:first-child
12 | div:nth-child(odd)
13 | div:nth-child(even)
14 | div:nth-child(2n + 1)
15 | div:nth-child(-3n-6)
16 | div:nth-of-type(5)
17 | :nth-child(4)
18 | div :nth-child(2)
19 | div[a='b']
20 | div[a~='b']
21 | div[a|='b']
22 | div[a*='b']
23 | div[a ^= 'b' ]
24 | div   [ a $= 'b' ]
25 | > :only-of-type
26 | div[a='b']:first-of-type.foo
27 | div.bar:not(#foo:first-child)


--------------------------------------------------------------------------------
/xml/cdata.go:
--------------------------------------------------------------------------------
 1 | package xml
 2 | 
 3 | /* CDataNode represents a CDATA section. This XML node type allows the embedding of unescaped, verbatim text within an XML document.
 4 | 
 5 | It is otherwise identical to a TextNode. It is most often used to wrap content that is whitespace-sensitive or likely to contain
 6 | large numbers of less-than or greater-than signs (such as code snippets or example documents).
 7 | 
 8 | If you use the XML_PARSE_NOCDATA parsing option, the parser will always present the CDATA sections as TextNodes.
 9 | */
10 | type CDataNode struct {
11 | 	*XmlNode
12 | }
13 | 


--------------------------------------------------------------------------------
/help/util_test.go:
--------------------------------------------------------------------------------
 1 | package help
 2 | 
 3 | import "testing"
 4 | 
 5 | func CheckXmlMemoryLeaks(t *testing.T) {
 6 | 	// LibxmlCleanUpParser() should only be called once during the lifetime of the
 7 | 	// program, but because there's no way to know when the last test of the suite
 8 | 	// runs in go, we can't accurately call it strictly once, so just avoid calling
 9 | 	// it for now because it's known to cause crashes if called multiple times.
10 | 	//LibxmlCleanUpParser()
11 | 
12 | 	if !LibxmlCheckMemoryLeak() {
13 | 		t.Errorf("Memory leaks: %d!!!", LibxmlGetMemoryAllocation())
14 | 		LibxmlReportMemoryLeak()
15 | 	}
16 | }
17 | 


--------------------------------------------------------------------------------
/xpath/util_test.go:
--------------------------------------------------------------------------------
 1 | package xpath
 2 | 
 3 | import "testing"
 4 | import "github.com/moovweb/gokogiri/help"
 5 | 
 6 | func CheckXmlMemoryLeaks(t *testing.T) {
 7 | 	// LibxmlCleanUpParser() should only be called once during the lifetime of the
 8 | 	// program, but because there's no way to know when the last test of the suite
 9 | 	// runs in go, we can't accurately call it strictly once, so just avoid calling
10 | 	// it for now because it's known to cause crashes if called multiple times.
11 | 	//help.LibxmlCleanUpParser()
12 | 
13 | 	if !help.LibxmlCheckMemoryLeak() {
14 | 		t.Errorf("Memory leaks: %d!!!", help.LibxmlGetMemoryAllocation())
15 | 		help.LibxmlReportMemoryLeak()
16 | 	}
17 | }
18 | 


--------------------------------------------------------------------------------
/html/helper.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CHELPER_H__
 2 | #define __CHELPER_H__
 3 | 
 4 | #include <libxml/tree.h>
 5 | #include <libxml/parser.h>
 6 | #include <libxml/HTMLtree.h>
 7 | #include <libxml/HTMLparser.h>
 8 | #include <libxml/xmlsave.h>
 9 | 
10 | htmlDocPtr htmlParse(void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int errror_buffer_len);
11 | xmlNode* htmlParseFragment(void* doc, void *buffer, int buffer_len, void *url, int options, void *error_buffer, int error_buffer_len);
12 | xmlNode* htmlParseFragmentAsDoc(void *doc, void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len);
13 | 
14 | #endif //__CHELPER_H__
15 | 


--------------------------------------------------------------------------------
/xpath/xpath_test.go:
--------------------------------------------------------------------------------
 1 | package xpath
 2 | 
 3 | //please check the search tests in gokogiri/xml and gokogiri/html
 4 | import "testing"
 5 | 
 6 | func TestCompileGoodExpr(t *testing.T) {
 7 | 	defer CheckXmlMemoryLeaks(t)
 8 | 	e := Compile(`./*`)
 9 | 	if e == nil {
10 | 		t.Error("expr should be good")
11 | 	}
12 | 	e.Free()
13 | }
14 | 
15 | func TestCompileBadExpr(t *testing.T) {
16 | 	//defer CheckXmlMemoryLeaks(t)
17 | 	//this test causes memory leaks in libxml
18 | 	//however, the memory leak is very small and does not grow as more bad expressions are compiled
19 | 	e := Compile("./")
20 | 	if e != nil {
21 | 		t.Error("expr should be bad")
22 | 	}
23 | 	e = Compile(".//")
24 | 	if e != nil {
25 | 		t.Error("expr should be bad")
26 | 	}
27 | }
28 | 


--------------------------------------------------------------------------------
/xml/text.go:
--------------------------------------------------------------------------------
 1 | package xml
 2 | 
 3 | /*
 4 | #include <libxml/parserInternals.h>
 5 | 
 6 | void disable_escaping(xmlNodePtr node) {
 7 | 	node->name = xmlStringTextNoenc;
 8 | }
 9 | */
10 | import "C"
11 | 
12 | type TextNode struct {
13 | 	*XmlNode
14 | }
15 | 
16 | // DisableOutputEscaping disables the usual safeguards against creating invalid XML and allows the
17 | // characters '<', '>', and '&' to be written out verbatim. Normally they are safely escaped as entities.
18 | //
19 | // This API is intended to provide support for XSLT processors and similar XML manipulation libraries that
20 | // may need to output unsupported entity references or use the XML API for non-XML output. It should never
21 | // be used in the normal course of XML processing.
22 | func (node *TextNode) DisableOutputEscaping() {
23 | 	C.disable_escaping(node.Ptr)
24 | }
25 | 


--------------------------------------------------------------------------------
/html/crash_test.go:
--------------------------------------------------------------------------------
 1 | package html
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestCrazyMove(t *testing.T) {
 6 | 	input := `
 7 | <html>
 8 | <body>
 9 | <div id="foo" name="foo1"> 
10 | <div id="bar" name="bar1"></div>
11 | <div id="foo" name="foo2"></div>
12 | </div>
13 | <div id="bar" name="bar2"></div>
14 | </body>
15 | </html>`
16 | 	doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
17 | 
18 | 	if err != nil {
19 | 		t.Error("Parsing has error:", err)
20 | 		return
21 | 	}
22 | 
23 | 	foos, err := doc.Search("//div[@id='foo']")
24 | 	if err != nil {
25 | 		t.Error("search has error:", err)
26 | 		return
27 | 	}
28 | 	for _, foo := range foos {
29 | 		bars, _ := foo.Search("//div[@id='bar']")
30 | 		for _, bar := range bars {
31 | 			bar.AddChild(foo)
32 | 		}
33 | 	}
34 | 
35 | 	doc.Free()
36 | 	CheckXmlMemoryLeaks(t)
37 | }
38 | 


--------------------------------------------------------------------------------
/css/css_test.go:
--------------------------------------------------------------------------------
 1 | package css
 2 | 
 3 | import (
 4 | 	"io/ioutil"
 5 | 	"strings"
 6 | 	"testing"
 7 | )
 8 | 
 9 | func read(filename string) string {
10 | 	contents, err := ioutil.ReadFile(filename)
11 | 	if err != nil {
12 | 		panic("css2xpath test could not open a test file")
13 | 	}
14 | 	return string(contents)
15 | }
16 | 
17 | func TestSelectors(t *testing.T) {
18 | 	cssSelectors := strings.Split(string(read("./test/inputs")), "\n")
19 | 	localXPaths := strings.Split(string(read("./test/outputs-local")), "\n")
20 | 	globalXPaths := strings.Split(string(read("./test/outputs-global")), "\n")
21 | 
22 | 	for i, css := range cssSelectors {
23 | 		xpathG := strings.TrimSpace(Convert(css, GLOBAL))
24 | 		xpathL := strings.TrimSpace(Convert(css, LOCAL))
25 | 		if xpathG != strings.TrimSpace(globalXPaths[i]) {
26 | 			t.Errorf("IN:\t%s <GLOBAL>\nOUT:\t%s\nEXPECTED:\t%s\n", css, xpathG, globalXPaths[i])
27 | 		}
28 | 		if xpathL != strings.TrimSpace(localXPaths[i]) {
29 | 			t.Errorf("IN:\t%s <LOCAL>\nOUT:\t%s\nEXPECT:\t%s\n", css, xpathL, localXPaths[i])
30 | 		}
31 | 	}
32 | }
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2011-2012 Zhigang Chen and Hampton Catlin
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/xml/attribute.go:
--------------------------------------------------------------------------------
 1 | package xml
 2 | 
 3 | /*
 4 | AttributeNode represents an attribute, which has a name and a value.
 5 | 
 6 | AttributeNodes are created by calling SetAttr or SetNsAttr on an element node,
 7 | and retrieved by the Attribute and Attributes functions on an element node.
 8 | 
 9 | Note that while mamespace declarations resemble attributes, they are a distinct node type
10 | and cannot be used or retreived as an AttributeNode.
11 | */
12 | type AttributeNode struct {
13 | 	*XmlNode
14 | }
15 | 
16 | // String returns the value of the attribute.
17 | func (attrNode *AttributeNode) String() string {
18 | 	return attrNode.Content()
19 | }
20 | 
21 | // Value returns the value of the attribute.
22 | func (attrNode *AttributeNode) Value() string {
23 | 	return attrNode.Content()
24 | }
25 | 
26 | //SetValue sets the value of the attribute. Note that the argument will
27 | // be converted to a string, and automatically XML-escaped when the
28 | // document is serialized.
29 | func (attrNode *AttributeNode) SetValue(val interface{}) {
30 | 	attrNode.SetContent(val)
31 | }
32 | 
33 | /*
34 | alias :value :content
35 | alias :to_s :content
36 | alias :content= :value=
37 | */
38 | 


--------------------------------------------------------------------------------
/html/xpath_test.go:
--------------------------------------------------------------------------------
 1 | package html
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestUnfoundFuncInXpath(t *testing.T) {
 6 | 	defer CheckXmlMemoryLeaks(t)
 7 | 
 8 | 	doc, err := Parse([]byte("<html><body><div><h1></div>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 9 | 
10 | 	if err != nil {
11 | 		t.Error("Parsing has error:", err)
12 | 		return
13 | 	}
14 | 
15 | 	html := doc.Root().FirstChild()
16 | 	results, _ := html.Search("./div[matches(text(), 'foo')]")
17 | 	if results != nil {
18 | 		t.Error("should return nil because the function is not found")
19 | 	}
20 | 	doc.Free()
21 | }
22 | 
23 | func TestXpathEmptyResult(t *testing.T) {
24 | 	defer CheckXmlMemoryLeaks(t)
25 | 
26 | 	doc, err := Parse([]byte("<html><body><div><h1></div>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
27 | 
28 | 	if err != nil {
29 | 		t.Error("Parsing has error:", err)
30 | 		return
31 | 	}
32 | 
33 | 	html := doc.Root().FirstChild()
34 | 	results, err := html.Search("./div[@calass='cool']")
35 | 	if err != nil {
36 | 		t.Error("Xpath eval should not return nil")
37 | 	}
38 | 	if len(results) > 0 {
39 | 		t.Error("Xpath should return empty result")
40 | 	}
41 | 	doc.Free()
42 | }
43 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | Gokogiri
 2 | ========
 3 | LibXML bindings for the Go programming language.
 4 | ------------------------------------------------
 5 | By Zhigang Chen and Hampton Catlin
 6 | 
 7 | 
 8 | This is a major rewrite from v0 in the following places:
 9 | 
10 | - Separation of XML and HTML
11 | - Put more burden of memory allocation/deallocation on Go
12 | - Fragment parsing -- no more deep-copy
13 | - Serialization
14 | - Some API adjustments
15 | 
16 | ## Installation
17 | 
18 | ```bash
19 | # Linux
20 | sudo apt-get install libxml2-dev
21 | # Mac
22 | brew install libxml2
23 | 
24 | go get github.com/moovweb/gokogiri
25 | ```
26 | 
27 | ## Running tests
28 | 
29 | ```bash
30 | go test github.com/moovweb/gokogiri/...
31 | ```
32 | 
33 | ## Basic example
34 | 
35 | ```go
36 | package main
37 | 
38 | import (
39 |   "net/http"
40 |   "io/ioutil"
41 |   "github.com/moovweb/gokogiri"
42 | )
43 | 
44 | func main() {
45 |   // fetch and read a web page
46 |   resp, _ := http.Get("http://www.google.com")
47 |   page, _ := ioutil.ReadAll(resp.Body)
48 | 
49 |   // parse the web page
50 |   doc, _ := gokogiri.ParseHtml(page)
51 | 
52 |   // perform operations on the parsed page -- consult the tests for examples
53 | 
54 |   // important -- don't forget to free the resources when you're done!
55 |   doc.Free()
56 | }
57 | ```
58 | 


--------------------------------------------------------------------------------
/xml/nodeset.go:
--------------------------------------------------------------------------------
 1 | package xml
 2 | 
 3 | /*
 4 | #cgo pkg-config: libxml-2.0
 5 | 
 6 | #include <libxml/xpath.h>
 7 | #include <libxml/xpathInternals.h>
 8 | 
 9 | */
10 | import "C"
11 | 
12 | import "unsafe"
13 | 
// Nodeset is an ordered collection of nodes, stored as a plain slice of Node.
type Nodeset []Node
15 | 
16 | // Produce a slice of unsafe.Pointer objects, suitable for passing to a C function
17 | func (n Nodeset) ToPointers() (pointers []unsafe.Pointer) {
18 | 	for _, node := range n {
19 | 		pointers = append(pointers, node.NodePtr())
20 | 	}
21 | 	return
22 | }
23 | 
24 | // Produce a C.xmlXPathObjectPtr suitable for passing to libxml2
25 | func (n Nodeset) ToXPathNodeset() (ret C.xmlXPathObjectPtr) {
26 | 	ret = C.xmlXPathNewNodeSet(nil)
27 | 	for _, node := range n {
28 | 		C.xmlXPathNodeSetAdd(ret.nodesetval, (*C.xmlNode)(node.NodePtr()))
29 | 	}
30 | 	return
31 | }
32 | 
33 | // Produce a C.xmlXPathObjectPtr marked as a ResultValueTree, suitable for passing to libxml2
34 | func (n Nodeset) ToXPathValueTree() (ret C.xmlXPathObjectPtr) {
35 | 	if len(n) == 0 {
36 | 		ret = C.xmlXPathNewValueTree(nil)
37 | 		return
38 | 	}
39 | 
40 | 	ret = C.xmlXPathNewValueTree(nil)
41 | 	for _, node := range n {
42 | 		C.xmlXPathNodeSetAdd(ret.nodesetval, (*C.xmlNode)(node.NodePtr()))
43 | 	}
44 | 	//this hack-ish looking line tells libxml2 not to free the RVT
45 | 	//if we don't do this we get horrible double-free crashes everywhere
46 | 	ret.boolval = 0
47 | 	return
48 | }
49 | 


--------------------------------------------------------------------------------
/html/helper.c:
--------------------------------------------------------------------------------
 1 | #include "helper.h"
 2 | #include "../xml/helper.h"
 3 | #include <string.h>
 4 | 
 5 | htmlDocPtr htmlParse(void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) {
 6 | 	const char *c_buffer       = (char*)buffer;
 7 | 	const char *c_url          = (char*)url;
 8 | 	const char *c_encoding     = (char*)encoding;
 9 | 	xmlDoc *doc = NULL;
10 | 
11 | 	xmlResetLastError();
12 | 	doc = htmlReadMemory(c_buffer, buffer_len, c_url, c_encoding, options);
13 | 
14 | 	return doc;
15 | }
16 | 
17 | xmlNode* htmlParseFragment(void *doc, void *buffer, int buffer_len, void *url, int options, void *error_buffer, int error_buffer_len) {
18 | 	xmlNode* root_element = NULL;
19 | 	xmlParserErrors errCode;
20 | 	errCode = xmlParseInNodeContext((xmlNodePtr)doc, buffer, buffer_len, options, &root_element);
21 | 	if (errCode != XML_ERR_OK) {
22 | 		return NULL;
23 | 	}
24 | 	return root_element;
25 | }
26 | 
27 | xmlNode* htmlParseFragmentAsDoc(void *doc, void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) {
28 | 	xmlDoc* tmpDoc = NULL;
29 | 	xmlNode* tmpRoot = NULL;
30 | 	tmpDoc = htmlReadMemory((char*)buffer, buffer_len, (char*)url, (char*)encoding, options);
31 | 	if (tmpDoc == NULL) {
32 | 		return NULL;
33 | 	}
34 | 	tmpRoot = xmlDocGetRootElement(tmpDoc);
35 | 	if (tmpRoot == NULL) {
36 | 		return NULL;
37 | 	}
38 | 	tmpRoot = xmlDocCopyNode(tmpRoot, doc, 1);
39 | 	xmlFreeDoc(tmpDoc);
40 | 	return tmpRoot;
41 | }
42 | 


--------------------------------------------------------------------------------
/xml/helper.h:
--------------------------------------------------------------------------------
/*
 * C helper declarations shared by the Go bindings. Only prototypes are
 * declared here; the implementations live elsewhere.
 */
#ifndef __CHELPER_H__
#define __CHELPER_H__

#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/HTMLtree.h>
#include <libxml/HTMLparser.h>
#include <libxml/xmlsave.h>
#include <libxml/xpath.h>
#include <libxml/debugXML.h>

/* Parsing helpers. Pointer arguments are passed as void* so they can be
 * handed over from Go as raw pointers. */
xmlDoc* xmlParse(void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int errror_buffer_len);
xmlNode* xmlParseFragment(void* doc, void *buffer, int buffer_len, void *url, int options, void *error_buffer, int error_buffer_len);
xmlNode* xmlParseFragmentAsDoc(void *doc, void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len);
int xmlSaveNode(void *wbuffer, void *node, void *encoding, int options);
void xmlRemoveDefaultNamespace(xmlNode *node);

void xmlSetContent(void *gonode, void *node, void *content);

/* Document/node utilities and serialization helpers. */
xmlDoc* newEmptyXmlDoc();
xmlElementType getNodeType(xmlNode *node);
char *xmlDocDumpToString(xmlDoc *doc, void *encoding, int format);
char *htmlDocDumpToString(xmlDoc *doc, int format);
void xmlFreeChars(char *buffer);
int xmlUnlinkNodeWithCheck(xmlNode *node);
int xmlNodePtrCheck(void *node);
/* NOTE(review): the two callbacks below are presumably implemented on the Go
 * side and exported to C -- confirm against the Go sources. */
void xmlNodeWriteCallback(void *buffer, void *data, int data_len);
void xmlUnlinkNodeCallback(void *nodePtr, void *gonodePtr);

/*
 * Output buffer bookkeeping.
 * NOTE(review): field meanings are inferred from their names only -- confirm
 * against the code that uses this struct.
 */
typedef struct XmlBufferContext {
	void *obj;       /* presumably an opaque owner/handle */
	char *buffer;    /* presumably the start of the output buffer */
	int buffer_len;  /* presumably the capacity of buffer in bytes */
	int data_size;   /* presumably the number of bytes written so far */
} XmlBufferContext;

#endif //__CHELPER_H__
38 | 


--------------------------------------------------------------------------------
/html/encoding_test.go:
--------------------------------------------------------------------------------
 1 | package html
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"io/ioutil"
 6 | 	"testing"
 7 | )
 8 | 
 9 | func TestParseDocument_CP1252(t *testing.T) {
10 | 	input, err := ioutil.ReadFile("./tests/document/encoding/input.html")
11 | 	if err != nil {
12 | 		t.Error("err:", err.Error())
13 | 		return
14 | 	}
15 | 	doc, err := Parse(input, []byte("windows-1252"), nil, DefaultParseOption, DefaultEncodingBytes)
16 | 	if err != nil {
17 | 		t.Error("err:", err.Error())
18 | 		return
19 | 	}
20 | 	out := doc.String()
21 | 	if index := bytes.IndexByte([]byte(out), byte(146)); index >= 0 {
22 | 		t.Error("the output is not properly encoded")
23 | 	}
24 | 	doc.Free()
25 | 	CheckXmlMemoryLeaks(t)
26 | }
27 | 
28 | func TestParseDocumentWithInOutEncodings(t *testing.T) {
29 | 	println("Starting to read input file.")
30 | 	input, err := ioutil.ReadFile("./tests/document/encoding/input.html")
31 | 	if err != nil {
32 | 		t.Error("err:", err.Error())
33 | 		return
34 | 	}
35 | 	println("Succesfully read input file, beginning parsing.")
36 | 	doc, err := Parse(input, []byte("windows-1252"), nil, DefaultParseOption, []byte("windows-1252"))
37 | 	if err != nil {
38 | 		t.Error("err:", err.Error())
39 | 		return
40 | 	}
41 | 	println("Successfully parsed, getting document as a string...")
42 | 	out := doc.String()
43 | 	if index := bytes.IndexByte([]byte(out), byte(146)); index < 0 {
44 | 		t.Error("the output is not properly encoded")
45 | 	}
46 | 
47 | 	println("Test complete, about to free document.")
48 | 	doc.Free()
49 | 	println("Successfully freed document, checking for memory leaks...")
50 | 	CheckXmlMemoryLeaks(t)
51 | 	println("Finished checking for leaks.")
52 | }
53 | 


--------------------------------------------------------------------------------
/gokogiri.go:
--------------------------------------------------------------------------------
 1 | /*
 2 | The gokogiri package provides a Go interface to the libxml2 library.
 3 | 
 4 | It is inspired by the ruby-based Nokogiri API, and allows one to parse, manipulate, and create HTML and XML
 5 | documents. Nodes can be selected using either CSS selectors (in much the same fashion as jQuery) or XPath 1.0 expressions,
 6 | and a simple DOM-like interface allows for building up documents from scratch.
 7 | */
 8 | package gokogiri
 9 | 
10 | import (
11 | 	"github.com/moovweb/gokogiri/html"
12 | 	"github.com/moovweb/gokogiri/xml"
13 | )
14 | 
15 | /*
16 | ParseHtml parses an UTF-8 encoded byte array and returns an html.HtmlDocument. It uses parsing default options that ignore
17 | errors or warnings, making it suitable for the poorly-formed 'tag soup' often found on the web.
18 | 
19 | If the content is not UTF-8 encoded or you want to customize the parsing options, you should call html.Parse directly.
20 | */
21 | func ParseHtml(content []byte) (doc *html.HtmlDocument, err error) {
22 | 	return html.Parse(content, html.DefaultEncodingBytes, nil, html.DefaultParseOption, html.DefaultEncodingBytes)
23 | }
24 | 
25 | /*
26 | ParseXml parses an UTF-8 encoded byte array and returns an xml.XmlDocument. By default the parsing options ignore validation
27 | and suppress errors and warnings. This allows one to liberal in accepting badly-formed documents, but is not standards-compliant.
28 | 
29 | If the content is not UTF-8 encoded or you want to customize the parsing options, you should call the Parse or ReadFile functions
30 | found in the github.com/moovweb/gokogiri/xml package. The xml.StrictParsingOption is conveniently provided for standards-compliant
31 | behaviour.
32 | */
33 | func ParseXml(content []byte) (doc *xml.XmlDocument, err error) {
34 | 	return xml.Parse(content, xml.DefaultEncodingBytes, nil, xml.DefaultParseOption, xml.DefaultEncodingBytes)
35 | }
36 | 


--------------------------------------------------------------------------------
/css/test/outputs-global:
--------------------------------------------------------------------------------
 1 | /descendant-or-self::*/*[self::div]
 2 | /child::*[self::div]
 3 | /descendant-or-self::*/*[self::div] | /child::*[self::span]
 4 | /descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " foo ")]
 5 | /descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " foo ") and contains(concat(" ", @class, " "), " bar ")]
 6 | /descendant-or-self::*/*[self::div and @id="foo"]
 7 | /descendant-or-self::*/*[self::div and @id="foo" and contains(concat(" ", @class, " "), " bar ") and @id="hux"]
 8 | /child::*[self::div and @id="foo" and contains(concat(" ", @class, " "), " bar ") and @id="hux"]
 9 | /descendant-or-self::*/*[contains(concat(" ", @class, " "), " bar ")]
10 | /descendant-or-self::*/*[position()=1]
11 | /descendant-or-self::*/*[self::div and position()=1]
12 | /descendant-or-self::*/*[self::div and position() mod 2 = 1]
13 | /descendant-or-self::*/*[self::div and position() mod 2 = 0]
14 | /descendant-or-self::*/*[self::div and (position() - 1) mod 2 = 0]
15 | /descendant-or-self::*/*[self::div and (position() + 6) mod -3 = 0]
16 | /descendant-or-self::*/*[self::div][position() = 5]
17 | /descendant-or-self::*/*[position() = 4]
18 | /descendant-or-self::*/*[self::div]/descendant-or-self::*/*[position() = 2]
19 | /descendant-or-self::*/*[self::div and @a='b']
20 | /descendant-or-self::*/*[self::div and contains(concat(" ", @a, " "), concat(" ", 'b', " "))]
21 | /descendant-or-self::*/*[self::div and (@a='b' or starts-with(@a, concat('b', "-")))]
22 | /descendant-or-self::*/*[self::div and contains(@a, 'b')]
23 | /descendant-or-self::*/*[self::div and starts-with(@a, 'b')]
24 | /descendant-or-self::*/*[self::div]/descendant-or-self::*/*[substring(@a, string-length(@a) - string-length('b') + 1) = 'b']
25 | /child::*[position() = 1 and position() = last()]
26 | /descendant-or-self::*/*[self::div and @a='b'][position()=1 and contains(concat(" ", @class, " "), " foo ")]
27 | /descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " bar ") and not(@id="foo" and position()=1)]


--------------------------------------------------------------------------------
/css/test/outputs-local:
--------------------------------------------------------------------------------
 1 | ./descendant-or-self::*/*[self::div]
 2 | ./child::*[self::div]
 3 | ./descendant-or-self::*/*[self::div] | ./child::*[self::span]
 4 | ./descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " foo ")]
 5 | ./descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " foo ") and contains(concat(" ", @class, " "), " bar ")]
 6 | ./descendant-or-self::*/*[self::div and @id="foo"]
 7 | ./descendant-or-self::*/*[self::div and @id="foo" and contains(concat(" ", @class, " "), " bar ") and @id="hux"]
 8 | ./child::*[self::div and @id="foo" and contains(concat(" ", @class, " "), " bar ") and @id="hux"]
 9 | ./descendant-or-self::*/*[contains(concat(" ", @class, " "), " bar ")]
10 | ./descendant-or-self::*/*[position()=1]
11 | ./descendant-or-self::*/*[self::div and position()=1]
12 | ./descendant-or-self::*/*[self::div and position() mod 2 = 1]
13 | ./descendant-or-self::*/*[self::div and position() mod 2 = 0]
14 | ./descendant-or-self::*/*[self::div and (position() - 1) mod 2 = 0]
15 | ./descendant-or-self::*/*[self::div and (position() + 6) mod -3 = 0]
16 | ./descendant-or-self::*/*[self::div][position() = 5]
17 | ./descendant-or-self::*/*[position() = 4]
18 | ./descendant-or-self::*/*[self::div]/descendant-or-self::*/*[position() = 2]
19 | ./descendant-or-self::*/*[self::div and @a='b']
20 | ./descendant-or-self::*/*[self::div and contains(concat(" ", @a, " "), concat(" ", 'b', " "))]
21 | ./descendant-or-self::*/*[self::div and (@a='b' or starts-with(@a, concat('b', "-")))]
22 | ./descendant-or-self::*/*[self::div and contains(@a, 'b')]
23 | ./descendant-or-self::*/*[self::div and starts-with(@a, 'b')]
24 | ./descendant-or-self::*/*[self::div]/descendant-or-self::*/*[substring(@a, string-length(@a) - string-length('b') + 1) = 'b']
25 | ./child::*[position() = 1 and position() = last()]
26 | ./descendant-or-self::*/*[self::div and @a='b'][position()=1 and contains(concat(" ", @class, " "), " foo ")]
27 | ./descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " bar ") and not(@id="foo" and position()=1)]


--------------------------------------------------------------------------------
/xpath/expression.go:
--------------------------------------------------------------------------------
 1 | package xpath
 2 | 
 3 | /*
 4 | #include <libxml/xpath.h>
 5 | #include <libxml/xpathInternals.h>
 6 | #include <string.h>
 7 | 
 8 | void check_xpath_syntax_noop(void *ctx, const char *fmt, ...) {
 9 | }
10 | 
11 | char *check_xpath_syntax(const char *xpath) {
12 | 	xmlGenericErrorFunc err_func = check_xpath_syntax_noop;
13 | 	initGenericErrorDefaultFunc(&err_func);
14 | 	xmlResetLastError();
15 | 	xmlXPathCompile((const xmlChar *)xpath);
16 | 	xmlErrorPtr err = xmlGetLastError();
17 | 	if (err != NULL) {
18 | 		if (err->code == XML_XPATH_EXPR_ERROR) {
19 | 			// TODO: Not the cleanest but should scale well
20 | 			int size = strlen(err->message) + strlen(err->str1) + err->int1 + 16;
21 | 			char *msg = malloc(size);
22 | 			sprintf(msg, "%s%s\n%*s^", err->message, err->str1, err->int1, " ");
23 | 			return msg;
24 | 		} else {
25 | 			char *msg = malloc(strlen(err->message));
26 | 			sprintf(msg, "%s", err->message);
27 | 			return msg;
28 | 		}
29 | 	}
30 | 	return NULL;
31 | }
32 | */
33 | import "C"
34 | import "unsafe"
35 | import . "github.com/moovweb/gokogiri/util"
36 | 
37 | //import "runtime"
38 | import "errors"
39 | 
// Expression pairs a compiled libxml2 XPath expression with the source text it
// was compiled from.
type Expression struct {
	Ptr   *C.xmlXPathCompExpr // compiled expression; set to nil by Free
	xpath string              // original XPath text, returned by String
}
44 | 
45 | func Check(path string) (err error) {
46 | 	str := C.CString(path)
47 | 	defer C.free(unsafe.Pointer(str))
48 | 	cstr := C.check_xpath_syntax(str)
49 | 	if cstr != nil {
50 | 		defer C.free(unsafe.Pointer(cstr))
51 | 		err = errors.New(C.GoString(cstr))
52 | 	}
53 | 	return
54 | }
55 | 
56 | func Compile(path string) (expr *Expression) {
57 | 	if len(path) == 0 {
58 | 		return
59 | 	}
60 | 
61 | 	xpathBytes := GetCString([]byte(path))
62 | 	xpathPtr := unsafe.Pointer(&xpathBytes[0])
63 | 	ptr := C.xmlXPathCompile((*C.xmlChar)(xpathPtr))
64 | 	if ptr == nil {
65 | 		return
66 | 	}
67 | 	expr = &Expression{Ptr: ptr, xpath: path}
68 | 	//runtime.SetFinalizer(expr, (*Expression).Free)
69 | 	return
70 | }
71 | 
// String returns the XPath source text the expression was compiled from.
func (exp *Expression) String() string {
	return exp.xpath
}
75 | 
76 | func (exp *Expression) Free() {
77 | 	if exp.Ptr != nil {
78 | 		C.xmlXPathFreeCompExpr(exp.Ptr)
79 | 		exp.Ptr = nil
80 | 	}
81 | }
82 | 


--------------------------------------------------------------------------------
/html/utils_test.go:
--------------------------------------------------------------------------------
 1 | package html
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"github.com/moovweb/gokogiri/help"
 6 | 	"io/ioutil"
 7 | 	"path/filepath"
 8 | 	"strings"
 9 | 	"testing"
10 | )
11 | 
// badOutput prints the actual and the expected text to standard output, each
// wrapped in square brackets so leading/trailing whitespace is visible.
func badOutput(actual string, expected string) {
	fmt.Printf("Got:\n[%v]\nExpected:\n[%v]\n", actual, expected)
}
16 | 
// getTestData loads the fixture pair "input.txt" and "output.txt" from the
// directory name. Read failures do not abort: each one appends a
// tab-indented description to the returned message, and the corresponding
// byte slice is whatever ioutil.ReadFile returned. The message is empty when
// both files were read.
func getTestData(name string) (input []byte, output []byte, error string) {
	const indent = "\t"
	var problems string

	input, inErr := ioutil.ReadFile(filepath.Join(name, "input.txt"))
	if inErr != nil {
		problems += fmt.Sprintf("%vCouldn't read test (%v) input:\n%v\n", indent, name, indent+inErr.Error())
	}

	output, outErr := ioutil.ReadFile(filepath.Join(name, "output.txt"))
	if outErr != nil {
		problems += fmt.Sprintf("%vCouldn't read test (%v) output:\n%v\n", indent, name, indent+outErr.Error())
	}

	return input, output, problems
}
36 | 
// collectTests lists the test case directories under "tests/<suite>". Entries
// whose name starts with "_" or "." and entries that are not directories are
// skipped. Each returned name is the directory's path including the
// "tests/<suite>" prefix. If the suite directory cannot be read, names is nil
// and the returned message describes the failure.
func collectTests(suite string) (names []string, error string) {
	testPath := filepath.Join("tests", suite)

	entries, err := ioutil.ReadDir(testPath)
	if err != nil {
		return nil, fmt.Sprintf("Couldn't read tests:\n%v\n", err.Error())
	}

	for _, entry := range entries {
		entryName := entry.Name()
		hidden := strings.HasPrefix(entryName, "_") || strings.HasPrefix(entryName, ".")
		if hidden || !entry.IsDir() {
			continue
		}
		names = append(names, filepath.Join(testPath, entryName))
	}

	return names, ""
}
57 | 
58 | func CheckXmlMemoryLeaks(t *testing.T) {
59 | 	// LibxmlCleanUpParser() should only be called once during the lifetime of the
60 | 	// program, but because there's no way to know when the last test of the suite
61 | 	// runs in go, we can't accurately call it strictly once, so just avoid calling
62 | 	// it for now because it's known to cause crashes if called multiple times.
63 | 	//println("Cleaning up parser...")
64 | 	//help.LibxmlCleanUpParser()
65 | 
66 | 	println("Checking for libxml leaks...")
67 | 	if !help.LibxmlCheckMemoryLeak() {
68 | 		println("Found memory leaks!")
69 | 		t.Errorf("Memory leaks: %d!!!", help.LibxmlGetMemoryAllocation())
70 | 		help.LibxmlReportMemoryLeak()
71 | 	}
72 | }
73 | 


--------------------------------------------------------------------------------
/help/help.go:
--------------------------------------------------------------------------------
 1 | package help
 2 | 
 3 | /*
 4 | #cgo pkg-config: libxml-2.0
 5 | 
 6 | #include <libxml/tree.h>
 7 | #include <libxml/parser.h>
 8 | #include <libxml/HTMLtree.h>
 9 | #include <libxml/HTMLparser.h>
10 | #include <libxml/xmlsave.h>
11 | 
// Write libxml2's memory report (xmlMemDisplay) to standard output.
void printMemoryLeak() {
	xmlMemDisplay(stdout);
}
13 | */
14 | import "C"
15 | 
16 | import (
17 | 	"sync"
18 | 	"sync/atomic"
19 | )
20 | 
21 | /**
22 | * With regards to Thread Safety
23 | *
24 | * xmlInitParser and xmlCleanupParser need to be called *once* each during the
25 | * lifetime of the program, regardless of how many documents you parse.
26 | *
27 | * xmlInitParser should be called at the very beginning before doing anything
28 | *   parser related.  Luckily, using the call below, we can guarantee that by
29 | *   making sure it gets called exactly once if anyone uses any gokogiri
30 | *   related functions.
31 | *
32 | * xmlCleanupParser is trickier because it also can only be called once, but it
33 | *   should strictly be called at the very end of program execution, after we're
34 | *   sure that no more documents will be parsed.  If it's ever called, and a new
35 | *   document is parsed, there is a potential for a segfault.
36 | *
37 | * For more information:
38 | *
39 | * http://www.xmlsoft.org/threads.html
40 | * http://www.xmlsoft.org/FAQ.html#Developer (In particular, question #7)
41 | **/
42 | 
43 | var once sync.Once
44 | var cleaned = new(int32)
45 | 
46 | func LibxmlInitParser() {
47 | 	if called_clean := atomic.LoadInt32(cleaned); called_clean != 0 {
48 | 		panic("LibxmlCleanUpParser has been called.  Please make sure you only " +
49 | 			"call it if no more document parsing will take place.")
50 | 	}
51 | 	once.Do(func() { C.xmlInitParser() })
52 | }
53 | 
54 | func LibxmlCleanUpParser() {
55 | 	// Because of our test structure, this method is called several
56 | 	// times during a test run (but it should only be called once
57 | 	// during the lifetime of the program).
58 | 	once.Do(func() {
59 | 		atomic.AddInt32(cleaned, 1)
60 | 		C.xmlCleanupParser()
61 | 	})
62 | }
63 | 
64 | func LibxmlGetMemoryAllocation() int {
65 | 	return (int)(C.xmlMemBlocks())
66 | }
67 | 
68 | func LibxmlCheckMemoryLeak() bool {
69 | 	return (C.xmlMemBlocks() == 0)
70 | }
71 | 
// LibxmlReportMemoryLeak prints libxml2's memory report to standard output by
// calling the C helper printMemoryLeak.
func LibxmlReportMemoryLeak() {
	C.printMemoryLeak()
}
75 | 


--------------------------------------------------------------------------------
/gokogiri_test.go:
--------------------------------------------------------------------------------
 1 | package gokogiri
 2 | 
 3 | import (
 4 | 	"github.com/moovweb/gokogiri/help"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func TestParseHtml(t *testing.T) {
 9 | 	input := "<html><body><div><h1></div>"
10 | 	expected := `<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
11 | <html><body><div><h1></h1></div></body></html>
12 | `
13 | 	doc, err := ParseHtml([]byte(input))
14 | 	if err != nil {
15 | 		t.Error("Parsing has error:", err)
16 | 		return
17 | 	}
18 | 	if doc.String() != expected {
19 | 		t.Error("the output of the html doc does not match the expected")
20 | 	}
21 | 
22 | 	expected = `<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
23 | <html>
24 | <head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>
25 | <body><div><h1></h1></div></body>
26 | </html>
27 | `
28 | 	doc.Root().FirstChild().AddPreviousSibling("<head></head>")
29 | 
30 | 	if doc.String() != expected {
31 | 		println(doc.String())
32 | 		t.Error("the output of the html doc does not match the expected")
33 | 	}
34 | 	doc.Free()
35 | 	CheckXmlMemoryLeaks(t)
36 | }
37 | 
38 | func TestParseXml(t *testing.T) {
39 | 	input := "<foo></foo>"
40 | 	expected := `<?xml version="1.0" encoding="utf-8"?>
41 | <foo/>
42 | `
43 | 	doc, err := ParseXml([]byte(input))
44 | 	if err != nil {
45 | 		t.Error("Parsing has error:", err)
46 | 		return
47 | 	}
48 | 
49 | 	if doc.String() != expected {
50 | 		t.Error("the output of the xml doc does not match the expected")
51 | 	}
52 | 
53 | 	expected = `<?xml version="1.0" encoding="utf-8"?>
54 | <foo>
55 |   <bar/>
56 | </foo>
57 | `
58 | 	doc.Root().AddChild("<bar/>")
59 | 	if doc.String() != expected {
60 | 		t.Error("the output of the xml doc does not match the expected")
61 | 	}
62 | 	doc.Free()
63 | 	CheckXmlMemoryLeaks(t)
64 | }
65 | 
66 | func CheckXmlMemoryLeaks(t *testing.T) {
67 | 	// LibxmlCleanUpParser() should only be called once during the lifetime of the
68 | 	// program, but because there's no way to know when the last test of the suite
69 | 	// runs in go, we can't accurately call it strictly once, so just avoid calling
70 | 	// it for now because it's known to cause crashes if called multiple times.
71 | 	//help.LibxmlCleanUpParser()
72 | 
73 | 	if !help.LibxmlCheckMemoryLeak() {
74 | 		t.Errorf("Memory leaks: %d!!!", help.LibxmlGetMemoryAllocation())
75 | 		help.LibxmlReportMemoryLeak()
76 | 	}
77 | }
78 | 


--------------------------------------------------------------------------------
/mem/libxml.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | #include <time.h>
  5 | 
  6 | #include <libxml/xmlmemory.h>
  7 | 
  8 | //#define TRACE_MEM
  9 | //#define CUSTOM_GC
 10 | 
 11 | unsigned long alloc_count = 0;
 12 | 
 13 | #ifndef strdup
 14 | char *strdup (const char *str) {
 15 | 	char *new = malloc(strlen(str));
 16 | 	strcpy(new, str);
 17 | 	return new;
 18 | }
 19 | #endif
 20 | 
#ifdef CUSTOM_GC
/*
 * Bookkeeping header used only when CUSTOM_GC is defined. libxmlGoMalloc
 * places one of these directly in front of every block it hands out, and
 * libxmlGoFree steps back by sizeof(go_xml_allocation) to find it again.
 * The struct is packed to 1-byte alignment.
 */
#pragma pack(push)
#pragma pack(1)
typedef struct go_xml_allocation {
	size_t size;               /* size requested by the caller */
	struct timespec timestamp; /* clock_gettime(CLOCK_REALTIME) at allocation */
	void *p;                   /* address of the payload that follows this header */
} go_xml_allocation;
#pragma pack(pop)
#endif
 31 | 
 32 | unsigned long libxmlGoAllocSize() {
 33 | 	if (alloc_count > 0) {
 34 | 		xmlCleanupParser();
 35 | 	}
 36 | 	return alloc_count;
 37 | }
 38 | 
 39 | void libxmlGoFree(void *p) {
 40 | 	alloc_count--;
 41 | #ifdef CUSTOM_GC
 42 | 	go_xml_allocation *gxa = (go_xml_allocation *)(p - sizeof(go_xml_allocation));
 43 | 	fprintf(stderr, "Freeing %lu bytes @ %p created at: %lu\n", gxa->size, gxa->p, gxa->timestamp.tv_nsec);
 44 | 	return free(gxa);
 45 | #else
 46 | #ifdef TRACE_MEM
 47 | 	fprintf(stderr, "%08lu Free %p\n", alloc_count, p);
 48 | #endif
 49 | 	return free(p);
 50 | #endif
 51 | }
 52 | 
 53 | void *libxmlGoMalloc(int size) {
 54 | 	alloc_count++;
 55 | #ifdef CUSTOM_GC
 56 | 	go_xml_allocation *gxa = (go_xml_allocation *)malloc(size + sizeof(go_xml_allocation));
 57 | 	gxa->p = (void *)gxa + sizeof(go_xml_allocation);
 58 | 	gxa->size = size;
 59 | 	clock_gettime(CLOCK_REALTIME, &(gxa->timestamp));
 60 | 	fprintf(stderr, "Allocated %lu bytes @ %p timestamp: %lu\n", gxa->size, gxa->p, gxa->timestamp.tv_nsec);
 61 | 	return gxa->p;
 62 | #else
 63 | #ifdef TRACE_MEM
 64 | 	fprintf(stderr, "%08lu Malloc %d\n", alloc_count, size);
 65 | #endif
 66 | 	return malloc(size);
 67 | #endif
 68 | }
 69 | 
 70 | void *libxmlGoRealloc(void *p, int size) {
 71 | #ifdef TRACE_MEM
 72 | 	fprintf(stderr, "Realloc %p, %d\n", p, size);
 73 | #endif
 74 | 	return realloc(p, size);
 75 | }
 76 | 
 77 | void *libxmlGoStrDup(void *p) {
 78 | 	alloc_count++;
 79 | #ifdef TRACE_MEM
 80 | 	fprintf(stderr, "%08lu StrDup %p\n", alloc_count, p);
 81 | #endif
 82 | 	return strdup(p);
 83 | }
 84 | 
 85 | void libxmlGoInit() {
 86 | #ifndef WINDOWS
 87 | 	//fprintf(stderr, "Running xmlMemSetup()...\n");
 88 | 	xmlMemSetup(
 89 | 		(xmlFreeFunc)libxmlGoFree, 
 90 | 		(xmlMallocFunc)libxmlGoMalloc, 
 91 | 		(xmlReallocFunc)libxmlGoRealloc,
 92 |       	(xmlStrdupFunc)libxmlGoStrDup
 93 | 	);
 94 | #endif
 95 | 
 96 | 	//char *_LIBXML_VERSION = strdup(LIBXML_DOTTED_VERSION);
 97 | 	//char *_LIBXML_PARSER_VERSION = strdup(xmlParserVersion);
 98 | 	//fprintf(stderr, "LIBXML_VERSION: %s\n", _LIBXML_VERSION);
 99 | 	//fprintf(stderr, "LIBXML_PARSER_VERSION: %s\n", _LIBXML_PARSER_VERSION);
100 | 
101 | #ifdef LIBXML_ICONV_ENABLED
102 | 	//fprintf(stderr, "LIBXML_ICONV_ENABLED: %s\n", "true");
103 | #else
104 | 	//fprintf(stderr, "LIBXML_ICONV_ENABLED: %s\n", "false");
105 | #endif
106 | 
107 | 	//xmlInitParser();
108 | }
109 | 
110 | 


--------------------------------------------------------------------------------
/html/fragment.go:
--------------------------------------------------------------------------------
 1 | package html
 2 | 
 3 | //#include "helper.h"
 4 | import "C"
 5 | import (
 6 | 	"bytes"
 7 | 	"errors"
 8 | 	. "github.com/moovweb/gokogiri/util"
 9 | 	"github.com/moovweb/gokogiri/xml"
10 | 	"unsafe"
11 | )
12 | 
// Markup fragments used by parsefragment to wrap and inspect fragment content.
var (
	// fragmentWrapperStart and fragmentWrapperEnd surround the content when
	// it is parsed in the context of an existing node.
	fragmentWrapperStart = []byte("<div>")
	fragmentWrapperEnd   = []byte("</div>")
	// fragmentWrapper is prepended when the content is parsed as a document.
	fragmentWrapper = []byte("<html><body>")
	// bodySigBytes is searched for to detect content with its own <body>.
	bodySigBytes = []byte("<body")
)

// Errors related to HTML fragment parsing.
var (
	ErrFailParseFragment = errors.New("failed to parse html fragment")
	ErrEmptyFragment     = errors.New("empty html fragment")
)

const initChildrenNumber = 4
22 | 
23 | func parsefragment(document xml.Document, node *xml.XmlNode, content, url []byte, options xml.ParseOption) (fragment *xml.DocumentFragment, err error) {
24 | 	//set up pointers before calling the C function
25 | 	var contentPtr, urlPtr unsafe.Pointer
26 | 	if len(url) > 0 {
27 | 		urlPtr = unsafe.Pointer(&url[0])
28 | 	}
29 | 
30 | 	var root xml.Node
31 | 	if node == nil {
32 | 		containBody := (bytes.Index(content, bodySigBytes) >= 0)
33 | 
34 | 		content = append(fragmentWrapper, content...)
35 | 		contentPtr = unsafe.Pointer(&content[0])
36 | 		contentLen := len(content)
37 | 
38 | 		inEncoding := document.InputEncoding()
39 | 		var encodingPtr unsafe.Pointer
40 | 		if len(inEncoding) > 0 {
41 | 			encodingPtr = unsafe.Pointer(&inEncoding[0])
42 | 		}
43 | 		htmlPtr := C.htmlParseFragmentAsDoc(document.DocPtr(), contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0)
44 | 
45 | 		//Note we've parsed the fragment within the given document
46 | 		//the root is not the root of the document; rather it's the root of the subtree from the fragment
47 | 		html := xml.NewNode(unsafe.Pointer(htmlPtr), document)
48 | 
49 | 		if html == nil {
50 | 			err = ErrFailParseFragment
51 | 			return
52 | 		}
53 | 		root = html
54 | 
55 | 		if !containBody {
56 | 			root = html.FirstChild()
57 | 			html.AddPreviousSibling(root)
58 | 			html.Remove() //remove html otherwise it's leaked
59 | 		}
60 | 	} else {
61 | 		//wrap the content
62 | 		newContent := append(fragmentWrapperStart, content...)
63 | 		newContent = append(newContent, fragmentWrapperEnd...)
64 | 		contentPtr = unsafe.Pointer(&newContent[0])
65 | 		contentLen := len(newContent)
66 | 		rootElementPtr := C.htmlParseFragment(node.NodePtr(), contentPtr, C.int(contentLen), urlPtr, C.int(options), nil, 0)
67 | 		if rootElementPtr == nil {
68 | 			//try to parse it as a doc
69 | 			fragment, err = parsefragment(document, nil, content, url, options)
70 | 			return
71 | 		}
72 | 		if rootElementPtr == nil {
73 | 			err = ErrFailParseFragment
74 | 			return
75 | 		}
76 | 		root = xml.NewNode(unsafe.Pointer(rootElementPtr), document)
77 | 	}
78 | 
79 | 	fragment = &xml.DocumentFragment{}
80 | 	fragment.Node = root
81 | 	fragment.InEncoding = document.InputEncoding()
82 | 	fragment.OutEncoding = document.OutputEncoding()
83 | 
84 | 	document.BookkeepFragment(fragment)
85 | 	return
86 | }
87 | 
88 | func ParseFragment(content, inEncoding, url []byte, options xml.ParseOption, outEncoding []byte) (fragment *xml.DocumentFragment, err error) {
89 | 	inEncoding = AppendCStringTerminator(inEncoding)
90 | 	outEncoding = AppendCStringTerminator(outEncoding)
91 | 	document := CreateEmptyDocument(inEncoding, outEncoding)
92 | 	fragment, err = parsefragment(document, nil, content, url, options)
93 | 	return
94 | }
95 | 


--------------------------------------------------------------------------------
/html/document_test.go:
--------------------------------------------------------------------------------
  1 | package html
  2 | 
  3 | import "testing"
  4 | import "fmt"
  5 | 
  6 | func TestParseDocument(t *testing.T) {
  7 | 	expected :=
  8 | 		`<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
  9 | <html><body><div><h1></h1></div></body></html>
 10 | `
 11 | 	expected_xml :=
 12 | 		`<?xml version="1.0" encoding="utf-8" standalone="yes"?>
 13 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 14 | <html>
 15 |   <body>
 16 |     <div>
 17 |       <h1/>
 18 |     </div>
 19 |   </body>
 20 | </html>
 21 | `
 22 | 	doc, err := Parse([]byte("<html><body><div><h1></div>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 23 | 
 24 | 	if err != nil {
 25 | 		t.Error("Parsing has error:", err)
 26 | 		return
 27 | 	}
 28 | 
 29 | 	if doc.String() != expected {
 30 | 		println("got:\n", doc.String())
 31 | 		println("expected:\n", expected)
 32 | 		t.Error("the output of the html doc does not match")
 33 | 	}
 34 | 
 35 | 	s, _ := doc.ToXml(nil, nil)
 36 | 	if string(s) != expected_xml {
 37 | 		println("got:\n", string(s))
 38 | 		println("expected:\n", expected_xml)
 39 | 		t.Error("the xml output of the html doc does not match")
 40 | 	}
 41 | 
 42 | 	doc.Free()
 43 | 	CheckXmlMemoryLeaks(t)
 44 | }
 45 | 
 46 | func TestEmptyDocument(t *testing.T) {
 47 | 	expected :=
 48 | 		`<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 49 | 
 50 | `
 51 | 	doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 52 | 
 53 | 	if err != nil {
 54 | 		t.Error("Parsing has error:", err)
 55 | 		return
 56 | 	}
 57 | 
 58 | 	if doc.String() != expected {
 59 | 		println(doc.String())
 60 | 		t.Error("the output of the html doc does not match the empty xml")
 61 | 	}
 62 | 	doc.Free()
 63 | 	CheckXmlMemoryLeaks(t)
 64 | }
 65 | 
 66 | func TestNodeById(t *testing.T) {
 67 | 	html := "<html><head></head><body><div id='yup'>success</div><div id='nope'>fail</div></body></html>"
 68 | 	doc, _ := Parse([]byte(html), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 69 | 	p := doc.NodeById("yup")
 70 | 	if p == nil {
 71 | 		t.Errorf("Did not find node by ID!")
 72 | 		return
 73 | 	}
 74 | 	output := fmt.Sprintf("%v", p.Content())
 75 | 	if output != "success" {
 76 | 		t.Errorf("Incorrect node selected by ID!")
 77 | 	}
 78 | }
 79 | 
 80 | /*
 81 | func TestHTMLFragmentEncoding(t *testing.T) {
 82 | 	defer CheckXmlMemoryLeaks(t)
 83 | 
 84 | 	input, output, error := getTestData(filepath.Join("tests", "document", "html_fragment_encoding"))
 85 | 
 86 | 	if len(error) > 0 {
 87 | 		t.Errorf("Error gathering test data for %v:\n%v\n", "html_fragment_encoding", error)
 88 | 		t.FailNow()
 89 | 	}
 90 | 
 91 | 	expected := string(output)
 92 | 
 93 | 	inputEncodingBytes := []byte("utf-8")
 94 | 
 95 | 	buffer := make([]byte, 100)
 96 | 	fragment, err := ParseFragment([]byte(input), inputEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes, buffer)
 97 | 
 98 | 	if err != nil {
 99 | 		println("WHAT")
100 | 		t.Error(err.Error())
101 | 	}
102 | 
103 | 	if fragment.String() != expected {
104 | 		badOutput(fragment.String(), expected)
105 | 		t.Error("the output of the xml doc does not match")
106 | 	}
107 | 
108 | 	fragment.Node.MyDocument().Free()
109 | }
110 | */
111 | 


--------------------------------------------------------------------------------
/css/notes.txt:
--------------------------------------------------------------------------------
  1 | group         ->  selector (',' selector)*
  2 | 
  3 | selector      ->  '>'? sequence (combinator sequence)*
  4 | 
  5 | sequence      ->  spaces? element qualifier*
  6 |               ->  spaces? qualifier+
  7 | 
  8 | combinator    ->  spaces? ('+' | '~' | '>')
  9 |               ->  spaces
 10 | 
 11 | element       ->  universal | type
 12 | 
 13 | universal     ->  LEXEME ('*')
 14 | 
 15 | type          ->  LEXEME (\w+)
 16 | 
 17 | qualifier     ->  attribute
 18 |               ->  class
 19 |               ->  id
 20 |               ->  pseudo-class
 21 | 
 22 | attribute     ->  LEXEME (complicated regexp)
 23 | 
 24 | class         ->  LEXEME ('.' identifier)
 25 | 
 26 | id            ->  LEXEME ('#' identifier)
 27 | 
 28 | pseudo-class  ->  ':first-child'
 29 |               ->  ':first-of-type'
 30 |               ->  ':only-child'
 31 |               ->  ':only-of-type'
 32 |               ->  ':empty'
 33 |               ->  (':nth-child' | ':nth-of-type') '(' predicate ')'
 34 |               ->  ':not' '(' (element | qualifier) ')'
 35 | 
 36 | predicate     ->  LEXEME(odd|even)
 37 |               ->  LEXEME([-+]?\d+n[-+]\d+)
 38 | 
 39 | 
 40 | 
 41 | 
 42 | <P> > <A>:first-child<B>
 43 | <P>/*[1][./self::<A> and <B>]<B>
 44 | #<P>//*/*[1][./self::<A>]<B>
 45 | 
 46 | <A>:first-child<B>
 47 | /*[position() = 1 and ./self::<A> and <B>]
 48 | //*/*[position() = 1 and ./self::<A> and <B>]
 49 | 
 50 | 
 51 | div, DEEP
 52 | /descendant-or-self::*/*[./self::div]
 53 | 
 54 | div, FLAT
 55 | /child::*[./self::div]
 56 | 
 57 | div span
 58 | /descendant-or-self::*/*[./self::div] /descendant-or-self::*/*[./self::span]
 59 | 
 60 | div > span
 61 | /descendant-or-self::*/*[./self::div] /child::*[./self::span]
 62 | 
 63 | div ~ span
 64 | /descendant-or-self::*/*[./self::div] /following-sibling::*[./self::span]
 65 | 
 66 | div + span
 67 | /descendant-or-self::*/*[./self::div] /following-sibling::*[./self::span and position()=1]
 68 | 
 69 | div:first-child, DEEP
 70 | /descendant-or-self::*/*[./self::div and position()=1]
 71 | 
 72 | div:first-child, FLAT
 73 | /child::*[./self::div and position()=1]
 74 | 
 75 | div:first-of-type, DEEP
 76 | /descendant-or-self::*/*[./self::div][position()=1]
 77 | 
 78 | div:last-of-type, DEEP
 79 | /descendant-or-self::*/*[./self::div][position()=last()]
 80 | 
 81 | 
 82 | div:nth-child(odd), DEEP
 83 | /child::*[./self::div and position() mod 2 = 1]
 84 | 
 85 | div:nth-child(a), DEEP
 86 | /child::*[./self::div and position()=a]
 87 | 
 88 | 
 89 | 
 90 | <A>:first-child
 91 | descendant-or-self::*/*[./self::<A> and position()=1]
 92 | 
 93 | 
 94 | <A>:first-child:last-child
 95 | 
 96 | /*[position() = 1 and ./self::<A>]
 97 | 
 98 | 
 99 | foo + bar + hux --> foo [+ bar] [+ hux]
100 | foo/following-sibling::*[1]/self::bar/following-sibling::*[1]/self::hux
101 | 
102 | foo ~ bar ~ hux
103 | foo/following-sibling::bar/following-sibling::hux
104 | 
105 | foo bar
106 | foo//bar
107 | 
108 | foo > bar
109 | foo/bar
110 | 
111 | div.foo:nth-of-type(3)
112 | div[@class='foo'][3]
113 | 
114 | div:nth-child(3).foo
115 | 
116 | *[3][./self::div][@class='foo']
117 | 
118 | foo.bar:first-child
119 | *[1][./self::foo[@class='bar']]
120 | 
121 | <something>:first-child
122 | *[position()=1][./self::<something>]
123 | 
124 | <something>:not(:first-child)
125 | *[not(position()=1)][./self::<something>]
126 | 
127 | 
128 | div:nth-child(3)
129 | *[3][./self::div]
130 | 
131 | // div *[3][./self::
132 | 
133 | 
134 | :nth-child(an+b)
135 | *[(position() - b) mod a = 0]
136 | 
137 | 
138 | :not(div)
139 | *[not(./self::div)]
140 | 
141 | :first-child
142 | *[position() = 1]
143 | :nth-child(4)
144 | *[position() = 4]
145 | 
146 | :not(:first-child)
147 | *[not(position() = 1)]
148 | :not(:nth-child(4))
149 | *[not(position() = 4)]


--------------------------------------------------------------------------------
/xml/fragment.go:
--------------------------------------------------------------------------------
  1 | package xml
  2 | 
  3 | //#include "helper.h"
  4 | import "C"
  5 | import (
  6 | 	"errors"
  7 | 	. "github.com/moovweb/gokogiri/util"
  8 | 	"unsafe"
  9 | )
 10 | 
// DocumentFragment is the result of parsing a fragment inside a document.
// The embedded Node is the root of the parsed fragment subtree; the
// fragment's top-level nodes are that root's children (see Children).
type DocumentFragment struct {
	Node
	// InEncoding is copied from the owning document's InputEncoding() when
	// the fragment is created.
	InEncoding  []byte
	// OutEncoding is copied from the owning document's OutputEncoding() when
	// the fragment is created; ToBuffer passes it to the node serializers.
	OutEncoding []byte
}
 16 | 
// fragmentWrapperStart and fragmentWrapperEnd are the synthetic element tags
// placed around fragment content before it is handed to the parser.
var (
	fragmentWrapperStart = []byte("<root>")
	fragmentWrapperEnd   = []byte("</root>")
)

// ErrFailParseFragment is returned by parsefragment when parsing produces no
// root node.
var ErrFailParseFragment = errors.New("failed to parse xml fragment")

// ErrEmptyFragment is a sentinel for an empty fragment; it is not referenced
// within this file.
var ErrEmptyFragment = errors.New("empty xml fragment")

// initChildrenNumber is the initial capacity of the slice built by Children.
const initChildrenNumber = 4
 26 | 
 27 | func parsefragment(document Document, node *XmlNode, content, url []byte, options ParseOption) (fragment *DocumentFragment, err error) {
 28 | 	//wrap the content before parsing
 29 | 	content = append(fragmentWrapperStart, content...)
 30 | 	content = append(content, fragmentWrapperEnd...)
 31 | 
 32 | 	//set up pointers before calling the C function
 33 | 	var contentPtr, urlPtr unsafe.Pointer
 34 | 	contentPtr = unsafe.Pointer(&content[0])
 35 | 	contentLen := len(content)
 36 | 	if len(url) > 0 {
 37 | 		url = AppendCStringTerminator(url)
 38 | 		urlPtr = unsafe.Pointer(&url[0])
 39 | 	}
 40 | 
 41 | 	var rootElementPtr *C.xmlNode
 42 | 
 43 | 	if node == nil {
 44 | 		inEncoding := document.InputEncoding()
 45 | 		var encodingPtr unsafe.Pointer
 46 | 		if len(inEncoding) > 0 {
 47 | 			encodingPtr = unsafe.Pointer(&inEncoding[0])
 48 | 		}
 49 | 		rootElementPtr = C.xmlParseFragmentAsDoc(document.DocPtr(), contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0)
 50 | 
 51 | 	} else {
 52 | 		rootElementPtr = C.xmlParseFragment(node.NodePtr(), contentPtr, C.int(contentLen), urlPtr, C.int(options), nil, 0)
 53 | 	}
 54 | 
 55 | 	//Note we've parsed the fragment within the given document
 56 | 	//the root is not the root of the document; rather it's the root of the subtree from the fragment
 57 | 	root := NewNode(unsafe.Pointer(rootElementPtr), document)
 58 | 
 59 | 	//the fragment was in invalid
 60 | 	if root == nil {
 61 | 		err = ErrFailParseFragment
 62 | 		return
 63 | 	}
 64 | 
 65 | 	fragment = &DocumentFragment{}
 66 | 	fragment.Node = root
 67 | 	fragment.InEncoding = document.InputEncoding()
 68 | 	fragment.OutEncoding = document.OutputEncoding()
 69 | 
 70 | 	document.BookkeepFragment(fragment)
 71 | 	return
 72 | }
 73 | 
 74 | func ParseFragment(content, inEncoding, url []byte, options ParseOption, outEncoding []byte) (fragment *DocumentFragment, err error) {
 75 | 	inEncoding = AppendCStringTerminator(inEncoding)
 76 | 	outEncoding = AppendCStringTerminator(outEncoding)
 77 | 	document := CreateEmptyDocument(inEncoding, outEncoding)
 78 | 	fragment, err = parsefragment(document, nil, content, url, options)
 79 | 	return
 80 | }
 81 | 
// Remove delegates to the Remove method of the embedded root Node. The call
// is written explicitly through fragment.Node because this method shadows the
// promoted one.
func (fragment *DocumentFragment) Remove() {
	fragment.Node.Remove()
}
 85 | 
 86 | func (fragment *DocumentFragment) Children() []Node {
 87 | 	nodes := make([]Node, 0, initChildrenNumber)
 88 | 	child := fragment.FirstChild()
 89 | 	for ; child != nil; child = child.NextSibling() {
 90 | 		nodes = append(nodes, child)
 91 | 	}
 92 | 	return nodes
 93 | }
 94 | 
 95 | func (fragment *DocumentFragment) ToBuffer(outputBuffer []byte) []byte {
 96 | 	var b []byte
 97 | 	var size int
 98 | 	for _, node := range fragment.Children() {
 99 | 		if docType := node.MyDocument().DocType(); docType == XML_HTML_DOCUMENT_NODE {
100 | 			b, size = node.ToHtml(fragment.OutEncoding, nil)
101 | 		} else {
102 | 			b, size = node.ToXml(fragment.OutEncoding, nil)
103 | 		}
104 | 		outputBuffer = append(outputBuffer, b[:size]...)
105 | 	}
106 | 	return outputBuffer
107 | }
108 | 
109 | func (fragment *DocumentFragment) String() string {
110 | 	b := fragment.ToBuffer(nil)
111 | 	if b == nil {
112 | 		return ""
113 | 	}
114 | 	return string(b)
115 | }
116 | 


--------------------------------------------------------------------------------
/xml/fragment_test.go:
--------------------------------------------------------------------------------
  1 | package xml
  2 | 
  3 | import "testing"
  4 | 
  5 | func TestParseDocumentFragmentBasic(t *testing.T) {
  6 | 	defer CheckXmlMemoryLeaks(t)
  7 | 
  8 | 	doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
  9 | 	if err != nil {
 10 | 		t.Error("parsing error:", err.Error())
 11 | 		return
 12 | 	}
 13 | 	root := doc.Root()
 14 | 	if root != nil {
 15 | 		println("root:", root.String())
 16 | 	}
 17 | 	docFragment, err := doc.ParseFragment([]byte("hi"), nil, DefaultParseOption)
 18 | 	if err != nil {
 19 | 		t.Error(err.Error())
 20 | 		doc.Free()
 21 | 		return
 22 | 	}
 23 | 	if len(docFragment.Children()) != 1 {
 24 | 		t.Error("the number of children from the fragment does not match")
 25 | 	}
 26 | 	doc.Free()
 27 | }
 28 | 
 29 | func TestParseDocumentFragment(t *testing.T) {
 30 | 	defer CheckXmlMemoryLeaks(t)
 31 | 
 32 | 	doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 33 | 	if err != nil {
 34 | 		t.Error("parsing error:", err.Error())
 35 | 		return
 36 | 	}
 37 | 	docFragment, err := doc.ParseFragment([]byte("<foo></foo><!-- comment here --><bar>fun</bar>"), nil, DefaultParseOption)
 38 | 	if err != nil {
 39 | 		t.Error(err.Error())
 40 | 		doc.Free()
 41 | 		return
 42 | 	}
 43 | 	if docFragment.String() != "<foo/><!-- comment here --><bar>fun</bar>" {
 44 | 		t.Error("fragment output is wrong\n")
 45 | 		doc.Free()
 46 | 		return
 47 | 	}
 48 | 	if len(docFragment.Children()) != 3 {
 49 | 		t.Error("the number of children from the fragment does not match")
 50 | 	}
 51 | 	doc.Free()
 52 | }
 53 | 
 54 | func TestSearchDocumentFragment(t *testing.T) {
 55 | 	defer CheckXmlMemoryLeaks(t)
 56 | 
 57 | 	doc, err := Parse([]byte("<moovweb><z/><s/></moovweb>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 58 | 	if err != nil {
 59 | 		t.Error("parsing error:", err.Error())
 60 | 		return
 61 | 	}
 62 | 	docFragment, err := doc.ParseFragment([]byte("<foo></foo><!-- comment here --><bar>fun</bar>"), nil, DefaultParseOption)
 63 | 	if err != nil {
 64 | 		t.Error(err.Error())
 65 | 		doc.Free()
 66 | 		return
 67 | 	}
 68 | 	nodes, err := docFragment.Search(".//*")
 69 | 	if err != nil {
 70 | 		t.Error("fragment search has error")
 71 | 		doc.Free()
 72 | 		return
 73 | 	}
 74 | 	if len(nodes) != 2 {
 75 | 		t.Error("the number of children from the fragment does not match")
 76 | 	}
 77 | 	nodes, err = docFragment.Search("//*")
 78 | 
 79 | 	if err != nil {
 80 | 		t.Error("fragment search has error")
 81 | 		doc.Free()
 82 | 		return
 83 | 	}
 84 | 
 85 | 	if len(nodes) != 3 {
 86 | 		t.Error("the number of children from the fragment's document does not match")
 87 | 	}
 88 | 
 89 | 	doc.Free()
 90 | }
 91 | 
 92 | func TestSearchDocumentFragmentWithEmptyDoc(t *testing.T) {
 93 | 	defer CheckXmlMemoryLeaks(t)
 94 | 
 95 | 	doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 96 | 	if err != nil {
 97 | 		t.Error("parsing error:", err.Error())
 98 | 		return
 99 | 	}
100 | 	docFragment, err := doc.ParseFragment([]byte("<foo></foo><!-- comment here --><bar>fun</bar>"), nil, DefaultParseOption)
101 | 	if err != nil {
102 | 		t.Error(err.Error())
103 | 		doc.Free()
104 | 		return
105 | 	}
106 | 	nodes, err := docFragment.Search(".//*")
107 | 	if err != nil {
108 | 		t.Error("fragment search has error")
109 | 		doc.Free()
110 | 		return
111 | 	}
112 | 	if len(nodes) != 2 {
113 | 		t.Error("the number of children from the fragment does not match")
114 | 	}
115 | 	nodes, err = docFragment.Search("//*")
116 | 
117 | 	if err != nil {
118 | 		t.Error("fragment search has error")
119 | 		doc.Free()
120 | 		return
121 | 	}
122 | 
123 | 	if len(nodes) != 0 {
124 | 		t.Error("the number of children from the fragment's document does not match")
125 | 	}
126 | 
127 | 	doc.Free()
128 | }
129 | 


--------------------------------------------------------------------------------
/xpath/util.go:
--------------------------------------------------------------------------------
  1 | package xpath
  2 | 
  3 | /*
  4 | #cgo pkg-config: libxml-2.0
  5 | 
  6 | #include <libxml/xpath.h>
  7 | #include <libxml/xpathInternals.h>
  8 | #include <libxml/parser.h>
  9 | 
 10 | int getXPathObjectType(xmlXPathObject* o);
 11 | 
 12 | */
 13 | import "C"
 14 | 
 15 | import "unsafe"
 16 | import "reflect"
 17 | import . "github.com/moovweb/gokogiri/util"
 18 | 
 19 | //export go_resolve_variables
 20 | func go_resolve_variables(ctxt unsafe.Pointer, name, ns *C.char) (ret C.xmlXPathObjectPtr) {
 21 | 	variable := C.GoString(name)
 22 | 	namespace := C.GoString(ns)
 23 | 
 24 | 	context := (*VariableScope)(ctxt)
 25 | 	if context != nil {
 26 | 		val := (*context).ResolveVariable(variable, namespace)
 27 | 		ret = ValueToXPathObject(val)
 28 | 	}
 29 | 	return
 30 | }
 31 | 
 32 | // Convert an arbitrary value into a C.xmlXPathObjectPtr
 33 | // Unrecognised and nil values are converted to empty node sets.
 34 | func ValueToXPathObject(val interface{}) (ret C.xmlXPathObjectPtr) {
 35 | 	if val == nil {
 36 | 		//return the empty node set
 37 | 		ret = C.xmlXPathNewNodeSet(nil)
 38 | 		return
 39 | 	}
 40 | 	switch v := val.(type) {
 41 | 	case unsafe.Pointer:
 42 | 		return (C.xmlXPathObjectPtr)(v)
 43 | 	case []unsafe.Pointer:
 44 | 		ptrs := v
 45 | 		if len(ptrs) > 0 {
 46 | 			//default - return a node set
 47 | 			ret = C.xmlXPathNewNodeSet(nil)
 48 | 			for _, p := range ptrs {
 49 | 				C.xmlXPathNodeSetAdd(ret.nodesetval, (*C.xmlNode)(p))
 50 | 			}
 51 | 		} else {
 52 | 			ret = C.xmlXPathNewNodeSet(nil)
 53 | 			return
 54 | 		}
 55 | 	case float64:
 56 | 		ret = C.xmlXPathNewFloat(C.double(v))
 57 | 	case string:
 58 | 		xpathBytes := GetCString([]byte(v))
 59 | 		xpathPtr := unsafe.Pointer(&xpathBytes[0])
 60 | 		ret = C.xmlXPathNewString((*C.xmlChar)(xpathPtr))
 61 | 	default:
 62 | 		typ := reflect.TypeOf(val)
 63 | 		// if a pointer to a struct is passed, get the type of the dereferenced object
 64 | 		if typ.Kind() == reflect.Ptr {
 65 | 			typ = typ.Elem()
 66 | 		}
 67 | 		//log the unknown type, return an empty node set
 68 | 		//fmt.Println("go-resolve wrong-type", typ.Kind())
 69 | 		ret = C.xmlXPathNewNodeSet(nil)
 70 | 	}
 71 | 	return
 72 | }
 73 | 
 74 | //export exec_xpath_function
 75 | func exec_xpath_function(ctxt C.xmlXPathParserContextPtr, nargs C.int) {
 76 | 	function := C.GoString((*C.char)(unsafe.Pointer(ctxt.context.function)))
 77 | 	namespace := C.GoString((*C.char)(unsafe.Pointer(ctxt.context.functionURI)))
 78 | 	context := (*VariableScope)(ctxt.context.funcLookupData)
 79 | 
 80 | 	argcount := int(nargs)
 81 | 	var args []interface{}
 82 | 
 83 | 	for i := 0; i < argcount; i = i + 1 {
 84 | 		args = append(args, XPathObjectToValue(C.valuePop(ctxt)))
 85 | 	}
 86 | 
 87 | 	// arguments are popped off the stack in reverse order, so
 88 | 	// we reverse the slice before invoking our callback
 89 | 	if argcount > 1 {
 90 | 		for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
 91 | 			args[i], args[j] = args[j], args[i]
 92 | 		}
 93 | 	}
 94 | 
 95 | 	// push the result onto the stack
 96 | 	// if for some reason we are unable to resolve the
 97 | 	// function we push an empty nodeset
 98 | 	f := (*context).ResolveFunction(function, namespace)
 99 | 	if f != nil {
100 | 		retval := f(*context, args)
101 | 		C.valuePush(ctxt, ValueToXPathObject(retval))
102 | 	} else {
103 | 		ret := C.xmlXPathNewNodeSet(nil)
104 | 		C.valuePush(ctxt, ret)
105 | 	}
106 | 
107 | }
108 | 
109 | //export go_can_resolve_function
110 | func go_can_resolve_function(ctxt unsafe.Pointer, name, ns *C.char) (ret C.int) {
111 | 	function := C.GoString(name)
112 | 	namespace := C.GoString(ns)
113 | 	context := (*VariableScope)(ctxt)
114 | 	if *context == nil {
115 | 		return C.int(0)
116 | 	}
117 | 	if (*context).IsFunctionRegistered(function, namespace) {
118 | 		return C.int(1)
119 | 	}
120 | 	return C.int(0)
121 | }
122 | 


--------------------------------------------------------------------------------
/xml/attribute_test.go:
--------------------------------------------------------------------------------
  1 | package xml
  2 | 
  3 | import "testing"
  4 | import "fmt"
  5 | 
  6 | func TestSetValue(t *testing.T) {
  7 | 	defer CheckXmlMemoryLeaks(t)
  8 | 	doc, err := Parse([]byte("<foo id=\"a\" myname=\"ff\"><bar class=\"shine\"/></foo>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
  9 | 	if err != nil {
 10 | 		t.Error("Parsing has error:", err)
 11 | 		return
 12 | 	}
 13 | 	root := doc.Root()
 14 | 	attributes := root.Attributes()
 15 | 	if len(attributes) != 2 || attributes["myname"].String() != "ff" {
 16 | 		fmt.Printf("%v, %q\n", attributes, attributes["myname"].String())
 17 | 		t.Error("root's attributes do not match")
 18 | 	}
 19 | 	child := root.FirstChild()
 20 | 	childAttributes := child.Attributes()
 21 | 	if len(childAttributes) != 1 || childAttributes["class"].String() != "shine" {
 22 | 		t.Error("child's attributes do not match")
 23 | 	}
 24 | 	attributes["myname"].SetValue("new")
 25 | 	expected :=
 26 | 		`<foo id="a" myname="new">
 27 |   <bar class="shine"/>
 28 | </foo>`
 29 | 	if root.String() != expected {
 30 | 		println("got:\n", root.String())
 31 | 		println("expected:\n", expected)
 32 | 		t.Error("root's new attr do not match")
 33 | 	}
 34 | 	attributes["id"].Remove()
 35 | 	expected =
 36 | 		`<foo myname="new">
 37 |   <bar class="shine"/>
 38 | </foo>`
 39 | 
 40 | 	if root.String() != expected {
 41 | 		println("got:\n", root.String())
 42 | 		println("expected:\n", expected)
 43 | 		t.Error("root's remove attr do not match")
 44 | 	}
 45 | 	doc.Free()
 46 | }
 47 | 
 48 | func TestSetAttribute(t *testing.T) {
 49 | 	defer CheckXmlMemoryLeaks(t)
 50 | 	doc, err := Parse([]byte("<foo id=\"a\" myname=\"ff\"><bar class=\"shine\"/></foo>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 51 | 	if err != nil {
 52 | 		t.Error("Parsing has error:", err)
 53 | 		return
 54 | 	}
 55 | 	root := doc.Root()
 56 | 	attributes := root.Attributes()
 57 | 	if len(attributes) != 2 || attributes["myname"].String() != "ff" {
 58 | 		fmt.Printf("%v, %q\n", attributes, attributes["myname"].String())
 59 | 		t.Error("root's attributes do not match")
 60 | 	}
 61 | 
 62 | 	root.SetAttr("id", "cooler")
 63 | 	root.SetAttr("id2", "hot")
 64 | 	root.SetAttr("id3", "")
 65 | 	expected :=
 66 | 		`<foo id="cooler" myname="ff" id2="hot" id3="">
 67 |   <bar class="shine"/>
 68 | </foo>`
 69 | 	if root.String() != expected {
 70 | 		println("got:\n", root.String())
 71 | 		println("expected:\n", expected)
 72 | 		t.Error("root's new attr do not match")
 73 | 	}
 74 | 	if root.Attr("id3") != "" {
 75 | 		println("got:\n", root.Attr("id3"))
 76 | 		println("expected:\n", "")
 77 | 		t.Error("root's attr should have empty val")
 78 | 	}
 79 | 	if root.Attribute("id3") == nil {
 80 | 		t.Error("root's attr should not be nil")
 81 | 	}
 82 | 	doc.Free()
 83 | }
 84 | 
 85 | func TestSetEmptyAttribute(t *testing.T) {
 86 | 	defer CheckXmlMemoryLeaks(t)
 87 | 	doc, err := Parse([]byte("<foo id=\"a\" myname=\"ff\"><bar class=\"shine\"/></foo>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 88 | 	if err != nil {
 89 | 		t.Error("Parsing has error:", err)
 90 | 		return
 91 | 	}
 92 | 	root := doc.Root()
 93 | 	attributes := root.Attributes()
 94 | 	if len(attributes) != 2 || attributes["myname"].String() != "ff" {
 95 | 		fmt.Printf("%v, %q\n", attributes, attributes["myname"].String())
 96 | 		t.Error("root's attributes do not match")
 97 | 	}
 98 | 
 99 | 	root.SetAttr("", "cool")
100 | 	expected :=
101 | 		`<foo id="a" myname="ff" ="cool">
102 |   <bar class="shine"/>
103 | </foo>`
104 | 	if root.String() != expected {
105 | 		println("got:\n", root.String())
106 | 		println("expected:\n", expected)
107 | 		t.Error("root's new attr do not match")
108 | 	}
109 | 
110 | 	root.SetAttr("", "")
111 | 	expected =
112 | 		`<foo id="a" myname="ff" ="">
113 |   <bar class="shine"/>
114 | </foo>`
115 | 	if root.String() != expected {
116 | 		println("got:\n", root.String())
117 | 		println("expected:\n", expected)
118 | 		t.Error("root's new attr do not match")
119 | 	}
120 | 	doc.Free()
121 | }
122 | 


--------------------------------------------------------------------------------
/html/document.go:
--------------------------------------------------------------------------------
  1 | package html
  2 | 
  3 | /*
  4 | #cgo pkg-config: libxml-2.0
  5 | 
  6 | #include <libxml/HTMLtree.h>
  7 | #include <libxml/HTMLparser.h>
  8 | #include "helper.h"
  9 | */
 10 | import "C"
 11 | 
 12 | import (
 13 | 	"errors"
 14 | 	"github.com/moovweb/gokogiri/help"
 15 | 	. "github.com/moovweb/gokogiri/util"
 16 | 	"github.com/moovweb/gokogiri/xml"
 17 | 	//"runtime"
 18 | 	"unsafe"
 19 | )
 20 | 
// HTML parse options. Each constant is a single bit of an xml.ParseOption
// bitmask; options are combined with bitwise OR (see DefaultParseOption).
const (
	HTML_PARSE_RECOVER   xml.ParseOption = 1 << 0  /* Relaxed parsing */
	HTML_PARSE_NODEFDTD  xml.ParseOption = 1 << 2  /* do not default a doctype if not found */
	HTML_PARSE_NOERROR   xml.ParseOption = 1 << 5  /* suppress error reports */
	HTML_PARSE_NOWARNING xml.ParseOption = 1 << 6  /* suppress warning reports */
	HTML_PARSE_PEDANTIC  xml.ParseOption = 1 << 7  /* pedantic error reporting */
	HTML_PARSE_NOBLANKS  xml.ParseOption = 1 << 8  /* remove blank nodes */
	HTML_PARSE_NONET     xml.ParseOption = 1 << 11 /* Forbid network access */
	HTML_PARSE_NOIMPLIED xml.ParseOption = 1 << 13 /* Do not add implied html/body... elements */
	HTML_PARSE_COMPACT   xml.ParseOption = 1 << 16 /* compact small text nodes */
)
 33 | 
// EmptyHtmlDoc is the source text of an empty HTML document: the empty string.
const EmptyHtmlDoc = ""

// DefaultParseOption is the default set of parse options: relaxed (recovering)
// parsing with network access forbidden and error and warning reports
// suppressed.
var DefaultParseOption xml.ParseOption = HTML_PARSE_RECOVER |
	HTML_PARSE_NONET |
	HTML_PARSE_NOERROR |
	HTML_PARSE_NOWARNING
 41 | 
// HtmlDocument is an HTML document. It embeds *xml.XmlDocument and therefore
// exposes that type's methods, adding the HTML-specific methods defined in
// this file.
type HtmlDocument struct {
	*xml.XmlDocument
}
 45 | 
// DefaultEncodingBytes is xml.DefaultEncoding as a byte slice, and
// emptyHtmlDocBytes is EmptyHtmlDoc as a byte slice.
var DefaultEncodingBytes = []byte(xml.DefaultEncoding)
var emptyHtmlDocBytes = []byte(EmptyHtmlDoc)

// ErrSetMetaEncoding is returned by SetMetaEncoding when the underlying C
// call reports -1.
var ErrSetMetaEncoding = errors.New("Set Meta Encoding failed")

// ERR_FAILED_TO_PARSE_HTML is returned by Parse when the parser yields no
// document for non-empty content.
var ERR_FAILED_TO_PARSE_HTML = errors.New("failed to parse html input")

// emptyStringBytes is a single zero byte, i.e. an empty NUL-terminated string.
var emptyStringBytes = []byte{0}
 53 | 
 54 | //create a document
 55 | func NewDocument(p unsafe.Pointer, contentLen int, inEncoding, outEncoding []byte) (doc *HtmlDocument) {
 56 | 	doc = &HtmlDocument{}
 57 | 	doc.XmlDocument = xml.NewDocument(p, contentLen, inEncoding, outEncoding)
 58 | 	doc.Me = doc
 59 | 	node := doc.Node.(*xml.XmlNode)
 60 | 	node.Document = doc
 61 | 	//runtime.SetFinalizer(doc, (*HtmlDocument).Free)
 62 | 	return
 63 | }
 64 | 
 65 | //parse a string to document
 66 | func Parse(content, inEncoding, url []byte, options xml.ParseOption, outEncoding []byte) (doc *HtmlDocument, err error) {
 67 | 	inEncoding = AppendCStringTerminator(inEncoding)
 68 | 	outEncoding = AppendCStringTerminator(outEncoding)
 69 | 
 70 | 	var docPtr *C.xmlDoc
 71 | 	contentLen := len(content)
 72 | 
 73 | 	if contentLen > 0 {
 74 | 		var contentPtr, urlPtr, encodingPtr unsafe.Pointer
 75 | 
 76 | 		contentPtr = unsafe.Pointer(&content[0])
 77 | 		if len(url) > 0 {
 78 | 			url = AppendCStringTerminator(url)
 79 | 			urlPtr = unsafe.Pointer(&url[0])
 80 | 		}
 81 | 		if len(inEncoding) > 0 {
 82 | 			encodingPtr = unsafe.Pointer(&inEncoding[0])
 83 | 		}
 84 | 
 85 | 		docPtr = C.htmlParse(contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0)
 86 | 
 87 | 		if docPtr == nil {
 88 | 			err = ERR_FAILED_TO_PARSE_HTML
 89 | 		} else {
 90 | 			doc = NewDocument(unsafe.Pointer(docPtr), contentLen, inEncoding, outEncoding)
 91 | 		}
 92 | 	}
 93 | 	if docPtr == nil {
 94 | 		doc = CreateEmptyDocument(inEncoding, outEncoding)
 95 | 	}
 96 | 	return
 97 | }
 98 | 
 99 | func CreateEmptyDocument(inEncoding, outEncoding []byte) (doc *HtmlDocument) {
100 | 	help.LibxmlInitParser()
101 | 	docPtr := C.htmlNewDoc(nil, nil)
102 | 	doc = NewDocument(unsafe.Pointer(docPtr), 0, inEncoding, outEncoding)
103 | 	return
104 | }
105 | 
106 | func (document *HtmlDocument) ParseFragment(input, url []byte, options xml.ParseOption) (fragment *xml.DocumentFragment, err error) {
107 | 	root := document.Root()
108 | 	if root == nil {
109 | 		fragment, err = parsefragment(document, nil, input, url, options)
110 | 	} else {
111 | 		fragment, err = parsefragment(document, root.XmlNode, input, url, options)
112 | 	}
113 | 	return
114 | }
115 | 
116 | func (doc *HtmlDocument) MetaEncoding() string {
117 | 	metaEncodingXmlCharPtr := C.htmlGetMetaEncoding((*C.xmlDoc)(doc.DocPtr()))
118 | 	return C.GoString((*C.char)(unsafe.Pointer(metaEncodingXmlCharPtr)))
119 | }
120 | 
121 | func (doc *HtmlDocument) SetMetaEncoding(encoding string) (err error) {
122 | 	var encodingPtr unsafe.Pointer = nil
123 | 	if len(encoding) > 0 {
124 | 		encodingBytes := AppendCStringTerminator([]byte(encoding))
125 | 		encodingPtr = unsafe.Pointer(&encodingBytes[0])
126 | 	}
127 | 	ret := int(C.htmlSetMetaEncoding((*C.xmlDoc)(doc.DocPtr()), (*C.xmlChar)(encodingPtr)))
128 | 	if ret == -1 {
129 | 		err = ErrSetMetaEncoding
130 | 	}
131 | 	return
132 | }
133 | 


--------------------------------------------------------------------------------
/html/fragment_test.go:
--------------------------------------------------------------------------------
  1 | package html
  2 | 
  3 | import "testing"
  4 | 
  5 | func TestParseDocumentFragmentText(t *testing.T) {
  6 | 	doc, err := Parse(nil, []byte("iso-8859-1"), nil, DefaultParseOption, []byte("iso-8859-1"))
  7 | 	if err != nil {
  8 | 		println(err.Error())
  9 | 	}
 10 | 	docFragment, err := doc.ParseFragment([]byte("ok\r\n"), nil, DefaultParseOption)
 11 | 	if err != nil {
 12 | 		t.Error(err.Error())
 13 | 		return
 14 | 	}
 15 | 	if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "ok\r\n" {
 16 | 		println(docFragment.String())
 17 | 		t.Error("the children from the fragment text do not match")
 18 | 	}
 19 | 	doc.Free()
 20 | 	CheckXmlMemoryLeaks(t)
 21 | }
 22 | 
 23 | func TestParseDocumentFragment(t *testing.T) {
 24 | 	doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 25 | 	if err != nil {
 26 | 		println(err.Error())
 27 | 	}
 28 | 	docFragment, err := doc.ParseFragment([]byte("<div><h1>"), nil, DefaultParseOption)
 29 | 	if err != nil {
 30 | 		t.Error(err.Error())
 31 | 		return
 32 | 	}
 33 | 	if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "<div><h1></h1></div>" {
 34 | 		t.Error("the of children from the fragment do not match")
 35 | 	}
 36 | 
 37 | 	doc.Free()
 38 | 	CheckXmlMemoryLeaks(t)
 39 | 
 40 | }
 41 | 
 42 | func TestParseDocumentFragment2(t *testing.T) {
 43 | 	docStr := `<html>
 44 | <head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>
 45 | <body>
 46 |   </body>
 47 | </html>`
 48 | 	doc, err := Parse([]byte(docStr), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 49 | 	if err != nil {
 50 | 		println(err.Error())
 51 | 	}
 52 | 	docFragment, err := doc.ParseFragment([]byte("<script>cool & fun</script>"), nil, DefaultParseOption)
 53 | 	if err != nil {
 54 | 		t.Error(err.Error())
 55 | 		return
 56 | 	}
 57 | 	if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "<script>cool & fun</script>" {
 58 | 		t.Error("the of children from the fragment do not match")
 59 | 	}
 60 | 
 61 | 	doc.Free()
 62 | 	CheckXmlMemoryLeaks(t)
 63 | }
 64 | 
 65 | func TestSearchDocumentFragment(t *testing.T) {
 66 | 	doc, err := Parse([]byte("<div class='cool'></div>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 67 | 	if err != nil {
 68 | 		println(err.Error())
 69 | 	}
 70 | 	docFragment, err := doc.ParseFragment([]byte("<div class='cool'><h1>"), nil, DefaultParseOption)
 71 | 	if err != nil {
 72 | 		t.Error(err.Error())
 73 | 		return
 74 | 	}
 75 | 	if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "<div class=\"cool\"><h1></h1></div>" {
 76 | 		t.Error("the of children from the fragment do not match")
 77 | 	}
 78 | 
 79 | 	nodes, err := docFragment.Search(".//*")
 80 | 	if err != nil {
 81 | 		t.Error("fragment search has error")
 82 | 		return
 83 | 	}
 84 | 	if len(nodes) != 2 {
 85 | 		t.Error("the number of children from the fragment does not match")
 86 | 	}
 87 | 	nodes, err = docFragment.Search("//div[@class='cool']")
 88 | 
 89 | 	if err != nil {
 90 | 		t.Error("fragment search has error")
 91 | 		return
 92 | 	}
 93 | 
 94 | 	if len(nodes) != 1 {
 95 | 		println(len(nodes))
 96 | 		for _, node := range nodes {
 97 | 			println(node.String())
 98 | 		}
 99 | 		t.Error("the number of children from the fragment's document does not match")
100 | 	}
101 | 
102 | 	doc.Free()
103 | 	CheckXmlMemoryLeaks(t)
104 | }
105 | 
106 | func TestAddFragmentWithNamespace(t *testing.T) {
107 | 	doc, err := Parse([]byte("<div class='cool'></div>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
108 | 	if err != nil {
109 | 		println(err.Error())
110 | 	}
111 | 	defer doc.Free()
112 | 	docFragment, err := doc.ParseFragment([]byte("<div xmlns='http://www.moovweb.com' class='cool'><h1>"), nil, DefaultParseOption)
113 | 	if err != nil {
114 | 		t.Error(err.Error())
115 | 		return
116 | 	}
117 | 	if docFragment.String() != `<div xmlns="http://www.moovweb.com" class="cool"><h1></h1></div>` {
118 | 		t.Errorf("doc fragment does not match\n")
119 | 	}
120 | 	doc2, err := Parse([]byte("<div class='not so cool'></div>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
121 | 	if err != nil {
122 | 		println(err.Error())
123 | 		return
124 | 	}
125 | 	defer doc2.Free()
126 | 	body := doc2.Root().FirstChild()
127 | 	body.AddChild(docFragment)
128 | 	if doc2.String() != `<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
129 | <html><body>
130 | <div class="not so cool"></div>
131 | <div xmlns="http://www.moovweb.com" class="cool"><h1></h1></div>
132 | </body></html>
133 | ` 	{
134 | 		t.Errorf("document does not match after adding a fragment with namespace\n")
135 | 	}
136 | 	CheckXmlMemoryLeaks(t)
137 | }
138 | 


--------------------------------------------------------------------------------
/xml/search_test.go:
--------------------------------------------------------------------------------
  1 | package xml
  2 | 
  3 | import "testing"
  4 | 
  5 | func TestSearch(t *testing.T) {
  6 | 
  7 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
  8 | 		root := doc.Root()
  9 | 		result, _ := root.Search(".//*[@class]")
 10 | 		if len(result) != 2 {
 11 | 			t.Error("search at root does not match")
 12 | 		}
 13 | 		result, _ = root.Search("//*[@class]")
 14 | 		if len(result) != 3 {
 15 | 			t.Error("search at root does not match")
 16 | 		}
 17 | 		result, _ = doc.Search(".//*[@class]")
 18 | 		if len(result) != 3 {
 19 | 			t.Error("search at doc does not match")
 20 | 		}
 21 | 		result, _ = doc.Search(".//*[@class='shine']")
 22 | 		if len(result) != 2 {
 23 | 			t.Error("search with value at doc does not match")
 24 | 		}
 25 | 	}
 26 | 
 27 | 	RunTest(t, "node", "search", testLogic)
 28 | }
 29 | 
 30 | func BenchmarkSearch(b *testing.B) {
 31 | 
 32 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
 33 | 		root := doc.Root()
 34 | 
 35 | 		for i := 0; i < b.N; i++ {
 36 | 			root.Search(".//*[@class]")
 37 | 		}
 38 | 	}
 39 | 
 40 | 	RunBenchmark(b, "node", "search", benchmarkLogic)
 41 | }
 42 | 
 43 | func BenchmarkBigDocDeepSearchyTagName(b *testing.B) {
 44 | 
 45 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
 46 | 
 47 | 		for i := 0; i < b.N; i++ {
 48 | 			doc.Search("//div")
 49 | 		}
 50 | 	}
 51 | 
 52 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
 53 | }
 54 | 
 55 | func BenchmarkBigDocPunctuatedDeepSearch(b *testing.B) {
 56 | 
 57 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
 58 | 
 59 | 		for i := 0; i < b.N; i++ {
 60 | 			doc.Search("//*[@class='filters']//div")
 61 | 		}
 62 | 	}
 63 | 
 64 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
 65 | }
 66 | 
 67 | func BenchmarkBigDocDeepSearchByID(b *testing.B) {
 68 | 
 69 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
 70 | 
 71 | 		for i := 0; i < b.N; i++ {
 72 | 			doc.Search("//*[@id='ppp']")
 73 | 			//nodes, _ := doc.Search("//*[@id='ppp']")
 74 | 			//fmt.Printf("%v\t", len(nodes))
 75 | 		}
 76 | 	}
 77 | 
 78 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
 79 | }
 80 | 
 81 | func BenchmarkBigDocDeepSearchByClass(b *testing.B) {
 82 | 
 83 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
 84 | 
 85 | 		for i := 0; i < b.N; i++ {
 86 | 			doc.Search("//*[@class]")
 87 | 			//nodes, _ := doc.Search("//*[@class]")
 88 | 			//fmt.Printf("%v\t", len(nodes))
 89 | 		}
 90 | 	}
 91 | 
 92 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
 93 | }
 94 | 
 95 | func BenchmarkBigDocDeepSearchByClassContains(b *testing.B) {
 96 | 
 97 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
 98 | 
 99 | 		for i := 0; i < b.N; i++ {
100 | 			doc.Search("//*[contains(@class, 'header')]")
101 | 		}
102 | 	}
103 | 
104 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
105 | }
106 | 
107 | func BenchmarkBigDocDeepSearchBySemanticClass(b *testing.B) {
108 | 
109 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
110 | 
111 | 		for i := 0; i < b.N; i++ {
112 | 			doc.Search("//*[contains(concat(concat(' ', @class), ' '), concat(concat(' ','header'), ' '))]")
113 | 		}
114 | 	}
115 | 
116 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
117 | }
118 | 
119 | func BenchmarkBigDocDeepSearchByText(b *testing.B) {
120 | 
121 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
122 | 
123 | 		for i := 0; i < b.N; i++ {
124 | 			doc.Search("//*[text()='hey']")
125 | 		}
126 | 	}
127 | 
128 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
129 | }
130 | 
131 | func BenchmarkBigDocDeepSearchByTextContains(b *testing.B) {
132 | 
133 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
134 | 
135 | 		for i := 0; i < b.N; i++ {
136 | 			doc.Search("//*[contains(text(),'hey')]")
137 | 		}
138 | 	}
139 | 
140 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
141 | }
142 | 
143 | func BenchmarkBigDocSearchAncestorAxes(b *testing.B) {
144 | 
145 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
146 | 		elem, _ := doc.Search("//*[@id='ppp']")
147 | 		for i := 0; i < b.N; i++ {
148 | 			elem[0].Search("ancestor::html")
149 | 		}
150 | 	}
151 | 
152 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
153 | }
154 | 
155 | func BenchmarkBigDocSearchLongTraverseUpToRoot(b *testing.B) {
156 | 
157 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
158 | 		elem, _ := doc.Search("//*[@id='ppp']")
159 | 
160 | 		for i := 0; i < b.N; i++ {
161 | 			elem[0].Search("../../../../../../../../..")
162 | 		}
163 | 	}
164 | 
165 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
166 | }
167 | 
168 | func BenchmarkBigDocSearchShortTraverseUpToRoot(b *testing.B) {
169 | 
170 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
171 | 		elem, _ := doc.Search("//*[@id='ppp']")
172 | 
173 | 		for i := 0; i < b.N; i++ {
174 | 			elem[0].Search("../../../..")
175 | 		}
176 | 	}
177 | 
178 | 	RunBenchmark(b, "document", "big_un", benchmarkLogic)
179 | }
180 | 


--------------------------------------------------------------------------------
/xml/utils_test.go:
--------------------------------------------------------------------------------
  1 | package xml
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"fmt"
  6 | 	"github.com/moovweb/gokogiri/help"
  7 | 	"github.com/moovweb/gokogiri/xpath"
  8 | 	"io/ioutil"
  9 | 	"path/filepath"
 10 | 	"strings"
 11 | 	"testing"
 12 | )
 13 | 
 14 | func badOutput(actual string, expected string) {
 15 | 	fmt.Printf("Got:\n[%v]\n", actual)
 16 | 	fmt.Printf("Expected:\n[%v]\n", expected)
 17 | }
 18 | 
 19 | func RunTest(t *testing.T, suite string, name string, specificLogic func(t *testing.T, doc *XmlDocument), extraAssertions ...func(doc *XmlDocument) (string, string, string)) {
 20 | 	defer CheckXmlMemoryLeaks(t)
 21 | 
 22 | 	//println("Initiating test:" + suite + ":" + name)
 23 | 
 24 | 	input, output, error := getTestData(filepath.Join("tests", suite, name))
 25 | 
 26 | 	if len(error) > 0 {
 27 | 		t.Errorf("Error gathering test data for %v:\n%v\n", name, error)
 28 | 		t.FailNow()
 29 | 	}
 30 | 
 31 | 	expected := string(output)
 32 | 
 33 | 	//println("Got raw input/output")
 34 | 
 35 | 	doc, err := parseInput(input)
 36 | 
 37 | 	if err != nil {
 38 | 		t.Error(err.Error())
 39 | 	}
 40 | 
 41 | 	//println("parsed input")
 42 | 
 43 | 	if specificLogic != nil {
 44 | 		specificLogic(t, doc)
 45 | 	}
 46 | 	if doc.String() != expected {
 47 | 		badOutput(doc.String(), expected)
 48 | 		t.Error("the output of the xml doc does not match")
 49 | 	}
 50 | 	for _, extraAssertion := range extraAssertions {
 51 | 		actual, expected, message := extraAssertion(doc)
 52 | 
 53 | 		if actual != expected {
 54 | 			badOutput(actual, expected)
 55 | 			t.Error(message)
 56 | 		}
 57 | 	}
 58 | 
 59 | 	doc.Free()
 60 | }
 61 | 
 62 | func RunBenchmark(b *testing.B, suite string, name string, specificLogic func(b *testing.B, doc *XmlDocument)) {
 63 | 	b.StopTimer()
 64 | 
 65 | 	//	defer CheckXmlMemoryLeaks(b)
 66 | 
 67 | 	input, _, error := getTestData(filepath.Join("tests", suite, name))
 68 | 
 69 | 	if len(error) > 0 {
 70 | 		panic(fmt.Sprintf("Error gathering test data for %v:\n%v\n", name, error))
 71 | 	}
 72 | 
 73 | 	doc, err := parseInput(input)
 74 | 
 75 | 	if err != nil {
 76 | 		panic("Error:" + err.Error())
 77 | 	}
 78 | 
 79 | 	b.StartTimer()
 80 | 
 81 | 	if specificLogic != nil {
 82 | 		specificLogic(b, doc)
 83 | 	}
 84 | 
 85 | 	doc.Free()
 86 | 
 87 | 	//	println("----------- END OF BENCHMARK -----------")
 88 | }
 89 | 
 90 | func parseInput(input interface{}) (*XmlDocument, error) {
 91 | 	var realInput []byte
 92 | 
 93 | 	switch thisInput := input.(type) {
 94 | 	case []byte:
 95 | 		realInput = thisInput
 96 | 	case string:
 97 | 		realInput = []byte(thisInput)
 98 | 	default:
 99 | 		return nil, errors.New("Unrecognized parsing input!")
100 | 	}
101 | 
102 | 	doc, err := Parse(realInput, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
103 | 
104 | 	if err != nil {
105 | 		return nil, errors.New(fmt.Sprintf("parsing error:%v\n", err.Error()))
106 | 	}
107 | 
108 | 	return doc, nil
109 | }
110 | 
// getTestData reads input.txt and output.txt from the directory name.
//
// Both files are always attempted. For each file that cannot be read, a
// tab-indented description is appended to the returned message; the message
// is empty when both reads succeed.
func getTestData(name string) (input []byte, output []byte, error string) {
	const indent = "\t"
	var failures string

	input, readErr := ioutil.ReadFile(filepath.Join(name, "input.txt"))
	if readErr != nil {
		failures += fmt.Sprintf("%vCouldn't read test (%v) input:\n%v\n", indent, name, indent+readErr.Error())
	}

	output, readErr = ioutil.ReadFile(filepath.Join(name, "output.txt"))
	if readErr != nil {
		failures += fmt.Sprintf("%vCouldn't read test (%v) output:\n%v\n", indent, name, indent+readErr.Error())
	}

	return input, output, failures
}
130 | 
// collectTests lists the sub-directories of tests/<suite>, skipping entries
// whose name starts with "_" or ".". Each returned name is the path
// tests/<suite>/<entry>.
//
// When the suite directory cannot be read, names is nil and the returned
// message describes the failure; otherwise the message is empty.
func collectTests(suite string) (names []string, error string) {
	testPath := filepath.Join("tests", suite)

	entries, readErr := ioutil.ReadDir(testPath)
	if readErr != nil {
		return nil, fmt.Sprintf("Couldn't read tests:\n%v\n", readErr.Error())
	}

	for _, entry := range entries {
		entryName := entry.Name()
		skipped := strings.HasPrefix(entryName, "_") || strings.HasPrefix(entryName, ".")
		if !skipped && entry.IsDir() {
			names = append(names, filepath.Join(testPath, entryName))
		}
	}

	return names, ""
}
151 | 
152 | func CheckXmlMemoryLeaks(t *testing.T) {
153 | 	// LibxmlCleanUpParser() should only be called once during the lifetime of the
154 | 	// program, but because there's no way to know when the last test of the suite
155 | 	// runs in go, we can't accurately call it strictly once, so just avoid calling
156 | 	// it for now because it's known to cause crashes if called multiple times.
157 | 	//help.LibxmlCleanUpParser()
158 | 
159 | 	if !help.LibxmlCheckMemoryLeak() {
160 | 		t.Errorf("Memory leaks: %d!!!", help.LibxmlGetMemoryAllocation())
161 | 		help.LibxmlReportMemoryLeak()
162 | 	}
163 | }
164 | 
165 | // This is a simple test implementation of the VariableScope interface.
166 | // Currently it's os simple it ignores the namespace argument.
167 | type SimpleVariableScope struct {
168 | 	variables map[string]interface{}
169 | 	functions map[string]xpath.XPathFunction
170 | }
171 | 
172 | func (s *SimpleVariableScope) ResolveVariable(name, ns string) interface{} {
173 | 	v, _ := s.variables[name]
174 | 	return v
175 | }
176 | 
177 | func (s *SimpleVariableScope) IsFunctionRegistered(name, ns string) bool {
178 | 	_, ok := s.functions[name]
179 | 	return ok
180 | }
181 | func (s *SimpleVariableScope) ResolveFunction(name, ns string) xpath.XPathFunction {
182 | 	f, _ := s.functions[name]
183 | 	return f
184 | }
185 | 
186 | func newSimpleVariableScope() *SimpleVariableScope {
187 | 	s := &SimpleVariableScope{make(map[string]interface{}), make(map[string]xpath.XPathFunction)}
188 | 	return s
189 | }
190 | 


--------------------------------------------------------------------------------
/xml/helper.c:
--------------------------------------------------------------------------------
  1 | #include <string.h>
  2 | #include "helper.h"
  3 | 
  4 | //internal callback functions
  5 | int xml_write_callback(void *ctx, char *buffer, int len) {
  6 | 	if (len > 0) {
  7 | 		xmlNodeWriteCallback(ctx, buffer, len);
  8 | 	}
  9 |   	return len;
 10 | }
 11 | 
 12 | int close_callback(void * ctx) {
 13 |   	return 0;
 14 | }
 15 | 
 16 | xmlDoc* newEmptyXmlDoc() {
 17 | 	return xmlNewDoc(BAD_CAST XML_DEFAULT_VERSION);
 18 | }
 19 | 
 20 | xmlElementType getNodeType(xmlNode *node) { return node->type; }
 21 | 
 22 | void xmlFreeChars(char *buffer) {
 23 | 	if (buffer) {
 24 | 		xmlFree((xmlChar*)buffer);
 25 | 	}
 26 | }
 27 | 
 28 | char *xmlDocDumpToString(xmlDoc *doc, void *encoding, int format) {
 29 | 	xmlChar *buff;
 30 | 	int buffersize;
 31 | 	xmlDocDumpFormatMemoryEnc(doc, &buff, &buffersize, (char*)encoding, format);
 32 | 	return (char*)buff;
 33 | }
 34 | 
 35 | char *htmlDocDumpToString(htmlDocPtr doc, int format) {
 36 | 	xmlChar *buff;
 37 | 	int buffersize;
 38 | 	htmlDocDumpMemoryFormat(doc, &buff, &buffersize, format);
 39 | 	return (char*)buff;
 40 | }
 41 | 
 42 | xmlDoc* xmlParse(void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) {
 43 | 	const char *c_buffer       = (char*)buffer;
 44 | 	const char *c_url          = (char*)url;
 45 | 	const char *c_encoding     = (char*)encoding;
 46 | 	xmlDoc *doc = NULL;
 47 | 
 48 | 	xmlResetLastError();
 49 | 	doc = xmlReadMemory(c_buffer, buffer_len, c_url, c_encoding, options);
 50 | 
 51 | 	if(doc == NULL) {
 52 | 		xmlErrorPtr error;
 53 | 	    xmlFreeDoc(doc);
 54 | 	    error = xmlGetLastError();
 55 | 		if(error != NULL && error_buffer != NULL && error->level >= XML_ERR_ERROR) {
 56 | 			char *c_error_buffer = (char*)error_buffer;
 57 | 			if (error->message != NULL) {
 58 | 				strncpy(c_error_buffer, error->message, error_buffer_len-1);
 59 | 				c_error_buffer[error_buffer_len-1] = '\0';
 60 | 			}
 61 | 			else {
 62 | 				//snprintf(c_error_buffer, error_buffer_len, "xml parsing error:%d", error->code);
 63 | 			}
 64 | 		}
 65 | 	}
 66 | 	return doc;
 67 | }
 68 | 
 69 | xmlNode* xmlParseFragment(void *doc, void *buffer, int buffer_len, void *url, int options, void *error_buffer, int error_buffer_len) {
 70 | 	xmlNodePtr root_element = NULL;
 71 | 	xmlParserErrors errCode;
 72 | 	errCode = xmlParseInNodeContext((xmlNodePtr)doc, buffer, buffer_len, options, &root_element);
 73 | 	if (errCode != XML_ERR_OK) {
 74 | 		if (error_buffer != NULL && error_buffer_len > 0) {
 75 | 			//char *c_error_buffer = (char*)error_buffer;
 76 | 			//snprintf(c_error_buffer, error_buffer_len, "xml fragemnt parsing error (xmlParserErrors):%d", errCode);
 77 | 		}
 78 | 		printf("errorcode %d\n", errCode);
 79 | 		return NULL;
 80 | 	}
 81 | 	return root_element;
 82 | }
 83 | 
 84 | xmlNode* xmlParseFragmentAsDoc(void *doc, void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) {
 85 | 	xmlDoc* tmpDoc = NULL;
 86 | 	xmlNode* tmpRoot = NULL;
 87 | 	tmpDoc = xmlReadMemory((char*)buffer, buffer_len, (char*)url, (char*)encoding, options);
 88 | 	if (tmpDoc == NULL) {
 89 | 		return NULL;
 90 | 	}
 91 | 	tmpRoot = xmlDocGetRootElement(tmpDoc);
 92 | 	if (tmpRoot == NULL) {
 93 | 		return NULL;
 94 | 	}
 95 | 	tmpRoot = xmlDocCopyNode(tmpRoot, doc, 1);
 96 | 	xmlFreeDoc(tmpDoc);
 97 | 	return tmpRoot;
 98 | }
 99 | 
100 | void xmlSetContent(void *gonode, void *n, void *content) {
101 | 	xmlNode *node = (xmlNode*)n;
102 | 	xmlNode *child = node->children;
103 | 	xmlNode *next = NULL;
104 | 	unsigned char *encoded = xmlEncodeSpecialChars(node->doc, content);
105 | 	if (encoded) {
106 | 		while (child) {
107 | 			next = child->next ;
108 | 			xmlUnlinkNode(child);
109 | 			//xmlFreeNode(child);
110 | 			xmlUnlinkNodeCallback(child, gonode);
111 | 			child = next ;
112 | 	  	}
113 | 	  	xmlNodeSetContent(node, (xmlChar*)encoded);
114 | 		xmlFree(encoded);
115 | 	}
116 | }
117 | 
118 | int xmlUnlinkNodeWithCheck(xmlNode *node) {
119 | 	if (xmlNodePtrCheck(node->parent)) {
120 | 		xmlUnlinkNode(node);
121 | 		return 1;
122 | 	}
123 | 	return 0;
124 | }
125 | 
// Returns 0 when node equals the sentinel value (void*)(-1) and 1 for any
// other pointer, including NULL.
int xmlNodePtrCheck(void *node) {
	return (node == (void*)(-1)) ? 0 : 1;
}
131 | 
132 | int xmlSaveNode(void *wbuffer, void *node, void *encoding, int options) {
133 | 	xmlSaveCtxtPtr savectx;
134 | 	const char *c_encoding = (char*)encoding;
135 | 
136 | 	savectx = xmlSaveToIO(
137 | 	      (xmlOutputWriteCallback)xml_write_callback,
138 | 	      (xmlOutputCloseCallback)close_callback,
139 | 	      wbuffer,
140 | 	      encoding,
141 | 	      options
142 | 	  );
143 | 	xmlSaveTree(savectx, (xmlNode*)node);
144 | 	return xmlSaveClose(savectx);
145 | }
146 | 
147 | void removeNamespace(xmlNs **source, xmlNs *target) {
148 |     xmlNs *ns, *prevns = NULL;
149 | 
150 |     for (ns = *source; ns; ns = ns->next) {
151 |         if (ns == target) {
152 |             if (!prevns) {
153 |                 // we are the first element
154 |                 *source = ns->next;
155 |             } else {
156 |                 prevns->next = ns->next;
157 |             }
158 | 
159 |             break;
160 |         }
161 | 
162 |         prevns = ns;
163 |     }
164 | }
165 | 
166 | void removeDefaultNamespace(xmlNs *ns, xmlNode *node) {
167 |     removeNamespace(&node->nsDef, ns);
168 | 
169 |     xmlAttr *attr;
170 | 
171 |     for (attr = node->properties; attr; attr = attr->next) {
172 |         if (!attr->ns)
173 |             continue;
174 | 
175 |         removeNamespace(&attr->ns, ns);
176 |     }
177 | 
178 |     if (node->ns == ns)
179 |         node->ns = NULL;
180 | 
181 |     xmlNode *child;
182 | 
183 |     for (child = xmlFirstElementChild(node); child; child = xmlNextElementSibling(child)) {
184 |         removeDefaultNamespace(ns, child);
185 |     }
186 | }
187 | 
188 | void xmlRemoveDefaultNamespace(xmlNode *node) {
189 |     if (node->ns && node->ns->prefix) {
190 |         // not a default namespace
191 |         return;
192 |     }
193 | 
194 |     removeDefaultNamespace(node->ns, node);
195 | }
196 | 


--------------------------------------------------------------------------------
/html/node_test.go:
--------------------------------------------------------------------------------
  1 | package html
  2 | 
  3 | import "testing"
  4 | 
  5 | func TestInnerScript(t *testing.T) {
  6 | 	defer CheckXmlMemoryLeaks(t)
  7 | 
  8 | 	doc, err := Parse([]byte("<html><body><div><h1></div>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
  9 | 
 10 | 	if err != nil {
 11 | 		t.Error("Parsing has error:", err)
 12 | 		return
 13 | 	}
 14 | 
 15 | 	h1 := doc.Root().FirstChild().FirstChild().FirstChild()
 16 | 	h1.SetInnerHtml("<script>if (suppressReviews !== 'true' && app == 'PRR') { ok = true; }</script>")
 17 | 	if h1.String() != "<h1><script>if (suppressReviews !== 'true' && app == 'PRR') { ok = true; }</script></h1>" {
 18 | 		t.Error("script does not match")
 19 | 	}
 20 | 	doc.Free()
 21 | }
 22 | 
// TestInnerScript2 sets a multi-line script element (containing quotes, '&&'
// and a URL) as the inner HTML of an h1 and checks that the h1 serializes to
// exactly "<h1>" + script + "</h1>".
func TestInnerScript2(t *testing.T) {
	defer CheckXmlMemoryLeaks(t)
	script := `<script>try {
var productNAPage = "",
suppressReviews = "false";
var bvtoken = MACYS.util.Cookie.get("BazaarVoiceToken","GCs");
//bvtoken=bvtoken.substring(0,bvtoken.length-1);
$BV.configure("global", {
userToken: bvtoken,
productId: '531726',
submissionUI: 'LIGHTBOX',
submissionContainerUrl: window.location.href,
allowSamePageSubmission: true,
doLogin: function(callback, success_url) {
MACYS.util.Cookie.set("FORWARDPAGE_KEY",success_url);
window.location = 'https://www.macys.com/signin/index.ognc?fromPage=pdpReviews';
},
doShowContent: function(app, dc, sub, sr) {
if (suppressReviews !== 'true' && app == "PRR") {
MACYS.pdp.showReviewsTab();
} else if (productNAPage !== 'true' && app == "QA") {
MACYS.pdp.showQATab();
}
}
});
if (suppressReviews !== 'true') {
$BV.ui('rr', 'show_reviews', {
});
}
$BV.ui("qa", "show_questions", {
subjectType: 'product'
});
} catch ( e ) { }</script>`

	doc, err := Parse([]byte("<html><body><div><h1></div>"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)

	if err != nil {
		t.Error("Parsing has error:", err)
		return
	}

	// html -> body -> div -> h1
	h1 := doc.Root().FirstChild().FirstChild().FirstChild()
	h1.SetInnerHtml(script)
	if h1.String() != "<h1>"+script+"</h1>" {
		t.Error("script does not match")
	}
	doc.Free()
}
 71 | 
 72 | func TestInsertMyselfBefore(t *testing.T) {
 73 | 	input := `<html>
 74 | <head>
 75 | <title> Title </title>
 76 | </head>
 77 | <body>
 78 | <div id="header"></div>
 79 | <h1> Welcome to Tritium Tester </h1>
 80 | </body>
 81 | </html>
 82 | `
 83 | 	doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 84 | 
 85 | 	if err != nil {
 86 | 		t.Error("Parsing has error:", err)
 87 | 		return
 88 | 	}
 89 | 
 90 | 	divs, _ := doc.Search("//div")
 91 | 	if len(divs) != 1 {
 92 | 		t.Error("should have 1 div")
 93 | 		return
 94 | 	}
 95 | 
 96 | 	div := divs[0]
 97 | 	div.InsertBefore(div)
 98 | 
 99 | 	expected := `<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
100 | <html>
101 | <head>
102 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
103 | <title> Title </title>
104 | </head>
105 | <body>
106 | <div id="header"></div>
107 | <h1> Welcome to Tritium Tester </h1>
108 | </body>
109 | </html>
110 | `
111 | 	if expected != doc.String() {
112 | 		t.Error("doc is not expected:\n", doc.String(), "\n", expected)
113 | 	}
114 | 	doc.Free()
115 | 	CheckXmlMemoryLeaks(t)
116 | }
117 | 
118 | func TestInsertMyselfAfter(t *testing.T) {
119 | 	input := `<html>
120 | <head>
121 | <title> Title </title>
122 | </head>
123 | <body>
124 | <div id="header"></div>
125 | <h1> Welcome to Tritium Tester </h1>
126 | </body>
127 | </html>
128 | `
129 | 	doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
130 | 
131 | 	if err != nil {
132 | 		t.Error("Parsing has error:", err)
133 | 		return
134 | 	}
135 | 
136 | 	divs, _ := doc.Search("//div")
137 | 	if len(divs) != 1 {
138 | 		t.Error("should have 1 div")
139 | 		return
140 | 	}
141 | 
142 | 	div := divs[0]
143 | 	div.InsertAfter(div)
144 | 
145 | 	expected := `<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
146 | <html>
147 | <head>
148 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
149 | <title> Title </title>
150 | </head>
151 | <body>
152 | <div id="header"></div>
153 | <h1> Welcome to Tritium Tester </h1>
154 | </body>
155 | </html>
156 | `
157 | 	if expected != doc.String() {
158 | 		t.Error("doc is not expected:\n", doc.String(), "\n", expected)
159 | 	}
160 | 	doc.Free()
161 | 	CheckXmlMemoryLeaks(t)
162 | }
163 | 
164 | func TestAddMyselfChild(t *testing.T) {
165 | 	input := `<html>
166 | <head>
167 | <title> Title </title>
168 | </head>
169 | <body>
170 | <div id="header"></div>
171 | <h1> Welcome to Tritium Tester </h1>
172 | </body>
173 | </html>
174 | `
175 | 	doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
176 | 
177 | 	if err != nil {
178 | 		t.Error("Parsing has error:", err)
179 | 		return
180 | 	}
181 | 
182 | 	divs, _ := doc.Search("//div")
183 | 	if len(divs) != 1 {
184 | 		t.Error("should have 1 div")
185 | 		return
186 | 	}
187 | 
188 | 	div := divs[0]
189 | 	div.AddChild(div)
190 | 
191 | 	expected := `<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
192 | <html>
193 | <head>
194 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
195 | <title> Title </title>
196 | </head>
197 | <body>
198 | <div id="header"></div>
199 | <h1> Welcome to Tritium Tester </h1>
200 | </body>
201 | </html>
202 | `
203 | 	if expected != doc.String() {
204 | 		t.Error("doc is not expected:\n", doc.String(), "\n", expected)
205 | 	}
206 | 	doc.Free()
207 | 	CheckXmlMemoryLeaks(t)
208 | }
209 | 
210 | func TestRemoveMeRemoveParent(t *testing.T) {
211 | 	input := `<html>
212 | <head>
213 | <title> Title </title>
214 | </head>
215 | <body>
216 | <div id="header"><h1> Welcome to Tritium Tester </h1></div>
217 | </body>
218 | </html>
219 | `
220 | 	doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
221 | 
222 | 	if err != nil {
223 | 		t.Error("Parsing has error:", err)
224 | 		return
225 | 	}
226 | 
227 | 	divs, _ := doc.Search("//div")
228 | 	if len(divs) != 1 {
229 | 		t.Error("should have 1 div")
230 | 		return
231 | 	}
232 | 
233 | 	div := divs[0]
234 | 	h1 := div.FirstChild()
235 | 	nodes, _ := h1.Search("..")
236 | 	h1.Remove()
237 | 	nodes, _ = h1.Search("..")
238 | 	if len(nodes) != 1 {
239 | 		t.Error("removed node should have a parent , i.e. its document")
240 | 	}
241 | 	nodes[0].Remove()
242 | 	doc.Free()
243 | 	CheckXmlMemoryLeaks(t)
244 | }
245 | 


--------------------------------------------------------------------------------
/xpath/xpath.go:
--------------------------------------------------------------------------------
  1 | package xpath
  2 | 
/*
#cgo pkg-config: libxml-2.0

#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxml/parser.h>

// fetchNode returns entry `index` of the nodeset's nodeTab array.
// No bounds check is done here; the caller must keep index within range.
xmlNode* fetchNode(xmlNodeSet *nodeset, int index) {
    return nodeset->nodeTab[index];
}

// Declared here and defined outside this preamble
// (presumably exported from Go -- TODO confirm).
xmlXPathObjectPtr go_resolve_variables(void* ctxt, char* name, char* ns);
int go_can_resolve_function(void* ctxt, char* name, char* ns);
void exec_xpath_function(xmlXPathParserContextPtr ctxt, int nargs);

// go_resolve_function is installed as the context's function lookup hook.
// It returns exec_xpath_function when go_can_resolve_function reports a
// non-zero result for (name, ns), and 0 otherwise.
xmlXPathFunction go_resolve_function(void* ctxt, char* name, char* ns) {
    if (go_can_resolve_function(ctxt, name, ns))
        return exec_xpath_function;

    return 0;
}

// set_var_lookup installs go_resolve_variables as the variable lookup hook of
// the context, with `data` as the value stored in varLookupData.
static void set_var_lookup(xmlXPathContext* c, void* data) {
    c->varLookupFunc = (void *)go_resolve_variables;
    c->varLookupData = data;
}

// set_function_lookup installs go_resolve_function as the function lookup
// hook of the context, with `data` as the value stored in funcLookupData.
static void set_function_lookup(xmlXPathContext* c, void* data) {
    c->funcLookupFunc = (void *)go_resolve_function;
    c->funcLookupData = data;
}

// getXPathObjectType returns the object's type field, or 0 for a NULL object.
int getXPathObjectType(xmlXPathObject* o) {
    if(o == 0)
        return 0;
    return o->type;
}
*/
 41 | import "C"
 42 | 
 43 | import "unsafe"
 44 | import . "github.com/moovweb/gokogiri/util"
 45 | import "runtime"
 46 | import "errors"
 47 | 
// XPath pairs a libxml2 XPath evaluation context with the result object of
// the most recent evaluation performed through it.
type XPath struct {
	// ContextPtr is the context created by NewXPath via xmlXPathNewContext.
	ContextPtr *C.xmlXPathContext
	// ResultPtr is the result stored by Evaluate; NewXPath leaves it nil.
	ResultPtr  *C.xmlXPathObject
}
 52 | 
// XPathObjectType identifies the kind of value held by an XPath result, as
// reported by ReturnType.
type XPathObjectType int

// Result kinds. The numeric values are compared directly against the type
// field read from the C result object.
//
// NOTE(review): only XPATH_UNDEFINED is declared with the XPathObjectType
// type; the remaining constants are untyped integer constants. Comparisons
// against an XPathObjectType still compile, but the constants do not carry
// the type themselves -- confirm whether callers rely on that before changing.
const (
	XPATH_UNDEFINED   XPathObjectType = 0
	XPATH_NODESET                     = 1
	XPATH_BOOLEAN                     = 2
	XPATH_NUMBER                      = 3
	XPATH_STRING                      = 4
	XPATH_POINT                       = 5
	XPATH_RANGE                       = 6
	XPATH_LOCATIONSET                 = 7
	XPATH_USERS                       = 8
	XPATH_XSLT_TREE                   = 9 // An XSLT value tree, non modifiable
)
 67 | 
// XPathFunction is the signature of a function that a VariableScope can
// supply: it receives the scope and the call arguments and returns a value.
type XPathFunction func(context VariableScope, args []interface{}) interface{}

// Types that provide the VariableScope interface know how to resolve
// XPath variable names into values.
//
// This interface exists primarily for the benefit of XSLT processors.
type VariableScope interface {
	// ResolveVariable takes two strings and returns the resolved value
	// (presumably name and namespace, in that order -- TODO confirm).
	ResolveVariable(string, string) interface{}
	// IsFunctionRegistered reports whether a function can be resolved for
	// the two given strings.
	IsFunctionRegistered(string, string) bool
	// ResolveFunction returns the function for the two given strings.
	ResolveFunction(string, string) XPathFunction
}
 79 | 
 80 | func NewXPath(docPtr unsafe.Pointer) (xpath *XPath) {
 81 | 	if docPtr == nil {
 82 | 		return
 83 | 	}
 84 | 	xpath = &XPath{ContextPtr: C.xmlXPathNewContext((*C.xmlDoc)(docPtr)), ResultPtr: nil}
 85 | 	runtime.SetFinalizer(xpath, (*XPath).Free)
 86 | 	return
 87 | }
 88 | 
 89 | func (xpath *XPath) RegisterNamespace(prefix, href string) bool {
 90 | 	var prefixPtr unsafe.Pointer = nil
 91 | 	if len(prefix) > 0 {
 92 | 		prefixBytes := AppendCStringTerminator([]byte(prefix))
 93 | 		prefixPtr = unsafe.Pointer(&prefixBytes[0])
 94 | 	}
 95 | 
 96 | 	var hrefPtr unsafe.Pointer = nil
 97 | 	if len(href) > 0 {
 98 | 		hrefBytes := AppendCStringTerminator([]byte(href))
 99 | 		hrefPtr = unsafe.Pointer(&hrefBytes[0])
100 | 	}
101 | 
102 | 	result := C.xmlXPathRegisterNs(xpath.ContextPtr, (*C.xmlChar)(prefixPtr), (*C.xmlChar)(hrefPtr))
103 | 	return result == 0
104 | }
105 | 
106 | // Evaluate an XPath and attempt to consume the result as a nodeset.
107 | func (xpath *XPath) EvaluateAsNodeset(nodePtr unsafe.Pointer, xpathExpr *Expression) (nodes []unsafe.Pointer, err error) {
108 | 	if nodePtr == nil {
109 | 		//evaluating xpath on a  nil node returns no result.
110 | 		return
111 | 	}
112 | 
113 | 	err = xpath.Evaluate(nodePtr, xpathExpr)
114 | 	if err != nil {
115 | 		return
116 | 	}
117 | 
118 | 	nodes, err = xpath.ResultAsNodeset()
119 | 	return
120 | }
121 | 
122 | // Evaluate an XPath. The returned result is stored in the struct. Call ReturnType to
123 | // discover the type of result, and call one of the ResultAs* functions to return a
124 | // copy of the result as a particular type.
125 | func (xpath *XPath) Evaluate(nodePtr unsafe.Pointer, xpathExpr *Expression) (err error) {
126 | 	if nodePtr == nil {
127 | 		//evaluating xpath on a nil node returns no result.
128 | 		return
129 | 	}
130 | 
131 | 	oldXPContextDoc := xpath.ContextPtr.doc
132 | 	oldXPContextNode := xpath.ContextPtr.node
133 | 	oldXPProximityPosition := xpath.ContextPtr.proximityPosition
134 | 	oldXPContextSize := xpath.ContextPtr.contextSize
135 | 	oldXPNsNr := xpath.ContextPtr.nsNr
136 | 	oldXPNamespaces := xpath.ContextPtr.namespaces
137 | 
138 | 	xpath.ContextPtr.node = (*C.xmlNode)(nodePtr)
139 | 	if xpath.ResultPtr != nil {
140 | 		C.xmlXPathFreeObject(xpath.ResultPtr)
141 | 	}
142 | 	xpath.ResultPtr = C.xmlXPathCompiledEval(xpathExpr.Ptr, xpath.ContextPtr)
143 | 
144 | 	xpath.ContextPtr.doc = oldXPContextDoc
145 | 	xpath.ContextPtr.node = oldXPContextNode
146 | 	xpath.ContextPtr.proximityPosition = oldXPProximityPosition
147 | 	xpath.ContextPtr.contextSize = oldXPContextSize
148 | 	xpath.ContextPtr.nsNr = oldXPNsNr
149 | 	xpath.ContextPtr.namespaces = oldXPNamespaces
150 | 
151 | 	if xpath.ResultPtr == nil {
152 | 		err = errors.New("err in evaluating xpath: " + xpathExpr.String())
153 | 		return
154 | 	}
155 | 	return
156 | }
157 | 
158 | // Determine the actual return type of the XPath evaluation.
159 | func (xpath *XPath) ReturnType() XPathObjectType {
160 | 	return XPathObjectType(C.getXPathObjectType(xpath.ResultPtr))
161 | }
162 | 
163 | // Get the XPath result as a nodeset.
164 | func (xpath *XPath) ResultAsNodeset() (nodes []unsafe.Pointer, err error) {
165 | 	if xpath.ResultPtr == nil {
166 | 		return
167 | 	}
168 | 
169 | 	if xpath.ReturnType() != XPATH_NODESET {
170 | 		err = errors.New("Cannot convert XPath result to nodeset")
171 | 	}
172 | 
173 | 	if nodesetPtr := xpath.ResultPtr.nodesetval; nodesetPtr != nil {
174 | 		if nodesetSize := int(nodesetPtr.nodeNr); nodesetSize > 0 {
175 | 			nodes = make([]unsafe.Pointer, nodesetSize)
176 | 			for i := 0; i < nodesetSize; i++ {
177 | 				nodes[i] = unsafe.Pointer(C.fetchNode(nodesetPtr, C.int(i)))
178 | 			}
179 | 		}
180 | 	}
181 | 	return
182 | }
183 | 
184 | // Coerce the result into a string
185 | func (xpath *XPath) ResultAsString() (val string, err error) {
186 | 	if xpath.ReturnType() != XPATH_STRING {
187 | 		xpath.ResultPtr = C.xmlXPathConvertString(xpath.ResultPtr)
188 | 	}
189 | 	val = C.GoString((*C.char)(unsafe.Pointer(xpath.ResultPtr.stringval)))
190 | 	return
191 | }
192 | 
193 | // Coerce the result into a number
194 | func (xpath *XPath) ResultAsNumber() (val float64, err error) {
195 | 	if xpath.ReturnType() != XPATH_NUMBER {
196 | 		xpath.ResultPtr = C.xmlXPathConvertNumber(xpath.ResultPtr)
197 | 	}
198 | 	val = float64(xpath.ResultPtr.floatval)
199 | 	return
200 | }
201 | 
202 | // Coerce the result into a boolean
203 | func (xpath *XPath) ResultAsBoolean() (val bool, err error) {
204 | 	xpath.ResultPtr = C.xmlXPathConvertBoolean(xpath.ResultPtr)
205 | 	val = xpath.ResultPtr.boolval != 0
206 | 	return
207 | }
208 | 
209 | // Add a variable resolver.
210 | func (xpath *XPath) SetResolver(v VariableScope) {
211 | 	C.set_var_lookup(xpath.ContextPtr, unsafe.Pointer(&v))
212 | 	C.set_function_lookup(xpath.ContextPtr, unsafe.Pointer(&v))
213 | }
214 | 
215 | // SetContextPosition sets the internal values needed to
216 | // determine the values of position() and last() for the
217 | // current context node.
218 | func (xpath *XPath) SetContextPosition(position, size int) {
219 | 	xpath.ContextPtr.proximityPosition = C.int(position)
220 | 	xpath.ContextPtr.contextSize = C.int(size)
221 | }
222 | 
223 | // GetContextPosition retrieves the internal values used to
224 | // determine the values of position() and last() for the
225 | // current context node.
226 | 
227 | // This allows values to saved and restored during processing
228 | // of a document.
229 | func (xpath *XPath) GetContextPosition() (position, size int) {
230 | 	position = int(xpath.ContextPtr.proximityPosition)
231 | 	size = int(xpath.ContextPtr.contextSize)
232 | 	return
233 | }
234 | 
235 | func (xpath *XPath) Free() {
236 | 	if xpath.ContextPtr != nil {
237 | 		C.xmlXPathFreeContext(xpath.ContextPtr)
238 | 		xpath.ContextPtr = nil
239 | 	}
240 | 	if xpath.ResultPtr != nil {
241 | 		C.xmlXPathFreeObject(xpath.ResultPtr)
242 | 		xpath.ResultPtr = nil
243 | 	}
244 | }
245 | 
246 | func XPathObjectToValue(obj C.xmlXPathObjectPtr) (result interface{}) {
247 | 	rt := XPathObjectType(C.getXPathObjectType(obj))
248 | 	switch rt {
249 | 	case XPATH_NODESET, XPATH_XSLT_TREE:
250 | 		if nodesetPtr := obj.nodesetval; nodesetPtr != nil {
251 | 			if nodesetSize := int(nodesetPtr.nodeNr); nodesetSize > 0 {
252 | 				nodes := make([]unsafe.Pointer, nodesetSize)
253 | 				for i := 0; i < nodesetSize; i++ {
254 | 					nodes[i] = unsafe.Pointer(C.fetchNode(nodesetPtr, C.int(i)))
255 | 				}
256 | 				result = nodes
257 | 				return
258 | 			}
259 | 		}
260 | 		result = nil
261 | 	case XPATH_NUMBER:
262 | 		obj = C.xmlXPathConvertNumber(obj)
263 | 		result = float64(obj.floatval)
264 | 	case XPATH_BOOLEAN:
265 | 		obj = C.xmlXPathConvertBoolean(obj)
266 | 		result = obj.boolval != 0
267 | 	default:
268 | 		obj = C.xmlXPathConvertString(obj)
269 | 		result = C.GoString((*C.char)(unsafe.Pointer(obj.stringval)))
270 | 	}
271 | 	return
272 | }
273 | 


--------------------------------------------------------------------------------
/xml/document_test.go:
--------------------------------------------------------------------------------
  1 | package xml
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io/ioutil"
  6 | 	"os"
  7 | 	"path/filepath"
  8 | 	"runtime"
  9 | 	"strings"
 10 | 	"testing"
 11 | )
 12 | 
 13 | func TestDocuments(t *testing.T) {
 14 | 	if runtime.GOOS == "windows" {
 15 | 		return
 16 | 	}
 17 | 	tests, err := collectTests("document")
 18 | 
 19 | 	if len(err) > 0 {
 20 | 		t.Errorf(err)
 21 | 	}
 22 | 
 23 | 	errors := make([]string, 0)
 24 | 
 25 | 	print("\nTesting: Basic Parsing [")
 26 | 
 27 | 	for _, test := range tests {
 28 | 		error := RunDocumentParseTest(t, test)
 29 | 
 30 | 		if error != nil {
 31 | 			errors = append(errors, fmt.Sprintf("Test %v failed:\n%v\n", test, *error))
 32 | 			print("F")
 33 | 		} else {
 34 | 			print(".")
 35 | 		}
 36 | 	}
 37 | 
 38 | 	println("]")
 39 | 
 40 | 	if len(errors) > 0 {
 41 | 		errorMessage := "\t" + strings.Join(strings.Split(strings.Join(errors, "\n\n"), "\n"), "\n\t")
 42 | 		t.Errorf("\nSome tests failed! (%d passed / %d total) :\n%v", len(tests)-len(errors), len(tests), errorMessage)
 43 | 	} else {
 44 | 		fmt.Printf("\nAll (%d) tests passed!\n", len(tests))
 45 | 	}
 46 | }
 47 | 
 48 | func TestBufferedDocuments(t *testing.T) {
 49 | 	if runtime.GOOS == "windows" {
 50 | 		return
 51 | 	}
 52 | 	tests, err := collectTests("document")
 53 | 
 54 | 	if len(err) > 0 {
 55 | 		t.Errorf(err)
 56 | 	}
 57 | 
 58 | 	errors := make([]string, 0)
 59 | 
 60 | 	print("\nTesting: Buffered Parsing [")
 61 | 
 62 | 	for _, test := range tests {
 63 | 		error := RunParseDocumentWithBufferTest(t, test)
 64 | 
 65 | 		if error != nil {
 66 | 			errors = append(errors, fmt.Sprintf("Test %v failed:\n%v\n", test, *error))
 67 | 			print("F")
 68 | 		} else {
 69 | 			print(".")
 70 | 		}
 71 | 	}
 72 | 
 73 | 	println("]")
 74 | 
 75 | 	if len(errors) > 0 {
 76 | 		errorMessage := "\t" + strings.Join(strings.Split(strings.Join(errors, "\n\n"), "\n"), "\n\t")
 77 | 		t.Errorf("\nSome tests failed! (%d passed / %d total) :\n%v", len(tests)-len(errors), len(tests), errorMessage)
 78 | 	} else {
 79 | 		fmt.Printf("\nAll (%d) tests passed!\n", len(tests))
 80 | 	}
 81 | }
 82 | 
 83 | func RunParseDocumentWithBufferTest(t *testing.T, name string) (error *string) {
 84 | 	var errorMessage string
 85 | 	offset := "\t"
 86 | 
 87 | 	defer CheckXmlMemoryLeaks(t)
 88 | 
 89 | 	input, output, dataError := getTestData(name)
 90 | 
 91 | 	if len(dataError) > 0 {
 92 | 		errorMessage += dataError
 93 | 	}
 94 | 
 95 | 	buffer := make([]byte, 500000)
 96 | 
 97 | 	doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
 98 | 
 99 | 	if err != nil {
100 | 		errorMessage = fmt.Sprintf("parsing error:%v\n", err)
101 | 	}
102 | 
103 | 	if string(doc.ToBuffer(buffer)) != string(output) {
104 | 		formattedOutput := offset + strings.Join(strings.Split("["+doc.String()+"]", "\n"), "\n"+offset)
105 | 		formattedExpectedOutput := offset + strings.Join(strings.Split("["+string(output)+"]", "\n"), "\n"+offset)
106 | 		errorMessage = fmt.Sprintf("%v-- Got --\n%v\n%v-- Expected --\n%v\n", offset, formattedOutput, offset, formattedExpectedOutput)
107 | 	}
108 | 	doc.Free()
109 | 
110 | 	if len(errorMessage) > 0 {
111 | 		return &errorMessage
112 | 	}
113 | 	return nil
114 | 
115 | }
116 | 
117 | func RunDocumentParseTest(t *testing.T, name string) (error *string) {
118 | 
119 | 	var errorMessage string
120 | 	offset := "\t"
121 | 
122 | 	defer CheckXmlMemoryLeaks(t)
123 | 
124 | 	input, output, dataError := getTestData(name)
125 | 
126 | 	if len(dataError) > 0 {
127 | 		errorMessage += dataError
128 | 	}
129 | 
130 | 	doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
131 | 
132 | 	if err != nil {
133 | 		errorMessage = fmt.Sprintf("parsing error:%v\n", err)
134 | 	}
135 | 
136 | 	if doc.String() != string(output) {
137 | 		formattedOutput := offset + strings.Join(strings.Split("["+doc.String()+"]", "\n"), "\n"+offset)
138 | 		formattedExpectedOutput := offset + strings.Join(strings.Split("["+string(output)+"]", "\n"), "\n"+offset)
139 | 		errorMessage = fmt.Sprintf("%v-- Got --\n%v\n%v-- Expected --\n%v\n", offset, formattedOutput, offset, formattedExpectedOutput)
140 | 		testOutput := filepath.Join(name, "test_output.txt")
141 | 		ioutil.WriteFile(testOutput, []byte(doc.String()), os.FileMode(0666))
142 | 		errorMessage += fmt.Sprintf("%v Output test output to: %v\n", offset, testOutput)
143 | 	}
144 | 	doc.Free()
145 | 
146 | 	if len(errorMessage) > 0 {
147 | 		return &errorMessage
148 | 	}
149 | 	return nil
150 | 
151 | }
152 | 
153 | func BenchmarkDocOutput(b *testing.B) {
154 | 	b.StopTimer()
155 | 
156 | 	tests, err := collectTests("document")
157 | 
158 | 	if len(err) > 0 {
159 | 		fmt.Printf(err)
160 | 		return
161 | 	}
162 | 
163 | 	docs := make([]*XmlDocument, 0)
164 | 
165 | 	for _, testName := range tests {
166 | 
167 | 		input, _, dataError := getTestData(testName)
168 | 
169 | 		if len(dataError) > 0 {
170 | 			fmt.Printf(dataError)
171 | 			return
172 | 		}
173 | 		doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
174 | 
175 | 		if err != nil {
176 | 			fmt.Printf("parsing error:%v\n", err)
177 | 			return
178 | 		}
179 | 		docs = append(docs, doc)
180 | 	}
181 | 
182 | 	b.StartTimer()
183 | 
184 | 	for i := 0; i < b.N; i++ {
185 | 		for index, _ := range tests {
186 | 			_ = docs[index].String()
187 | 		}
188 | 	}
189 | 
190 | }
191 | 
192 | func BenchmarkDocOutputToBuffer(b *testing.B) {
193 | 	b.StopTimer()
194 | 
195 | 	tests, err := collectTests("document")
196 | 
197 | 	if len(err) > 0 {
198 | 		fmt.Printf(err)
199 | 		return
200 | 	}
201 | 
202 | 	docs := make([]*XmlDocument, 0)
203 | 
204 | 	for _, testName := range tests {
205 | 
206 | 		input, _, dataError := getTestData(testName)
207 | 
208 | 		if len(dataError) > 0 {
209 | 			fmt.Printf(dataError)
210 | 			return
211 | 		}
212 | 		doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
213 | 
214 | 		if err != nil {
215 | 			fmt.Printf("parsing error:%v\n", err)
216 | 			return
217 | 		}
218 | 		docs = append(docs, doc)
219 | 	}
220 | 
221 | 	buffer := make([]byte, 500*1024)
222 | 
223 | 	b.StartTimer()
224 | 
225 | 	for i := 0; i < b.N; i++ {
226 | 
227 | 		for index, _ := range tests {
228 | 
229 | 			_ = docs[index].ToBuffer(buffer)
230 | 
231 | 		}
232 | 	}
233 | 
234 | }
235 | 
236 | func TestRemoveNamespaces(t *testing.T) {
237 | 	xml := "<SOAP-ENV:Envelope xmlns:SOAP-ENV=\"http://schemas.xmlsoap.org/soap/envelope/\"><SOAP-ENV:Body><m:setPresence xmlns:m=\"http://schemas.microsoft.com/winrtc/2002/11/sip\"><m:presentity m:uri=\"test\"><m:availability m:aggregate=\"300\" m:description=\"online\"/><m:activity m:aggregate=\"400\" m:description=\"Active\"/><deviceName xmlns=\"http://schemas.microsoft.com/2002/09/sip/client/presence\" name=\"WIN-0DDABKC1UI8\"/></m:presentity></m:setPresence></SOAP-ENV:Body></SOAP-ENV:Envelope>"
238 | 	xml_no_namespace := "<Envelope><Body><setPresence><presentity uri=\"test\"><availability aggregate=\"300\" description=\"online\"/><activity aggregate=\"400\" description=\"Active\"/><deviceName name=\"WIN-0DDABKC1UI8\"/></presentity></setPresence></Body></Envelope>"
239 | 
240 | 	doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
241 | 	doc.Root().RecursivelyRemoveNamespaces()
242 | 	doc2, _ := Parse([]byte(xml_no_namespace), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
243 | 
244 | 	output := fmt.Sprintf("%v", doc)
245 | 	output_no_namespace := fmt.Sprintf("%v", doc2)
246 | 	if output != output_no_namespace {
247 | 		t.Errorf("Xml namespaces not removed!")
248 | 	}
249 | }
250 | 
251 | func TestRemoveDefaultNamespace(t *testing.T) {
252 | 	xml := `
253 | <body xmlns="http://jabber.org/protocol/httpbind" xmlns:stream="http://etherx.jabber.org/streams" to="127.0.0.1" rid="3" sid="0acad5262d995374">
254 |   <iq id="2" type="get" from="">
255 |     <query xmlns="jabber:iq:auth">
256 |       <username>xyz</username>
257 |     </query>
258 |   </iq>
259 | </body>
260 | `
261 | 
262 | 	xml_no_namespace := `
263 | <body xmlns:stream="http://etherx.jabber.org/streams" to="127.0.0.1" rid="3" sid="0acad5262d995374">
264 |   <iq id="2" type="get" from="">
265 |     <query xmlns="jabber:iq:auth">
266 |       <username>xyz</username>
267 |     </query>
268 |   </iq>
269 | </body>
270 | `
271 | 	doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
272 | 	doc.Root().RemoveDefaultNamespace()
273 | 	doc2, _ := Parse([]byte(xml_no_namespace), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
274 | 
275 | 	output := fmt.Sprintf("%v", doc)
276 | 	output_no_namespace := fmt.Sprintf("%v", doc2)
277 | 	if output != output_no_namespace {
278 | 		t.Errorf("Default namespace not removed!")
279 | 	}
280 | }
281 | 
282 | func TestNodeById(t *testing.T) {
283 | 	xml := "<!DOCTYPE doc [\n<!ELEMENT para (#PCDATA)>\n<!ATTLIST para label ID #IMPLIED>\n]>\n<doc><para>Failed</para><para label=\"W11\">Success</para></doc>"
284 | 
285 | 	doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
286 | 	p := doc.NodeById("W11")
287 | 
288 | 	if p == nil {
289 | 		t.Errorf("Did not find node by ID!")
290 | 		return
291 | 	}
292 | 
293 | 	output := fmt.Sprintf("%v", p.Content())
294 | 	if output != "Success" {
295 | 		t.Errorf("Incorrect node selected by ID!")
296 | 	}
297 | }
298 | 
299 | func TestUnparsedEntityURI(t *testing.T) {
300 | 	xml := "<!DOCTYPE doc [\n<!ELEMENT doc EMPTY>\n<!ATTLIST doc attr ENTITY #REQUIRED>\n<!NOTATION JPEG SYSTEM \"jpeg.exe\">\n<!ENTITY test SYSTEM \"test.jpg\" NDATA JPEG>\n]>\n<doc attr='test'/>"
301 | 	doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
302 | 	expected := "test.jpg"
303 | 	actual := doc.UnparsedEntityURI("test")
304 | 
305 | 	if actual == "" {
306 | 		t.Errorf("Did not find unparsed entity 'test'")
307 | 		return
308 | 	}
309 | 
310 | 	if actual != expected {
311 | 		t.Errorf("Expected '%s', but got '%s' calling doc.UnparsedEntityURI", expected, actual)
312 | 	}
313 | }
314 | 


--------------------------------------------------------------------------------
/xml/node_test.go:
--------------------------------------------------------------------------------
  1 | package xml
  2 | 
  3 | import "testing"
  4 | import "fmt"
  5 | 
  6 | func TestAddChild(t *testing.T) {
  7 | 
  8 | 	docAssertion := func(doc *XmlDocument) (string, string, string) {
  9 | 		expectedDocAfterAdd :=
 10 | 			`<?xml version="1.0" encoding="utf-8"?>
 11 | <foo>
 12 |   <bar/>
 13 | </foo>
 14 | `
 15 | 		doc.Root().AddChild("<bar></bar>")
 16 | 
 17 | 		return doc.String(), expectedDocAfterAdd, "output of the xml doc after AddChild does not match"
 18 | 	}
 19 | 
 20 | 	nodeAssertion := func(doc *XmlDocument) (string, string, string) {
 21 | 		expectedNodeAfterAdd :=
 22 | 			`<foo>
 23 |   <bar/>
 24 | </foo>`
 25 | 
 26 | 		return doc.Root().String(), expectedNodeAfterAdd, "the output of the xml root after AddChild does not match"
 27 | 	}
 28 | 
 29 | 	RunTest(t, "node", "add_child", nil, docAssertion, nodeAssertion)
 30 | 
 31 | }
 32 | 
 33 | func TestAddAncestorAsChild(t *testing.T) {
 34 | 	docAssertion := func(doc *XmlDocument) (string, string, string) {
 35 | 		expectedDocAfterAdd :=
 36 | 			`<?xml version="1.0" encoding="utf-8"?>
 37 | <foo/>
 38 | `
 39 | 
 40 | 		foo := doc.Root()
 41 | 		bar := foo.FirstChild()
 42 | 		holiday := bar.FirstChild()
 43 | 		fun := holiday.FirstChild()
 44 | 		fun.AddChild(bar)
 45 | 
 46 | 		return doc.String(), expectedDocAfterAdd, "output of the xml doc after AddChild does not match"
 47 | 	}
 48 | 
 49 | 	nodeAssertion := func(doc *XmlDocument) (string, string, string) {
 50 | 		expectedNodeAfterAdd :=
 51 | 			`<foo/>`
 52 | 
 53 | 		return doc.Root().String(), expectedNodeAfterAdd, "the output of the xml root after AddChild does not match"
 54 | 	}
 55 | 
 56 | 	RunTest(t, "node", "add_ancestor", nil, docAssertion, nodeAssertion)
 57 | 
 58 | }
 59 | 
 60 | func addChildBenchLogic(b *testing.B, doc *XmlDocument) {
 61 | 	root := doc.Root()
 62 | 
 63 | 	for i := 0; i < b.N; i++ {
 64 | 		root.AddChild("<bar></bar>")
 65 | 	}
 66 | }
 67 | 
 68 | func BenchmarkAddChild(b *testing.B) {
 69 | 	RunBenchmark(b, "document", "big_un", addChildBenchLogic) // Run against big doc
 70 | }
 71 | 
 72 | func BenchmarkAddChildBigDoc(b *testing.B) {
 73 | 	RunBenchmark(b, "node", "add_child", addChildBenchLogic)
 74 | }
 75 | 
 76 | func TestAddPreviousSibling(t *testing.T) {
 77 | 
 78 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
 79 | 		err := doc.Root().AddPreviousSibling("<bar></bar><cat></cat>")
 80 | 
 81 | 		if err != nil {
 82 | 			t.Errorf("Error adding previous sibling:\n%v\n", err.Error())
 83 | 		}
 84 | 	}
 85 | 
 86 | 	RunTest(t, "node", "add_previous_sibling", testLogic)
 87 | }
 88 | 
 89 | func TestAddPreviousSibling2(t *testing.T) {
 90 | 
 91 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
 92 | 		err := doc.Root().FirstChild().AddPreviousSibling("COOL")
 93 | 
 94 | 		if err != nil {
 95 | 			t.Errorf("Error adding previous sibling:\n%v\n", err.Error())
 96 | 		}
 97 | 	}
 98 | 
 99 | 	RunTest(t, "node", "add_previous_sibling2", testLogic)
100 | }
101 | 
102 | func TestAddNextSibling(t *testing.T) {
103 | 
104 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
105 | 		doc.Root().AddNextSibling("<bar></bar><baz></baz>")
106 | 	}
107 | 
108 | 	RunTest(t, "node", "add_next_sibling", testLogic)
109 | }
110 | 
111 | func TestSetContent(t *testing.T) {
112 | 
113 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
114 | 		root := doc.Root()
115 | 		root.SetContent("<fun></fun>")
116 | 	}
117 | 
118 | 	RunTest(t, "node", "set_content", testLogic)
119 | }
120 | 
121 | func BenchmarkSetContent(b *testing.B) {
122 | 
123 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
124 | 		root := doc.Root()
125 | 		for i := 0; i < b.N; i++ {
126 | 			root.SetContent("<fun></fun>")
127 | 		}
128 | 	}
129 | 
130 | 	RunBenchmark(b, "node", "set_content", benchmarkLogic)
131 | }
132 | 
133 | func TestSetChildren(t *testing.T) {
134 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
135 | 		root := doc.Root()
136 | 		root.SetChildren("<fun></fun>")
137 | 	}
138 | 
139 | 	RunTest(t, "node", "set_children", testLogic)
140 | }
141 | 
142 | func BenchmarkSetChildren(b *testing.B) {
143 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
144 | 		root := doc.Root()
145 | 		for i := 0; i < b.N; i++ {
146 | 			root.SetChildren("<fun></fun>")
147 | 		}
148 | 	}
149 | 
150 | 	RunBenchmark(b, "node", "set_children", benchmarkLogic)
151 | }
152 | 
153 | func TestReplace(t *testing.T) {
154 | 
155 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
156 | 		root := doc.Root()
157 | 		root.Replace("<fun></fun><cool/>")
158 | 	}
159 | 
160 | 	rootAssertion := func(doc *XmlDocument) (string, string, string) {
161 | 		root := doc.Root()
162 | 		return root.String(), "<fun/>", "the output of the xml root does not match"
163 | 	}
164 | 
165 | 	RunTest(t, "node", "replace", testLogic, rootAssertion)
166 | }
167 | 
168 | func BenchmarkReplace(b *testing.B) {
169 | 
170 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
171 | 		root := doc.Root()
172 | 		for i := 0; i < b.N; i++ {
173 | 			root.Replace("<fun></fun>")
174 | 			root = doc.Root() //once the node has been replaced, we need to get a new node
175 | 		}
176 | 	}
177 | 
178 | 	RunBenchmark(b, "node", "replace", benchmarkLogic)
179 | }
180 | 
181 | func TestAttributes(t *testing.T) {
182 | 
183 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
184 | 
185 | 		root := doc.Root()
186 | 		attributes := root.Attributes()
187 | 
188 | 		if len(attributes) != 2 || attributes["myname"].String() != "ff" {
189 | 			fmt.Printf("%v, %q\n", attributes, attributes["myname"].String())
190 | 			t.Error("root's attributes do not match")
191 | 		}
192 | 
193 | 		child := root.FirstChild()
194 | 		childAttributes := child.Attributes()
195 | 
196 | 		if len(childAttributes) != 1 || childAttributes["class"].String() != "shine" {
197 | 			t.Error("child's attributes do not match")
198 | 		}
199 | 	}
200 | 
201 | 	RunTest(t, "node", "attributes", testLogic)
202 | 
203 | }
204 | 
205 | func BenchmarkAttributes(b *testing.B) {
206 | 	benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
207 | 
208 | 		root := doc.Root()
209 | 
210 | 		for i := 0; i < b.N; i++ {
211 | 			root.SetAttr("garfield", "spaghetti")
212 | 		}
213 | 	}
214 | 
215 | 	RunBenchmark(b, "node", "attributes", benchmarkLogic)
216 | }
217 | 
218 | func TestInner(t *testing.T) {
219 | 
220 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
221 | 		root := doc.Root()
222 | 		root.SetInnerHtml("<bar></bar><baz></baz>")
223 | 	}
224 | 
225 | 	RunTest(t, "node", "inner", testLogic)
226 | }
227 | func TestInnerWithAttributes(t *testing.T) {
228 | 
229 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
230 | 		root := doc.Root()
231 | 		root.SetInnerHtml("<bar give='me' something='good' to='eat'></bar>")
232 | 	}
233 | 
234 | 	RunTest(t, "node", "inner_with_attributes", testLogic)
235 | }
236 | 
237 | func TestSetNamespace(t *testing.T) {
238 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
239 | 		root := doc.Root()
240 | 		root.SetNamespace("foo", "bar")
241 | 	}
242 | 
243 | 	RunTest(t, "node", "set_namespace", testLogic)
244 | }
245 | 
246 | func TestSetDefaultNamespace(t *testing.T) {
247 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
248 | 		root := doc.Root()
249 | 		root.SetNamespace("", "bar")
250 | 	}
251 | 
252 | 	RunTest(t, "node", "set_default_namespace", testLogic)
253 | }
254 | 
255 | func TestDeclareNamespace(t *testing.T) {
256 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
257 | 		root := doc.Root()
258 | 		root.DeclareNamespace("foo", "bar")
259 | 		child := root.FirstChild()
260 | 		child.SetNamespace("foo", "bar")
261 | 	}
262 | 
263 | 	RunTest(t, "node", "declare_namespace", testLogic)
264 | }
265 | 
266 | func TestNamespaceAttribute(t *testing.T) {
267 | 	testLogic := func(t *testing.T, doc *XmlDocument) {
268 | 		root := doc.Root()
269 | 		root.DeclareNamespace("foo", "bar")
270 | 		root.SetNsAttr("bar", "hello", "world")
271 | 	}
272 | 
273 | 	RunTest(t, "node", "set_ns_attr", testLogic)
274 | }
275 | 
276 | func TestUnformattedXml(t *testing.T) {
277 | 	xml := "<?xml version=\"1.0\"?>\n<foo>\n\t<bar>Test</bar>\n</foo>"
278 | 	expected := "<foo>\n\t<bar>Test</bar>\n</foo>"
279 | 	doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
280 | 	root := doc.Root()
281 | 	out := root.ToUnformattedXml()
282 | 	if out != expected {
283 | 		t.Errorf("TestUnformattedXml Expected: %v\nActual: %v", expected, out)
284 | 	}
285 | 
286 | }
287 | 
288 | func TestSerializewithFomat(t *testing.T) {
289 | 	xml := "<?xml version=\"1.0\"?>\n<foo>\n\t<bar>Test</bar>\n</foo>"
290 | 	expected := "<foo>\n\t<bar>Test</bar>\n</foo>"
291 | 	doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
292 | 	root := doc.Root()
293 | 	b, size := root.SerializeWithFormat(XML_SAVE_AS_XML|XML_SAVE_NO_DECL, nil, nil)
294 | 	if b == nil {
295 | 		t.Errorf("SerializeWithFormat Expected: %v\nActual: (nil)", expected)
296 | 		return
297 | 	}
298 | 	out := string(b[:size])
299 | 	if out != expected {
300 | 		t.Errorf("SerializeWithFormat Expected: %v\nActual: %v", expected, out)
301 | 	}
302 | 
303 | }
304 | 
305 | func TestEvalVariableExpr(t *testing.T) {
306 | 	xml := "<foo />"
307 | 	doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
308 | 	s := newSimpleVariableScope()
309 | 	root := doc.Root()
310 | 	s.variables["spec"] = "XSLT 1.0"
311 | 	s.variables["number"] = 7
312 | 	v, err := root.EvalXPath("$spec", s)
313 | 	if err != nil {
314 | 		t.Errorf("%v", err)
315 | 	}
316 | 	out := v.(string)
317 | 	if out != "XSLT 1.0" {
318 | 		t.Errorf("TestEvalVariableExpr Expected: %v\nActual: %v", "XSLT 1.0", out)
319 | 	}
320 | }
321 | 
322 | func TestEvalStringExpr(t *testing.T) {
323 | 	xml := "<foo />"
324 | 	doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
325 | 	root := doc.Root()
326 | 	v, err := root.EvalXPath("\"Hello\"", nil)
327 | 	if err != nil {
328 | 		t.Errorf("%v", err)
329 | 	}
330 | 	out := v.(string)
331 | 	if out != "Hello" {
332 | 		t.Errorf("TestEvalStringExpr Expected: %v\nActual: %v", "Hello", out)
333 | 	}
334 | }
335 | 
336 | func TestEvalNumericExpr(t *testing.T) {
337 | 	xml := "<foo />"
338 | 	doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
339 | 	root := doc.Root()
340 | 	v, err := root.EvalXPath("7", nil)
341 | 	if err != nil {
342 | 		t.Errorf("%v", err)
343 | 	}
344 | 	out := v.(float64)
345 | 	if out != 7 {
346 | 		t.Errorf("TestEvalNumericExpr Expected: %v\nActual: %v", 7, out)
347 | 	}
348 | }
349 | 
350 | func TestDisableOutputEscaping(t *testing.T) {
351 | 	doc := CreateEmptyDocument(DefaultEncodingBytes, DefaultEncodingBytes)
352 | 	n := doc.CreateTextNode("<br/>")
353 | 
354 | 	//normal usage escapes the output
355 | 	escaped := "&lt;br/&gt;"
356 | 	if n.String() != escaped {
357 | 		t.Errorf("TestDisableOutputEscaping (escaping enabled) Expected: %v\nActual: %v", escaped, n.String())
358 | 	}
359 | 
360 | 	//now we disable the output escaping
361 | 	unescaped := "<br/>"
362 | 	n.DisableOutputEscaping()
363 | 	if n.String() != unescaped {
364 | 		t.Errorf("TestDisableOutputEscaping (escaping disabled) Expected: %v\nActual: %v", unescaped, n.String())
365 | 	}
366 | }
367 | 


--------------------------------------------------------------------------------
/css/css.go:
--------------------------------------------------------------------------------
  1 | package css
  2 | 
  3 | // package main
  4 | 
  5 | import (
  6 | 	"fmt"
  7 | 	"github.com/moovweb/rubex"
  8 | 	"strings"
  9 | )
 10 | 
// Lexeme identifies one kind of token in a CSS selector. Each value is also
// the index of that token's regular expression in the pattern table, and the
// combinator values (ADJACENT_TO, PRECEDES, PARENT_OF, ANCESTOR_OF) and NOT
// are additionally passed to sequence to select how a step is emitted.
type Lexeme int

// The declaration order fixes the numeric values; the pattern table and the
// matcher slice built from it are indexed by them.
const (
	// Whitespace, separators and element selectors.
	SPACES Lexeme = iota
	COMMA
	UNIVERSAL
	TYPE
	ELEMENT
	CLASS
	ID
	// Attribute selectors and their comparison operators.
	LBRACKET
	RBRACKET
	ATTR_NAME
	ATTR_VALUE
	EQUALS
	CONTAINS_CLASS
	DASH_PREFIXED
	STARTS_WITH
	ENDS_WITH
	CONTAINS
	MATCH_OP
	// Pseudo-classes.
	PSEUDO_CLASS
	FIRST_CHILD
	FIRST_OF_TYPE
	NTH_CHILD
	NTH_OF_TYPE
	ONLY_CHILD
	ONLY_OF_TYPE
	LAST_CHILD
	LAST_OF_TYPE
	NOT
	// Pieces of a parenthesised pseudo-class argument.
	LPAREN
	RPAREN
	COEFFICIENT
	SIGNED
	UNSIGNED
	ODD
	EVEN
	N
	OPERATOR
	PLUS
	MINUS
	BINOMIAL
	// Combinators between compound selectors.
	ADJACENT_TO
	PRECEDES
	PARENT_OF
	ANCESTOR_OF
	// NUM_LEXEMES is not a lexeme itself: it counts the entries above and is
	// used as the length of the pattern table.
	NUM_LEXEMES
)
 61 | 
// pattern holds the regular expression source for every Lexeme, indexed by the
// Lexeme value. It is filled in by init.
var pattern [NUM_LEXEMES]string

// init populates the pattern table. MATCH_OP and BINOMIAL are assembled from
// entries assigned earlier in this function, so those entries must be set
// before them.
func init() {
	pattern[SPACES] = `\s+`
	pattern[COMMA] = `\s*,`
	pattern[UNIVERSAL] = `\*`
	pattern[TYPE] = `[_a-zA-Z]\w*`
	// ELEMENT is the alternation of the UNIVERSAL and TYPE expressions.
	pattern[ELEMENT] = `(\*|[_a-zA-Z]\w*)`
	pattern[CLASS] = `\.[-\w]+`
	pattern[ID] = `\#[-\w]+`
	pattern[LBRACKET] = `\[`
	pattern[RBRACKET] = `\]`
	pattern[ATTR_NAME] = `[-_:a-zA-Z][-\w:.]*`
	// A double- or single-quoted string; a backslash escapes the next character.
	pattern[ATTR_VALUE] = `("(\\.|[^"\\])*"|'(\\.|[^'\\])*')`
	pattern[EQUALS] = `=`
	pattern[CONTAINS_CLASS] = `~=`
	pattern[DASH_PREFIXED] = `\|=`
	pattern[STARTS_WITH] = `\^=`
	pattern[ENDS_WITH] = `\$=`
	pattern[CONTAINS] = `\*=`
	// MATCH_OP is the alternation of the six comparison operators above.
	pattern[MATCH_OP] = "(" + strings.Join([]string{pattern[EQUALS], pattern[CONTAINS_CLASS], pattern[DASH_PREFIXED], pattern[STARTS_WITH], pattern[ENDS_WITH], pattern[CONTAINS]}, "|") + ")"
	// PSEUDO_CLASS matches any colon-prefixed name; the entries after it spell
	// out individual pseudo-class names.
	pattern[PSEUDO_CLASS] = `:[-a-z]+`
	pattern[FIRST_CHILD] = `:first-child`
	pattern[FIRST_OF_TYPE] = `:first-of-type`
	pattern[NTH_CHILD] = `:nth-child`
	pattern[NTH_OF_TYPE] = `:nth-of-type`
	pattern[ONLY_CHILD] = `:only-child`
	pattern[ONLY_OF_TYPE] = `:only-of-type`
	pattern[LAST_CHILD] = `:last-child`
	pattern[LAST_OF_TYPE] = `:last-of-type`
	pattern[NOT] = `:not`
	// Parentheses may be preceded by whitespace.
	pattern[LPAREN] = `\s*\(`
	pattern[RPAREN] = `\s*\)`
	// COEFFICIENT can match the empty string: both the sign and the digits are
	// optional.
	pattern[COEFFICIENT] = `[-+]?(\d+)?`
	pattern[SIGNED] = `[-+]?\d+`
	pattern[UNSIGNED] = `\d+`
	pattern[ODD] = `odd`
	pattern[EVEN] = `even`
	pattern[N] = `[nN]`
	pattern[OPERATOR] = `[-+]`
	pattern[PLUS] = `\+`
	pattern[MINUS] = `-`
	// BINOMIAL is COEFFICIENT, N, an OPERATOR and an UNSIGNED number, with
	// optional whitespace on either side of the operator.
	pattern[BINOMIAL] = strings.Join([]string{pattern[COEFFICIENT], pattern[N], `\s*`, pattern[OPERATOR], `\s*`, pattern[UNSIGNED]}, "")
	// Combinators; the first three may be preceded by whitespace.
	pattern[ADJACENT_TO] = `\s*\+`
	pattern[PRECEDES] = `\s*~`
	pattern[PARENT_OF] = `\s*>`
	// ANCESTOR_OF uses the same expression as SPACES.
	pattern[ANCESTOR_OF] = `\s+`
}
110 | 
// Scope selects how a converted selector is anchored. With LOCAL, selector
// starts the generated XPath with "."; with GLOBAL no such prefix is added.
type Scope int

const (
	GLOBAL Scope = iota
	LOCAL
)
117 | 
118 | func Convert(css string, scope Scope) string {
119 | 	matchers := allocate()
120 | 	defer deallocate(matchers)
121 | 	xpath, _ := selectors(matchers, []byte(css), scope)
122 | 	return xpath
123 | }
124 | 
125 | func allocate() []*rubex.Regexp {
126 | 	// some overlap in here, but it'll make the parsing functions clearer
127 | 	matchers := make([]*rubex.Regexp, 0, NUM_LEXEMES)
128 | 	for _, p := range pattern {
129 | 		matchers = append(matchers, rubex.MustCompile(`\A`+p))
130 | 	}
131 | 	return matchers
132 | }
133 | 
134 | func deallocate(matchers []*rubex.Regexp) {
135 | 	for _, m := range matchers {
136 | 		m.Free()
137 | 	}
138 | }
139 | 
140 | func selectors(matchers []*rubex.Regexp, input []byte, scope Scope) (string, []byte) {
141 | 	x, input := selector(matchers, input, scope)
142 | 	xs := []string{x}
143 | 	for peek(matchers, COMMA, input) {
144 | 		_, input = token(matchers, COMMA, input)
145 | 		x, input = selector(matchers, input, scope)
146 | 		xs = append(xs, x)
147 | 	}
148 | 	return strings.Join(xs, " | "), input
149 | }
150 | 
151 | func selector(matchers []*rubex.Regexp, input []byte, scope Scope) (string, []byte) {
152 | 	var combinator Lexeme
153 | 	var xs []string
154 | 	if scope == LOCAL {
155 | 		xs = []string{"."}
156 | 	}
157 | 	if matched, remainder := token(matchers, PARENT_OF, input); matched != nil {
158 | 		combinator, input = PARENT_OF, remainder
159 | 	} else {
160 | 		combinator = ANCESTOR_OF
161 | 	}
162 | 	x, input := sequence(matchers, input, combinator)
163 | 	xs = append(xs, x)
164 | 	for {
165 | 		if matched, remainder := token(matchers, ADJACENT_TO, input); matched != nil {
166 | 			combinator, input = ADJACENT_TO, remainder
167 | 		} else if matched, remainder := token(matchers, PRECEDES, input); matched != nil {
168 | 			combinator, input = PRECEDES, remainder
169 | 		} else if matched, remainder := token(matchers, PARENT_OF, input); matched != nil {
170 | 			combinator, input = PARENT_OF, remainder
171 | 		} else if matched, remainder := token(matchers, ANCESTOR_OF, input); matched != nil {
172 | 			combinator, input = ANCESTOR_OF, remainder
173 | 		} else {
174 | 			break
175 | 		}
176 | 		x, input = sequence(matchers, input, combinator)
177 | 		xs = append(xs, x)
178 | 	}
179 | 	return strings.Join(xs, ""), input
180 | }
181 | 
// sequence parses one compound selector (an optional element name followed by
// any number of qualifiers) and returns the XPath step for it along with the
// unconsumed input.
//
// combinator selects the axis prefix of the step. When it is NOT, no axis
// prefix is emitted and a predicate list that contains qualifiers is returned
// without the surrounding brackets. The function panics with
// "Invalid CSS selector" when a qualifier is required but none can be parsed.
func sequence(matchers []*rubex.Regexp, input []byte, combinator Lexeme) (string, []byte) {
	// Skip leading whitespace.
	_, input = token(matchers, SPACES, input)
	// x is the axis part of the step; ps collects predicate fragments that are
	// concatenated at the end.
	x, ps := "", []string{}

	switch combinator {
	case ANCESTOR_OF:
		x = "/descendant-or-self::*/*"
	case PARENT_OF:
		x = "/child::*"
	case PRECEDES:
		x = "/following-sibling::*"
	case ADJACENT_TO:
		// Same axis as PRECEDES, restricted to the first following sibling.
		x = "/following-sibling::*"
		ps = append(ps, "position()=1")
	}

	if e, remainder := token(matchers, ELEMENT, input); e != nil {
		input = remainder
		if len(ps) > 0 {
			ps = append(ps, " and ")
		}
		ps = append(ps, "self::"+string(e))
		// An element name with no qualifier after it completes the sequence.
		if !(peek(matchers, ID, input) || peek(matchers, CLASS, input) || peek(matchers, PSEUDO_CLASS, input) || peek(matchers, LBRACKET, input)) {
			pstr := strings.Join(ps, "")
			if pstr != "" {
				pstr = fmt.Sprintf("[%s]", pstr)
			}
			return x + pstr, input
		}
	}
	// From here on at least one qualifier is required.
	q, input, connective := qualifier(matchers, input)
	if q == "" {
		panic("Invalid CSS selector")
	}
	// The connective reported by the qualifier joins it to what is already in
	// ps.
	if len(ps) > 0 {
		ps = append(ps, connective)
	}
	ps = append(ps, q)
	// Consume the remaining qualifiers, each preceded by its own connective.
	for q, r, c := qualifier(matchers, input); q != ""; q, r, c = qualifier(matchers, input) {
		ps, input = append(ps, c, q), r
	}
	pstr := strings.Join(ps, "")
	if combinator != NOT {
		pstr = fmt.Sprintf("[%s]", pstr)
	}
	return x + pstr, input
}
229 | 
230 | func qualifier(matchers []*rubex.Regexp, input []byte) (string, []byte, string) {
231 | 	p, connective := "", ""
232 | 	if t, remainder := token(matchers, CLASS, input); t != nil {
233 | 		p = fmt.Sprintf(`contains(concat(" ", @class, " "), " %s ")`, string(t[1:]))
234 | 		input = remainder
235 | 		connective = " and "
236 | 	} else if t, remainder := token(matchers, ID, input); t != nil {
237 | 		p, input, connective = fmt.Sprintf(`@id="%s"`, string(t[1:])), remainder, " and "
238 | 	} else if peek(matchers, PSEUDO_CLASS, input) {
239 | 		p, input, connective = pseudoClass(matchers, input)
240 | 	} else if peek(matchers, LBRACKET, input) {
241 | 		p, input = attribute(matchers, input)
242 | 		connective = " and "
243 | 	}
244 | 	return p, input, connective
245 | }
246 | 
247 | func pseudoClass(matchers []*rubex.Regexp, input []byte) (string, []byte, string) {
248 | 	class, input := token(matchers, PSEUDO_CLASS, input)
249 | 	var p, connective string
250 | 	switch string(class) {
251 | 	case ":first-child":
252 | 		p, connective = "position()=1", " and "
253 | 	case ":first-of-type":
254 | 		p, connective = "position()=1", "]["
255 | 	case ":last-child":
256 | 		p, connective = "position()=last()", " and "
257 | 	case ":last-of-type":
258 | 		p, connective = "position()=last()", "]["
259 | 	case ":only-child":
260 | 		p, connective = "position() = 1 and position() = last()", " and "
261 | 	case ":only-of-type":
262 | 		p, connective = "position() = 1 and position() = last()", "]["
263 | 	case ":nth-child":
264 | 		p, input = nth(matchers, input)
265 | 		connective = " and "
266 | 	case ":nth-of-type":
267 | 		p, input = nth(matchers, input)
268 | 		connective = "]["
269 | 	case ":not":
270 | 		p, input = negate(matchers, input)
271 | 		connective = " and "
272 | 	default:
273 | 		panic(`Cannot convert CSS pseudo-class "` + string(class) + `" to XPath.`)
274 | 	}
275 | 	return p, input, connective
276 | }
277 | 
278 | func nth(matchers []*rubex.Regexp, input []byte) (string, []byte) {
279 | 	lparen, input := token(matchers, LPAREN, input)
280 | 	if lparen == nil {
281 | 		panic(":nth-child and :nth-of-type require an parenthesized argument")
282 | 	}
283 | 	_, input = token(matchers, SPACES, input)
284 | 	var expr string
285 | 	if e, rem := token(matchers, EVEN, input); e != nil {
286 | 		expr, input = "position() mod 2 = 0", rem
287 | 	} else if e, rem := token(matchers, ODD, input); e != nil {
288 | 		expr, input = "position() mod 2 = 1", rem
289 | 	} else if e, _ := token(matchers, BINOMIAL, input); e != nil {
290 | 		var coefficient, operator, constant []byte
291 | 		coefficient, input = token(matchers, COEFFICIENT, input)
292 | 		switch string(coefficient) {
293 | 		case "", "+":
294 | 			coefficient = []byte("1")
295 | 		case "-":
296 | 			coefficient = []byte("-1")
297 | 		}
298 | 		_, input = token(matchers, N, input)
299 | 		_, input = token(matchers, SPACES, input)
300 | 		operator, input = token(matchers, OPERATOR, input)
301 | 		_, input = token(matchers, SPACES, input)
302 | 		constant, input = token(matchers, UNSIGNED, input)
303 | 		expr = fmt.Sprintf("(position() %s %s) mod %s = 0", invert(string(operator)), string(constant), string(coefficient))
304 | 	} else if e, rem := token(matchers, SIGNED, input); e != nil {
305 | 		expr, input = "position() = "+string(e), rem
306 | 	} else {
307 | 		panic("Invalid argument to :nth-child or :nth-of-type.")
308 | 	}
309 | 	fmt.Println(string(input))
310 | 	_, input = token(matchers, SPACES, input)
311 | 	rparen, input := token(matchers, RPAREN, input)
312 | 	if rparen == nil {
313 | 		panic("Unterminated argument to :nth-child or :nth-of-type.")
314 | 	}
315 | 	return expr, input
316 | }
317 | 
// invert flips an additive operator: "+" (ignoring surrounding whitespace)
// becomes "-"; every other input, including "-" and the empty string,
// becomes "+".
func invert(op string) string {
	if strings.TrimSpace(op) == "+" {
		return "-"
	}
	return "+"
}
327 | 
328 | func negate(matchers []*rubex.Regexp, input []byte) (string, []byte) {
329 | 	_, input = token(matchers, SPACES, input)
330 | 	lparen, input := token(matchers, LPAREN, input)
331 | 	if lparen == nil {
332 | 		panic(":not requires a parenthesized argument.")
333 | 	}
334 | 	_, input = token(matchers, SPACES, input)
335 | 	p, input := sequence(matchers, input, NOT)
336 | 	_, input = token(matchers, SPACES, input)
337 | 	rparen, input := token(matchers, RPAREN, input)
338 | 	if rparen == nil {
339 | 		panic("Unterminated argument to :not.")
340 | 	}
341 | 	return fmt.Sprintf("not(%s)", p), input
342 | }
343 | 
344 | func attribute(matchers []*rubex.Regexp, input []byte) (string, []byte) {
345 | 	_, input = token(matchers, LBRACKET, input)
346 | 	_, input = token(matchers, SPACES, input)
347 | 	name, input := token(matchers, ATTR_NAME, input)
348 | 	if name == nil {
349 | 		panic("Attribute selector requires an attribute name.")
350 | 	}
351 | 	_, input = token(matchers, SPACES, input)
352 | 	if rbracket, remainder := token(matchers, RBRACKET, input); rbracket != nil {
353 | 		return "@" + string(name), remainder
354 | 	}
355 | 	op, input := token(matchers, MATCH_OP, input)
356 | 	if op == nil {
357 | 		panic("Missing operator in attribute selector.")
358 | 	}
359 | 	_, input = token(matchers, SPACES, input)
360 | 	val, input := token(matchers, ATTR_VALUE, input)
361 | 	if val == nil {
362 | 		panic("Missing value in attribute selector.")
363 | 	}
364 | 	_, input = token(matchers, SPACES, input)
365 | 	rbracket, input := token(matchers, RBRACKET, input)
366 | 	if rbracket == nil {
367 | 		panic("Unterminated attribute selector.")
368 | 	}
369 | 	var expr string
370 | 	n, v := string(name), string(val)
371 | 	switch string(op) {
372 | 	case "=":
373 | 		expr = fmt.Sprintf("@%s=%s", n, v)
374 | 	case "~=":
375 | 		expr = fmt.Sprintf(`contains(concat(" ", @%s, " "), concat(" ", %s, " "))`, n, v)
376 | 	case "|=":
377 | 		expr = fmt.Sprintf(`(@%s=%s or starts-with(@%s, concat(%s, "-")))`, n, v, n, v)
378 | 	case "^=":
379 | 		expr = fmt.Sprintf("starts-with(@%s, %s)", n, v)
380 | 	case "$=":
381 | 		// oy, libxml doesn't support ends-with
382 | 		// generate something like: div[substring(@class, string-length(@class) - string-length('foo') + 1) = 'foo']
383 | 		expr = fmt.Sprintf("substring(@%s, string-length(@%s) - string-length(%s) + 1) = %s", n, n, v, v)
384 | 	case "*=":
385 | 		expr = fmt.Sprintf("contains(@%s, %s)", n, v)
386 | 	}
387 | 	return expr, input
388 | }
389 | 
390 | func token(matchers []*rubex.Regexp, lexeme Lexeme, input []byte) ([]byte, []byte) {
391 | 	matched := matchers[lexeme].Find(input)
392 | 	length := len(matched)
393 | 	if length == 0 {
394 | 		matched = nil
395 | 	}
396 | 	return matched, input[length:]
397 | }
398 | 
399 | func peek(matchers []*rubex.Regexp, lexeme Lexeme, input []byte) bool {
400 | 	matched, _ := token(matchers, lexeme, input)
401 | 	return matched != nil
402 | }
403 | 


--------------------------------------------------------------------------------
/xml/document.go:
--------------------------------------------------------------------------------
  1 | package xml
  2 | 
  3 | /*
  4 | #cgo pkg-config: libxml-2.0
  5 | 
  6 | #include "helper.h"
  7 | */
  8 | import "C"
  9 | 
 10 | import (
 11 | 	"errors"
 12 | 	"github.com/moovweb/gokogiri/help"
 13 | 	. "github.com/moovweb/gokogiri/util"
 14 | 	"github.com/moovweb/gokogiri/xpath"
 15 | 	"os"
 16 | 	"unsafe"
 17 | )
 18 | 
// Document is the interface through which nodes and fragments refer to the
// document that owns them. XmlDocument in this file implements these methods.
type Document interface {
	/* Nokogiri APIs */
	// Factories for new nodes of each kind.
	CreateElementNode(string) *ElementNode
	CreateCDataNode(string) *CDataNode
	CreateTextNode(string) *TextNode
	CreateCommentNode(string) *CommentNode
	CreatePINode(string, string) *ProcessingInstructionNode
	// ParseFragment parses the first argument as a fragment belonging to this
	// document; the second argument is a URL and the third the parse options.
	ParseFragment([]byte, []byte, ParseOption) (*DocumentFragment, error)

	// Accessors for the underlying libxml2 document and its settings.
	DocPtr() unsafe.Pointer
	DocType() NodeType
	DocRef() Document
	InputEncoding() []byte
	OutputEncoding() []byte
	DocXPathCtx() *xpath.XPath
	// AddUnlinkedNode / RemoveUnlinkedNode maintain the set of C nodes that
	// are tracked by the document; XmlDocument.Free releases whatever is
	// still in the set.
	AddUnlinkedNode(unsafe.Pointer)
	RemoveUnlinkedNode(unsafe.Pointer) bool
	// Free releases the C structures associated with the document.
	Free()
	String() string
	// Root returns the root element, or nil when there is none.
	Root() *ElementNode
	// NodeById looks an element up by the value of its ID attribute.
	NodeById(string) *ElementNode
	// BookkeepFragment records a fragment so it can be removed when the
	// document is freed.
	BookkeepFragment(*DocumentFragment)

	RecursivelyRemoveNamespaces() error
	// UnparsedEntityURI returns the URI of the named entity, or "".
	UnparsedEntityURI(string) string
	// Uri returns the URL recorded on the underlying document.
	Uri() string
}
 46 | 
// ParseOption values allow you to tune the behaviour of the parsing engine.
// Each constant is a distinct bit (1 << iota), so options are combined with
// the bitwise OR operator; the combined value is passed to the C parser as an
// int.
type ParseOption int

const (
	XML_PARSE_RECOVER    ParseOption = 1 << iota // recover on errors
	XML_PARSE_NOENT                              // substitute entities
	XML_PARSE_DTDLOAD                            // load the external subset
	XML_PARSE_DTDATTR                            // default DTD attributes
	XML_PARSE_DTDVALID                           // validate with the DTD
	XML_PARSE_NOERROR                            // suppress error reports
	XML_PARSE_NOWARNING                          // suppress warning reports
	XML_PARSE_PEDANTIC                           // pedantic error reporting
	XML_PARSE_NOBLANKS                           // remove blank nodes
	XML_PARSE_SAX1                               // use the SAX1 interface internally
	XML_PARSE_XINCLUDE                           // implement XInclude substitution
	XML_PARSE_NONET                              // forbid network access
	XML_PARSE_NODICT                             // do not reuse the context dictionary
	XML_PARSE_NSCLEAN                            // remove redundant namespace declarations
	XML_PARSE_NOCDATA                            // merge CDATA as text nodes
	XML_PARSE_NOXINCNODE                         // do not generate XINCLUDE START/END nodes
	XML_PARSE_COMPACT                            // compact small text nodes; makes tree read-only
	XML_PARSE_OLD10                              // parse using XML-1.0 before update 5
	XML_PARSE_NOBASEFIX                          // do not fix up XINCLUDE xml:base URIs
	XML_PARSE_HUGE                               // relax any hardcoded limit from the parser
	XML_PARSE_OLDSAX                             // parse using SAX2 interface before 2.7.0
	XML_PARSE_IGNORE_ENC                         // ignore internal document encoding hint
	XML_PARSE_BIG_LINES                          // store big line numbers in text PSVI field
)
 75 | 
// DefaultParseOption provides liberal parsing highly tolerant of invalid documents. Errors and warnings
// are suppressed and the DTD is not processed. Network access is forbidden.
const DefaultParseOption ParseOption = XML_PARSE_RECOVER |
	XML_PARSE_NONET |
	XML_PARSE_NOERROR |
	XML_PARSE_NOWARNING

// StrictParseOption provides standard-compliant parsing. The DTD is processed, entity
// substitutions are made, and errors and warnings are reported back. CDATA sections
// are merged into text nodes.
const StrictParseOption ParseOption = XML_PARSE_NOENT |
	XML_PARSE_DTDLOAD |
	XML_PARSE_DTDATTR |
	XML_PARSE_NOCDATA

// DefaultEncoding is UTF-8, which is also the default for both libxml2 and Go strings.
const DefaultEncoding = "utf-8"

// ERR_FAILED_TO_PARSE_XML is returned by Parse and ReadFile when the C parser
// returns no document.
var ERR_FAILED_TO_PARSE_XML = errors.New("failed to parse xml input")
 94 | 
/*
XmlDocument is the primary interface for working with XML documents.

It wraps a libxml2 document pointer together with the Go-side bookkeeping
needed to release C memory in Free.
*/
type XmlDocument struct {
	// Ptr is the underlying libxml2 document. Free sets it to nil.
	Ptr *C.xmlDoc
	// Me is the document as a Document interface value; NewDocument points it
	// at the XmlDocument itself and DocRef returns it.
	Me  Document
	// Node is the node view of the document; NewDocument sets it to an
	// *XmlNode wrapping the same C pointer.
	Node
	// InEncoding and OutEncoding are stored after passing through
	// AppendCStringTerminator in NewDocument.
	InEncoding    []byte
	OutEncoding   []byte
	// UnlinkedNodes is the set of C nodes tracked via AddUnlinkedNode; Free
	// calls xmlFreeNode on each of them.
	UnlinkedNodes map[*C.xmlNode]bool
	// XPathCtx is the XPath context created for this document; Free releases
	// it before anything else.
	XPathCtx      *xpath.XPath
	// Type is the node type reported for the document node.
	Type          NodeType
	// InputLen is the content length passed to NewDocument (the parsed
	// content's length in Parse, 0 in ReadFile and CreateEmptyDocument).
	InputLen      int

	fragments []*DocumentFragment //save the pointers to free them when the doc is freed
}
111 | 
// DefaultEncodingBytes allows us to conveniently pass the DefaultEncoding to various functions that
// expect the encoding as a byte slice.
var DefaultEncodingBytes = []byte(DefaultEncoding)

// initialFragments is the initial capacity of a document's fragment list.
const initialFragments = 2
117 | 
118 | //NewDocument wraps the pointer to the C struct.
119 | //
120 | // TODO: this should probably not be exported.
121 | func NewDocument(p unsafe.Pointer, contentLen int, inEncoding, outEncoding []byte) (doc *XmlDocument) {
122 | 	inEncoding = AppendCStringTerminator(inEncoding)
123 | 	outEncoding = AppendCStringTerminator(outEncoding)
124 | 
125 | 	xmlNode := &XmlNode{Ptr: (*C.xmlNode)(p)}
126 | 	docPtr := (*C.xmlDoc)(p)
127 | 	doc = &XmlDocument{Ptr: docPtr, Node: xmlNode, InEncoding: inEncoding, OutEncoding: outEncoding, InputLen: contentLen}
128 | 	doc.UnlinkedNodes = make(map[*C.xmlNode]bool)
129 | 	doc.XPathCtx = xpath.NewXPath(p)
130 | 	doc.Type = xmlNode.NodeType()
131 | 	doc.fragments = make([]*DocumentFragment, 0, initialFragments)
132 | 	doc.Me = doc
133 | 	xmlNode.Document = doc
134 | 	//runtime.SetFinalizer(doc, (*XmlDocument).Free)
135 | 	return
136 | }
137 | 
138 | // Parse creates an XmlDocument from some pre-existing content where the input encoding is known. Byte arrays created from
139 | // a Go string are utf-8 encoded (you can pass DefaultEncodingBytes in this scenario).
140 | //
141 | // If you want to build up a document programatically, calling CreateEmptyDocument and building it up using the xml.Node
142 | // interface is a better approach than building a string and calling Parse.
143 | //
144 | // If you have an XML file, then ReadFile will automatically determine the encoding according to the XML specification.
145 | func Parse(content, inEncoding, url []byte, options ParseOption, outEncoding []byte) (doc *XmlDocument, err error) {
146 | 	inEncoding = AppendCStringTerminator(inEncoding)
147 | 	outEncoding = AppendCStringTerminator(outEncoding)
148 | 
149 | 	var docPtr *C.xmlDoc
150 | 	contentLen := len(content)
151 | 
152 | 	if contentLen > 0 {
153 | 		var contentPtr, urlPtr, encodingPtr unsafe.Pointer
154 | 		contentPtr = unsafe.Pointer(&content[0])
155 | 
156 | 		if len(url) > 0 {
157 | 			url = AppendCStringTerminator(url)
158 | 			urlPtr = unsafe.Pointer(&url[0])
159 | 		}
160 | 		if len(inEncoding) > 0 {
161 | 			encodingPtr = unsafe.Pointer(&inEncoding[0])
162 | 		}
163 | 
164 | 		docPtr = C.xmlParse(contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0)
165 | 
166 | 		if docPtr == nil {
167 | 			err = ERR_FAILED_TO_PARSE_XML
168 | 		} else {
169 | 			doc = NewDocument(unsafe.Pointer(docPtr), contentLen, inEncoding, outEncoding)
170 | 		}
171 | 
172 | 	} else {
173 | 		doc = CreateEmptyDocument(inEncoding, outEncoding)
174 | 	}
175 | 	return
176 | }
177 | 
178 | // ReadFile loads an XmlDocument from a filename. The encoding declared in the document will be
179 | // used as the input encoding. If no encoding is declared, the library will use the alogrithm
180 | // in the XML standard to determine if the document is encoded with UTF-8 or UTF-16.
181 | func ReadFile(filename string, options ParseOption) (doc *XmlDocument, err error) {
182 | 	// verify the file exists and can be read before we invoke C API
183 | 	_, err = os.Stat(filename)
184 | 	if err != nil {
185 | 		return
186 | 	}
187 | 
188 | 	dataBytes := GetCString([]byte(filename))
189 | 	dataPtr := unsafe.Pointer(&dataBytes[0])
190 | 	var docPtr *C.xmlDoc
191 | 	docPtr = C.xmlReadFile((*C.char)(dataPtr), nil, C.int(options))
192 | 	if docPtr == nil {
193 | 		err = ERR_FAILED_TO_PARSE_XML
194 | 	} else {
195 | 		var encoding []byte
196 | 		// capture the detected input encoding
197 | 		p := docPtr.encoding
198 | 		if p != nil {
199 | 			encoding = []byte(C.GoString((*C.char)(unsafe.Pointer(p))))
200 | 		}
201 | 		doc = NewDocument(unsafe.Pointer(docPtr), 0, encoding, DefaultEncodingBytes)
202 | 	}
203 | 	return
204 | }
205 | 
206 | // Create an empty XML document and return an XmlDocument. The root element, along with
207 | // any top-level comments or processing instructions, can be added by calling
208 | // AddChild() on the document itself.
209 | func CreateEmptyDocument(inEncoding, outEncoding []byte) (doc *XmlDocument) {
210 | 	help.LibxmlInitParser()
211 | 	docPtr := C.newEmptyXmlDoc()
212 | 	doc = NewDocument(unsafe.Pointer(docPtr), 0, inEncoding, outEncoding)
213 | 	return
214 | }
215 | 
216 | // DocPtr provides access to the libxml2 structure underlying the document.
217 | func (document *XmlDocument) DocPtr() (ptr unsafe.Pointer) {
218 | 	ptr = unsafe.Pointer(document.Ptr)
219 | 	return
220 | }
221 | 
222 | // DocType returns one of the node type constants, usually XML_DOCUMENT_NODE. This
223 | // may be of use if you are working with the C API.
224 | func (document *XmlDocument) DocType() (t NodeType) {
225 | 	t = document.Type
226 | 	return
227 | }
228 | 
229 | // DocRef returns the embedded Document interface.
230 | func (document *XmlDocument) DocRef() (d Document) {
231 | 	d = document.Me
232 | 	return
233 | }
234 | 
235 | // InputEncoding is the original encoding of the document.
236 | func (document *XmlDocument) InputEncoding() (encoding []byte) {
237 | 	encoding = document.InEncoding
238 | 	return
239 | }
240 | 
241 | // OutputEncoding is the encoding that will be used when the document is written out.
242 | // This can be overridden by explicitly specifying an encoding as an argument to any of the
243 | // output functions.
244 | func (document *XmlDocument) OutputEncoding() (encoding []byte) {
245 | 	encoding = document.OutEncoding
246 | 	return
247 | }
248 | 
249 | // Returns an XPath context that can be used to compile and evaluate XPath
250 | // expressions.
251 | //
252 | // In most cases, you should call the Search or EvalXPath functions instead of
253 | // handling the context directly.
254 | func (document *XmlDocument) DocXPathCtx() (ctx *xpath.XPath) {
255 | 	ctx = document.XPathCtx
256 | 	return
257 | }
258 | 
259 | func (document *XmlDocument) AddUnlinkedNode(nodePtr unsafe.Pointer) {
260 | 	p := (*C.xmlNode)(nodePtr)
261 | 	document.UnlinkedNodes[p] = true
262 | }
263 | 
264 | func (document *XmlDocument) RemoveUnlinkedNode(nodePtr unsafe.Pointer) bool {
265 | 	p := (*C.xmlNode)(nodePtr)
266 | 	if document.UnlinkedNodes[p] {
267 | 		delete(document.UnlinkedNodes, p)
268 | 		return true
269 | 	}
270 | 	return false
271 | }
272 | 
// BookkeepFragment appends fragment to the document's fragment list. Free
// calls Remove on every fragment in that list.
func (document *XmlDocument) BookkeepFragment(fragment *DocumentFragment) {
	document.fragments = append(document.fragments, fragment)
}
276 | 
277 | // Root returns the root node of the document. Newly created documents do not
278 | // have a root node until an element node is added a child of the document.
279 | //
280 | // Documents that have multiple root nodes are invalid adn the behaviour is
281 | // not well defined.
282 | func (document *XmlDocument) Root() (element *ElementNode) {
283 | 	nodePtr := C.xmlDocGetRootElement(document.Ptr)
284 | 	if nodePtr != nil {
285 | 		element = NewNode(unsafe.Pointer(nodePtr), document).(*ElementNode)
286 | 	}
287 | 	return
288 | }
289 | 
290 | // Get an element node by the value of its ID attribute. By convention this attribute
291 | // is named id, but the actual name of the attribute is set by the document's DTD or schema.
292 | //
293 | // The value for an ID attribute is guaranteed to be unique within a valid document.
294 | func (document *XmlDocument) NodeById(id string) (element *ElementNode) {
295 | 	dataBytes := GetCString([]byte(id))
296 | 	dataPtr := unsafe.Pointer(&dataBytes[0])
297 | 	nodePtr := C.xmlGetID(document.Ptr, (*C.xmlChar)(dataPtr))
298 | 	if nodePtr != nil {
299 | 		idattr := NewNode(unsafe.Pointer(nodePtr), document).(*AttributeNode)
300 | 		element = idattr.Parent().(*ElementNode)
301 | 	}
302 | 	return
303 | }
304 | 
305 | /*
306 | CreateElementNode creates an element node with the specified tag name. It can be
307 | added as a child of any other element, or as a child of the document itself.
308 | 
309 | Use SetNamespace if the element node needs to be in a namespace.
310 | 
311 | Note that valid documents have only one child element, referred to as the root node.
312 | */
313 | func (document *XmlDocument) CreateElementNode(tag string) (element *ElementNode) {
314 | 	tagBytes := GetCString([]byte(tag))
315 | 	tagPtr := unsafe.Pointer(&tagBytes[0])
316 | 	newNodePtr := C.xmlNewNode(nil, (*C.xmlChar)(tagPtr))
317 | 	newNode := NewNode(unsafe.Pointer(newNodePtr), document)
318 | 	element = newNode.(*ElementNode)
319 | 	return
320 | }
321 | 
322 | //CreateTextNode creates a text node. It can be added as a child of an element.
323 | //
324 | // The data argument is XML-escaped and used as the content of the node.
325 | func (document *XmlDocument) CreateTextNode(data string) (text *TextNode) {
326 | 	dataBytes := GetCString([]byte(data))
327 | 	dataPtr := unsafe.Pointer(&dataBytes[0])
328 | 	nodePtr := C.xmlNewText((*C.xmlChar)(dataPtr))
329 | 	if nodePtr != nil {
330 | 		nodePtr.doc = (*_Ctype_struct__xmlDoc)(document.DocPtr())
331 | 		text = NewNode(unsafe.Pointer(nodePtr), document).(*TextNode)
332 | 	}
333 | 	return
334 | }
335 | 
336 | //CreateCDataNode creates a CDATA node. CDATA nodes can
337 | // only be children of an element.
338 | //
339 | // The data argument will become the content of the newly created node.
340 | func (document *XmlDocument) CreateCDataNode(data string) (cdata *CDataNode) {
341 | 	dataLen := len(data)
342 | 	dataBytes := GetCString([]byte(data))
343 | 	dataPtr := unsafe.Pointer(&dataBytes[0])
344 | 	nodePtr := C.xmlNewCDataBlock(document.Ptr, (*C.xmlChar)(dataPtr), C.int(dataLen))
345 | 	if nodePtr != nil {
346 | 		cdata = NewNode(unsafe.Pointer(nodePtr), document).(*CDataNode)
347 | 	}
348 | 	return
349 | }
350 | 
351 | //CreateCommentNode creates a comment node. Comment nodes can
352 | // be children of an element or of the document itself.
353 | //
354 | // The data argument will become the content of the comment.
355 | func (document *XmlDocument) CreateCommentNode(data string) (comment *CommentNode) {
356 | 	dataBytes := GetCString([]byte(data))
357 | 	dataPtr := unsafe.Pointer(&dataBytes[0])
358 | 	nodePtr := C.xmlNewComment((*C.xmlChar)(dataPtr))
359 | 	if nodePtr != nil {
360 | 		comment = NewNode(unsafe.Pointer(nodePtr), document).(*CommentNode)
361 | 	}
362 | 	return
363 | }
364 | 
365 | //CreatePINode creates a processing instruction node with the specified name and data.
366 | // Processing instruction nodes can be children of an element or of the document itself.
367 | //
368 | // While it's common to use an attribute-like syntax for processing instructions, the data
369 | // is actually an arbitrary string that you will need to generate or parse yourself.
370 | func (document *XmlDocument) CreatePINode(name, data string) (pi *ProcessingInstructionNode) {
371 | 	nameBytes := GetCString([]byte(name))
372 | 	namePtr := unsafe.Pointer(&nameBytes[0])
373 | 	dataBytes := GetCString([]byte(data))
374 | 	dataPtr := unsafe.Pointer(&dataBytes[0])
375 | 	nodePtr := C.xmlNewDocPI(document.Ptr, (*C.xmlChar)(namePtr), (*C.xmlChar)(dataPtr))
376 | 	if nodePtr != nil {
377 | 		pi = NewNode(unsafe.Pointer(nodePtr), document).(*ProcessingInstructionNode)
378 | 	}
379 | 	return
380 | }
381 | 
382 | func (document *XmlDocument) ParseFragment(input, url []byte, options ParseOption) (fragment *DocumentFragment, err error) {
383 | 	root := document.Root()
384 | 	if root == nil {
385 | 		fragment, err = parsefragment(document, nil, input, url, options)
386 | 	} else {
387 | 		fragment, err = parsefragment(document, root.XmlNode, input, url, options)
388 | 	}
389 | 	return
390 | }
391 | 
392 | // Return the value of an NDATA entity declared in the DTD. If there is no such entity or
393 | // the value cannot be encoded as a valid URI, an empty string is returned.
394 | //
395 | // Note that this library assumes you already know the name of entity and does not
396 | // expose any way of getting the list of entities.
397 | func (document *XmlDocument) UnparsedEntityURI(name string) (val string) {
398 | 	if name == "" {
399 | 		return
400 | 	}
401 | 
402 | 	nameBytes := GetCString([]byte(name))
403 | 	namePtr := unsafe.Pointer(&nameBytes[0])
404 | 	entity := C.xmlGetDocEntity(document.Ptr, (*C.xmlChar)(namePtr))
405 | 	if entity == nil {
406 | 		return
407 | 	}
408 | 
409 | 	// unlike entity.content (which returns the raw, unprocessed string value of the entity),
410 | 	// it looks like entity.URI includes any escaping needed to treat the value as a URI.
411 | 	valPtr := unsafe.Pointer(entity.URI)
412 | 	if valPtr == nil {
413 | 		return
414 | 	}
415 | 
416 | 	val = C.GoString((*C.char)(valPtr))
417 | 	return
418 | }
419 | 
420 | // Free the C structures associated with this document.
421 | func (document *XmlDocument) Free() {
422 | 	//must free the xpath context before freeing the fragments or unlinked nodes
423 | 	//otherwise, it causes memory leaks and crashes when dealing with very large documents (a few MB)
424 | 	if document.XPathCtx != nil {
425 | 		document.XPathCtx.Free()
426 | 		document.XPathCtx = nil
427 | 	}
428 | 	//must clear the fragments first
429 | 	//because the nodes are put in the unlinked list
430 | 	if document.fragments != nil {
431 | 		for _, fragment := range document.fragments {
432 | 			fragment.Remove()
433 | 		}
434 | 	}
435 | 	document.fragments = nil
436 | 	var p *C.xmlNode
437 | 	if document.UnlinkedNodes != nil {
438 | 		for p, _ = range document.UnlinkedNodes {
439 | 			C.xmlFreeNode(p)
440 | 		}
441 | 	}
442 | 	document.UnlinkedNodes = nil
443 | 	if document.Ptr != nil {
444 | 		C.xmlFreeDoc(document.Ptr)
445 | 		document.Ptr = nil
446 | 	}
447 | }
448 | 
449 | /* Uri returns the URI of the document - typically this is the filename if ReadFile was used to parse
450 | the document.
451 | */
452 | func (document *XmlDocument) Uri() (val string) {
453 | 	val = C.GoString((*C.char)(unsafe.Pointer(document.Ptr.URL)))
454 | 	return
455 | }
456 | 


--------------------------------------------------------------------------------
/xml/node.go:
--------------------------------------------------------------------------------
   1 | package xml
   2 | 
   3 | //#include "helper.h"
   4 | //#include <string.h>
   5 | import "C"
   6 | 
   7 | import (
   8 | 	"errors"
   9 | 	. "github.com/moovweb/gokogiri/util"
  10 | 	"github.com/moovweb/gokogiri/xpath"
  11 | 	"strconv"
  12 | 	"unsafe"
  13 | )
  14 | 
// Sentinel errors returned by node operations. The identifier
// ERR_CANNOT_MAKE_DUCMENT_AS_CHILD is misspelled ("DUCMENT"); it is kept as-is
// because it is exported.
var (
	// Returned by coerce when the argument has an unsupported type.
	ERR_UNDEFINED_COERCE_PARAM               = errors.New("unexpected parameter type in coerce")
	ERR_UNDEFINED_SET_CONTENT_PARAM          = errors.New("unexpected parameter type in SetContent")
	ERR_UNDEFINED_SEARCH_PARAM               = errors.New("unexpected parameter type in Search")
	ERR_CANNOT_MAKE_DUCMENT_AS_CHILD         = errors.New("cannot add a document node as a child")
	ERR_CANNOT_COPY_TEXT_NODE_WHEN_ADD_CHILD = errors.New("cannot copy a text node when adding it")
)
  22 | 
// NodeType is an enumeration that indicates the type of XmlNode.
//
// Values start at 1 (iota + 1). NewNode converts the value returned by the C
// helper getNodeType directly to NodeType, so the numbering is expected to
// match the one used on the C side.
type NodeType int

const (
	XML_ELEMENT_NODE NodeType = iota + 1
	XML_ATTRIBUTE_NODE
	XML_TEXT_NODE
	XML_CDATA_SECTION_NODE
	XML_ENTITY_REF_NODE
	XML_ENTITY_NODE
	XML_PI_NODE
	XML_COMMENT_NODE
	XML_DOCUMENT_NODE
	XML_DOCUMENT_TYPE_NODE
	XML_DOCUMENT_FRAG_NODE
	XML_NOTATION_NODE
	XML_HTML_DOCUMENT_NODE
	XML_DTD_NODE
	XML_ELEMENT_DECL
	XML_ATTRIBUTE_DECL
	XML_ENTITY_DECL
	XML_NAMESPACE_DECL
	XML_XINCLUDE_START
	XML_XINCLUDE_END
	XML_DOCB_DOCUMENT_NODE
)
  49 | 
// SerializationOption is a set of flags used to control how a node is written out.
// Each constant is a distinct bit (1 << iota), so flags can be combined with
// the bitwise OR operator.
type SerializationOption int

const (
	XML_SAVE_FORMAT   SerializationOption = 1 << iota // format save output
	XML_SAVE_NO_DECL                                  // drop the xml declaration
	XML_SAVE_NO_EMPTY                                 // no empty tags
	XML_SAVE_NO_XHTML                                 // disable XHTML1 specific rules
	XML_SAVE_XHTML                                    // force XHTML1 specific rules
	XML_SAVE_AS_XML                                   // force XML serialization on HTML doc
	XML_SAVE_AS_HTML                                  // force HTML serialization on XML doc
	XML_SAVE_WSNONSIG                                 // format with non-significant whitespace
)
  63 | 
// NamespaceDeclaration represents a namespace declaration, providing both the prefix and the URI of the namespace.
// It is returned by the DeclaredNamespaces function.
type NamespaceDeclaration struct {
	// Prefix is the namespace prefix.
	Prefix string
	// Uri is the namespace URI.
	Uri    string
}
  70 | 
// Node is the interface shared by every node type in this package. NewNode
// returns a Node whose concrete type depends on the underlying libxml2 node
// type (element, attribute, text, CDATA, comment, processing instruction, or
// a plain *XmlNode for anything else).
//
// The group headings below are derived from the method names; the
// implementations are not visible in this part of the file.
type Node interface {
	// Access to the underlying C node and the owning document.
	NodePtr() unsafe.Pointer
	ResetNodePtr()
	MyDocument() Document

	IsValid() bool

	ParseFragment([]byte, []byte, ParseOption) (*DocumentFragment, error)
	LineNumber() int

	// Node type and sibling navigation.
	NodeType() NodeType
	NextSibling() Node
	PreviousSibling() Node

	// Parent, children and attributes.
	Parent() Node
	FirstChild() Node
	LastChild() Node
	CountChildren() int
	Attributes() map[string]*AttributeNode

	// Coerce converts a value into a slice of nodes.
	Coerce(interface{}) ([]Node, error)

	// Tree mutation. The interface{} arguments are presumably the kinds of
	// value accepted by Coerce — confirm against the implementations.
	AddChild(interface{}) error
	AddPreviousSibling(interface{}) error
	AddNextSibling(interface{}) error
	InsertBefore(interface{}) error
	InsertAfter(interface{}) error
	InsertBegin(interface{}) error
	InsertEnd(interface{}) error
	SetInnerHtml(interface{}) error
	SetChildren(interface{}) error
	Replace(interface{}) error
	Wrap(string) error

	SetContent(interface{}) error

	// Node name.
	Name() string
	SetName(string)

	// Attribute access.
	Attr(string) string
	SetAttr(string, string) string
	SetNsAttr(string, string, string) string
	Attribute(string) *AttributeNode

	Path() string

	// Copying.
	Duplicate(int) Node
	DuplicateTo(Document, int) Node

	// XPath search and evaluation.
	Search(interface{}) ([]Node, error)
	SearchWithVariables(interface{}, xpath.VariableScope) ([]Node, error)
	EvalXPath(interface{}, xpath.VariableScope) (interface{}, error)
	EvalXPathAsBoolean(interface{}, xpath.VariableScope) bool

	// Detaching and removal.
	Unlink()
	Remove()
	ResetChildren()

	// Serialization and content access.
	SerializeWithFormat(SerializationOption, []byte, []byte) ([]byte, int)
	ToXml([]byte, []byte) ([]byte, int)
	ToUnformattedXml() string
	ToHtml([]byte, []byte) ([]byte, int)
	ToBuffer([]byte) []byte
	String() string
	Content() string
	InnerHtml() string

	// Namespace handling.
	RecursivelyRemoveNamespaces() error
	Namespace() string
	SetNamespace(string, string)
	DeclareNamespace(string, string)
	RemoveDefaultNamespace()
	DeclaredNamespaces() []NamespaceDeclaration
}
 146 | 
// ErrTooLarge signals that an output buffer could not be grown (out of memory).
var ErrTooLarge = errors.New("Output buffer too large")

// initialOutputBufferSize is used to pre-allocate a buffer for serializing the
// document.
// NOTE(review): the trailing comment says 100K but the value is 10; the unit
// applied where this constant is used is not visible here — confirm.
const initialOutputBufferSize = 10 //100K
 152 | 
/*
XmlNode implements the Node interface, and as such is the heart of the API.

The specialised node types created by NewNode (ElementNode, AttributeNode,
TextNode, ...) each wrap an *XmlNode.
*/
type XmlNode struct {
	// Ptr is the underlying libxml2 node.
	Ptr *C.xmlNode
	// Document is the owning document (embedded).
	Document
	// valid is set to true by NewNode.
	valid bool
}
 161 | 
// WriteBuffer is the state handed to libxml2 while a node is serialized.
// xmlNodeWriteCallback copies each chunk of output into Buffer and advances
// Offset by the number of bytes written.
type WriteBuffer struct {
	// Node is the node being serialized.
	Node *XmlNode
	// Buffer receives the serialized bytes; it is replaced when it must grow.
	Buffer []byte
	// Offset is the number of bytes written into Buffer so far.
	Offset int
}
 167 | 
 168 | // NewNode takes a C pointer from the libxml2 library and returns a Node instance of
 169 | // the appropriate type.
 170 | func NewNode(nodePtr unsafe.Pointer, document Document) (node Node) {
 171 | 	if nodePtr == nil {
 172 | 		return nil
 173 | 	}
 174 | 	xmlNode := &XmlNode{
 175 | 		Ptr:      (*C.xmlNode)(nodePtr),
 176 | 		Document: document,
 177 | 		valid:    true,
 178 | 	}
 179 | 	nodeType := NodeType(C.getNodeType((*C.xmlNode)(nodePtr)))
 180 | 
 181 | 	switch nodeType {
 182 | 	default:
 183 | 		node = xmlNode
 184 | 	case XML_ATTRIBUTE_NODE:
 185 | 		node = &AttributeNode{XmlNode: xmlNode}
 186 | 	case XML_ELEMENT_NODE:
 187 | 		node = &ElementNode{XmlNode: xmlNode}
 188 | 	case XML_CDATA_SECTION_NODE:
 189 | 		node = &CDataNode{XmlNode: xmlNode}
 190 | 	case XML_COMMENT_NODE:
 191 | 		node = &CommentNode{XmlNode: xmlNode}
 192 | 	case XML_PI_NODE:
 193 | 		node = &ProcessingInstructionNode{XmlNode: xmlNode}
 194 | 	case XML_TEXT_NODE:
 195 | 		node = &TextNode{XmlNode: xmlNode}
 196 | 	}
 197 | 	return
 198 | }
 199 | 
 200 | func (xmlNode *XmlNode) coerce(data interface{}) (nodes []Node, err error) {
 201 | 	switch t := data.(type) {
 202 | 	default:
 203 | 		err = ERR_UNDEFINED_COERCE_PARAM
 204 | 	case []Node:
 205 | 		nodes = t
 206 | 	case *DocumentFragment:
 207 | 		nodes = t.Children()
 208 | 	case string:
 209 | 		f, err := xmlNode.MyDocument().ParseFragment([]byte(t), nil, DefaultParseOption)
 210 | 		if err == nil {
 211 | 			nodes = f.Children()
 212 | 		}
 213 | 	case []byte:
 214 | 		f, err := xmlNode.MyDocument().ParseFragment(t, nil, DefaultParseOption)
 215 | 		if err == nil {
 216 | 			nodes = f.Children()
 217 | 		}
 218 | 	}
 219 | 	return
 220 | }
 221 | 
 222 | func (xmlNode *XmlNode) Coerce(data interface{}) (nodes []Node, err error) {
 223 | 	return xmlNode.coerce(data)
 224 | }
 225 | 
 226 | // Add a node as a child of the current node.
 227 | // Passing in a nodeset will add all the nodes as children of the current node.
 228 | func (xmlNode *XmlNode) AddChild(data interface{}) (err error) {
 229 | 	switch t := data.(type) {
 230 | 	default:
 231 | 		if nodes, err := xmlNode.coerce(data); err == nil {
 232 | 			for _, node := range nodes {
 233 | 				if err = xmlNode.addChild(node); err != nil {
 234 | 					break
 235 | 				}
 236 | 			}
 237 | 		}
 238 | 	case *DocumentFragment:
 239 | 		if nodes, err := xmlNode.coerce(data); err == nil {
 240 | 			for _, node := range nodes {
 241 | 				if err = xmlNode.addChild(node); err != nil {
 242 | 					break
 243 | 				}
 244 | 			}
 245 | 		}
 246 | 	case Node:
 247 | 		err = xmlNode.addChild(t)
 248 | 	}
 249 | 	return
 250 | }
 251 | 
 252 | // Insert a node immediately before this node in the document.
 253 | // Passing in a nodeset will add all the nodes, in order.
 254 | func (xmlNode *XmlNode) AddPreviousSibling(data interface{}) (err error) {
 255 | 	switch t := data.(type) {
 256 | 	default:
 257 | 		if nodes, err := xmlNode.coerce(data); err == nil {
 258 | 			for _, node := range nodes {
 259 | 				if err = xmlNode.addPreviousSibling(node); err != nil {
 260 | 					break
 261 | 				}
 262 | 			}
 263 | 		}
 264 | 	case *DocumentFragment:
 265 | 		if nodes, err := xmlNode.coerce(data); err == nil {
 266 | 			for _, node := range nodes {
 267 | 				if err = xmlNode.addPreviousSibling(node); err != nil {
 268 | 					break
 269 | 				}
 270 | 			}
 271 | 		}
 272 | 	case Node:
 273 | 		err = xmlNode.addPreviousSibling(t)
 274 | 	}
 275 | 	return
 276 | }
 277 | 
 278 | // Insert a node immediately after this node in the document.
 279 | // Passing in a nodeset will add all the nodes, in order.
 280 | func (xmlNode *XmlNode) AddNextSibling(data interface{}) (err error) {
 281 | 	switch t := data.(type) {
 282 | 	default:
 283 | 		if nodes, err := xmlNode.coerce(data); err == nil {
 284 | 			for i := len(nodes) - 1; i >= 0; i-- {
 285 | 				node := nodes[i]
 286 | 				if err = xmlNode.addNextSibling(node); err != nil {
 287 | 					break
 288 | 				}
 289 | 			}
 290 | 		}
 291 | 	case *DocumentFragment:
 292 | 		if nodes, err := xmlNode.coerce(data); err == nil {
 293 | 			for i := len(nodes) - 1; i >= 0; i-- {
 294 | 				node := nodes[i]
 295 | 				if err = xmlNode.addNextSibling(node); err != nil {
 296 | 					break
 297 | 				}
 298 | 			}
 299 | 		}
 300 | 	case Node:
 301 | 		err = xmlNode.addNextSibling(t)
 302 | 	}
 303 | 	return
 304 | }
 305 | 
 306 | func (xmlNode *XmlNode) ResetNodePtr() {
 307 | 	xmlNode.Ptr = nil
 308 | 	return
 309 | }
 310 | 
 311 | // Returns true if the node is valid. Nodes become
 312 | // invalid when Remove() is called.
 313 | func (xmlNode *XmlNode) IsValid() bool {
 314 | 	return xmlNode.valid
 315 | }
 316 | 
 317 | // Return the document containing this node. Removed or unlinked
 318 | // nodes still have a document associated with them.
 319 | func (xmlNode *XmlNode) MyDocument() (document Document) {
 320 | 	document = xmlNode.Document.DocRef()
 321 | 	return
 322 | }
 323 | 
 324 | // NodePtr returns a pointer to the underlying C struct.
 325 | func (xmlNode *XmlNode) NodePtr() (p unsafe.Pointer) {
 326 | 	p = unsafe.Pointer(xmlNode.Ptr)
 327 | 	return
 328 | }
 329 | 
 330 | func (xmlNode *XmlNode) NodeType() (nodeType NodeType) {
 331 | 	nodeType = NodeType(C.getNodeType(xmlNode.Ptr))
 332 | 	return
 333 | }
 334 | 
 335 | // Path returns an XPath expression that can be used to
 336 | // select this node in the document.
 337 | func (xmlNode *XmlNode) Path() (path string) {
 338 | 	pathPtr := C.xmlGetNodePath(xmlNode.Ptr)
 339 | 	if pathPtr != nil {
 340 | 		p := (*C.char)(unsafe.Pointer(pathPtr))
 341 | 		defer C.xmlFreeChars(p)
 342 | 		path = C.GoString(p)
 343 | 	}
 344 | 	return
 345 | }
 346 | 
 347 | // NextSibling returns the next sibling (if any) of the current node.
 348 | // It is often used when iterating over the children of a node.
 349 | func (xmlNode *XmlNode) NextSibling() Node {
 350 | 	siblingPtr := (*C.xmlNode)(xmlNode.Ptr.next)
 351 | 	return NewNode(unsafe.Pointer(siblingPtr), xmlNode.Document)
 352 | }
 353 | 
 354 | // PreviousSibling returns the previous sibling (if any) of the current node.
 355 | // It is often used when iterating over the children of a node in reverse.
 356 | func (xmlNode *XmlNode) PreviousSibling() Node {
 357 | 	siblingPtr := (*C.xmlNode)(xmlNode.Ptr.prev)
 358 | 	return NewNode(unsafe.Pointer(siblingPtr), xmlNode.Document)
 359 | }
 360 | 
 361 | // CountChildren returns the number of child nodes.
 362 | func (xmlNode *XmlNode) CountChildren() int {
 363 | 	return int(C.xmlLsCountNode(xmlNode.Ptr))
 364 | }
 365 | 
 366 | func (xmlNode *XmlNode) FirstChild() Node {
 367 | 	return NewNode(unsafe.Pointer(xmlNode.Ptr.children), xmlNode.Document)
 368 | }
 369 | 
 370 | func (xmlNode *XmlNode) LastChild() Node {
 371 | 	return NewNode(unsafe.Pointer(xmlNode.Ptr.last), xmlNode.Document)
 372 | }
 373 | 
 374 | /*
 375 | Parent returns the parent of the current node (or nil if there isn't one).
 376 | This will always be an element or document node, as those are the only node types
 377 | that can have children.
 378 | */
 379 | func (xmlNode *XmlNode) Parent() Node {
 380 | 	if C.xmlNodePtrCheck(unsafe.Pointer(xmlNode.Ptr.parent)) == C.int(0) {
 381 | 		return nil
 382 | 	}
 383 | 	return NewNode(unsafe.Pointer(xmlNode.Ptr.parent), xmlNode.Document)
 384 | }
 385 | 
 386 | func (xmlNode *XmlNode) ResetChildren() {
 387 | 	var p unsafe.Pointer
 388 | 	for childPtr := xmlNode.Ptr.children; childPtr != nil; {
 389 | 		nextPtr := childPtr.next
 390 | 		p = unsafe.Pointer(childPtr)
 391 | 		C.xmlUnlinkNodeWithCheck((*C.xmlNode)(p))
 392 | 		xmlNode.Document.AddUnlinkedNode(p)
 393 | 		childPtr = nextPtr
 394 | 	}
 395 | }
 396 | 
 397 | func (xmlNode *XmlNode) SetContent(content interface{}) (err error) {
 398 | 	switch data := content.(type) {
 399 | 	default:
 400 | 		err = ERR_UNDEFINED_SET_CONTENT_PARAM
 401 | 	case string:
 402 | 		err = xmlNode.SetContent([]byte(data))
 403 | 	case []byte:
 404 | 		contentBytes := GetCString(data)
 405 | 		contentPtr := unsafe.Pointer(&contentBytes[0])
 406 | 		C.xmlSetContent(unsafe.Pointer(xmlNode), unsafe.Pointer(xmlNode.Ptr), contentPtr)
 407 | 	}
 408 | 	return
 409 | }
 410 | 
 411 | func (xmlNode *XmlNode) InsertBefore(data interface{}) (err error) {
 412 | 	err = xmlNode.AddPreviousSibling(data)
 413 | 	return
 414 | }
 415 | 
 416 | func (xmlNode *XmlNode) InsertAfter(data interface{}) (err error) {
 417 | 	err = xmlNode.AddNextSibling(data)
 418 | 	return
 419 | }
 420 | 
 421 | func (xmlNode *XmlNode) InsertBegin(data interface{}) (err error) {
 422 | 	if parent := xmlNode.Parent(); parent != nil {
 423 | 		if last := parent.LastChild(); last != nil {
 424 | 			err = last.AddPreviousSibling(data)
 425 | 		}
 426 | 	}
 427 | 	return
 428 | }
 429 | 
 430 | func (xmlNode *XmlNode) InsertEnd(data interface{}) (err error) {
 431 | 	if parent := xmlNode.Parent(); parent != nil {
 432 | 		if first := parent.FirstChild(); first != nil {
 433 | 			err = first.AddPreviousSibling(data)
 434 | 		}
 435 | 	}
 436 | 	return
 437 | }
 438 | 
 439 | func (xmlNode *XmlNode) SetChildren(data interface{}) (err error) {
 440 | 	nodes, err := xmlNode.coerce(data)
 441 | 	if err != nil {
 442 | 		return
 443 | 	}
 444 | 	xmlNode.ResetChildren()
 445 | 	err = xmlNode.AddChild(nodes)
 446 | 	return nil
 447 | }
 448 | 
 449 | func (xmlNode *XmlNode) SetInnerHtml(data interface{}) (err error) {
 450 | 	err = xmlNode.SetChildren(data)
 451 | 	return
 452 | }
 453 | 
 454 | func (xmlNode *XmlNode) Replace(data interface{}) (err error) {
 455 | 	err = xmlNode.AddPreviousSibling(data)
 456 | 	if err != nil {
 457 | 		return
 458 | 	}
 459 | 	xmlNode.Remove()
 460 | 	return
 461 | }
 462 | 
 463 | func (xmlNode *XmlNode) Attributes() (attributes map[string]*AttributeNode) {
 464 | 	attributes = make(map[string]*AttributeNode)
 465 | 	for prop := xmlNode.Ptr.properties; prop != nil; prop = prop.next {
 466 | 		if prop.name != nil {
 467 | 			namePtr := unsafe.Pointer(prop.name)
 468 | 			name := C.GoString((*C.char)(namePtr))
 469 | 			attrPtr := unsafe.Pointer(prop)
 470 | 			attributeNode := NewNode(attrPtr, xmlNode.Document)
 471 | 			if attr, ok := attributeNode.(*AttributeNode); ok {
 472 | 				attributes[name] = attr
 473 | 			}
 474 | 		}
 475 | 	}
 476 | 	return
 477 | }
 478 | 
 479 | // Return the attribute node, or nil if the attribute does not exist.
 480 | func (xmlNode *XmlNode) Attribute(name string) (attribute *AttributeNode) {
 481 | 	if xmlNode.NodeType() != XML_ELEMENT_NODE {
 482 | 		return
 483 | 	}
 484 | 	nameBytes := GetCString([]byte(name))
 485 | 	namePtr := unsafe.Pointer(&nameBytes[0])
 486 | 	attrPtr := C.xmlHasNsProp(xmlNode.Ptr, (*C.xmlChar)(namePtr), nil)
 487 | 	if attrPtr == nil {
 488 | 		return
 489 | 	} else {
 490 | 		node := NewNode(unsafe.Pointer(attrPtr), xmlNode.Document)
 491 | 		if node, ok := node.(*AttributeNode); ok {
 492 | 			attribute = node
 493 | 		}
 494 | 	}
 495 | 	return
 496 | }
 497 | 
 498 | // Attr returns the value of an attribute.
 499 | 
 500 | // If you need to check for the existence of an attribute,
 501 | // use Attribute.
 502 | func (xmlNode *XmlNode) Attr(name string) (val string) {
 503 | 	if xmlNode.NodeType() != XML_ELEMENT_NODE {
 504 | 		return
 505 | 	}
 506 | 	nameBytes := GetCString([]byte(name))
 507 | 	namePtr := unsafe.Pointer(&nameBytes[0])
 508 | 	valPtr := C.xmlGetProp(xmlNode.Ptr, (*C.xmlChar)(namePtr))
 509 | 	if valPtr == nil {
 510 | 		return
 511 | 	}
 512 | 	p := unsafe.Pointer(valPtr)
 513 | 	defer C.xmlFreeChars((*C.char)(p))
 514 | 	val = C.GoString((*C.char)(p))
 515 | 	return
 516 | }
 517 | 
 518 | // SetAttr sets the value of an attribute. If the attribute is in a namespace,
 519 | // use SetNsAttr instead.
 520 | 
 521 | // While this call accepts QNames for the name parameter, it does not check
 522 | // their validity.
 523 | 
 524 | // Attributes such as "xml:lang" or "xml:space" are not is a formal namespace
 525 | // and should be set by calling SetAttr with the prefix as part of the name.
 526 | func (xmlNode *XmlNode) SetAttr(name, value string) (val string) {
 527 | 	val = value
 528 | 	if xmlNode.NodeType() != XML_ELEMENT_NODE {
 529 | 		return
 530 | 	}
 531 | 	nameBytes := GetCString([]byte(name))
 532 | 	namePtr := unsafe.Pointer(&nameBytes[0])
 533 | 
 534 | 	valueBytes := GetCString([]byte(value))
 535 | 	valuePtr := unsafe.Pointer(&valueBytes[0])
 536 | 
 537 | 	C.xmlSetProp(xmlNode.Ptr, (*C.xmlChar)(namePtr), (*C.xmlChar)(valuePtr))
 538 | 	return
 539 | }
 540 | 
 541 | // SetNsAttr sets the value of a namespaced attribute.
 542 | 
 543 | // Attributes such as "xml:lang" or "xml:space" are not is a formal namespace
 544 | // and should be set by calling SetAttr with the xml prefix as part of the name.
 545 | 
 546 | // The namespace should already be declared and in-scope when SetNsAttr is called.
 547 | // This restriction will be lifted in a future version.
 548 | func (xmlNode *XmlNode) SetNsAttr(href, name, value string) (val string) {
 549 | 	val = value
 550 | 	if xmlNode.NodeType() != XML_ELEMENT_NODE {
 551 | 		return
 552 | 	}
 553 | 	nameBytes := GetCString([]byte(name))
 554 | 	namePtr := unsafe.Pointer(&nameBytes[0])
 555 | 
 556 | 	valueBytes := GetCString([]byte(value))
 557 | 	valuePtr := unsafe.Pointer(&valueBytes[0])
 558 | 
 559 | 	hrefBytes := GetCString([]byte(href))
 560 | 	hrefPtr := unsafe.Pointer(&hrefBytes[0])
 561 | 
 562 | 	ns := C.xmlSearchNsByHref((*C.xmlDoc)(xmlNode.Document.DocPtr()), xmlNode.Ptr, (*C.xmlChar)(hrefPtr))
 563 | 	if ns == nil {
 564 | 		return
 565 | 	}
 566 | 
 567 | 	C.xmlSetNsProp(xmlNode.Ptr, ns, (*C.xmlChar)(namePtr), (*C.xmlChar)(valuePtr))
 568 | 	return
 569 | }
 570 | 
 571 | // Search for nodes that match an XPath. This is the simplest way to look for nodes.
 572 | func (xmlNode *XmlNode) Search(data interface{}) (result []Node, err error) {
 573 | 	switch data := data.(type) {
 574 | 	default:
 575 | 		err = ERR_UNDEFINED_SEARCH_PARAM
 576 | 	case string:
 577 | 		if xpathExpr := xpath.Compile(data); xpathExpr != nil {
 578 | 			defer xpathExpr.Free()
 579 | 			result, err = xmlNode.Search(xpathExpr)
 580 | 		} else {
 581 | 			err = errors.New("cannot compile xpath: " + data)
 582 | 		}
 583 | 	case []byte:
 584 | 		result, err = xmlNode.Search(string(data))
 585 | 	case *xpath.Expression:
 586 | 		xpathCtx := xmlNode.Document.DocXPathCtx()
 587 | 		nodePtrs, err := xpathCtx.EvaluateAsNodeset(unsafe.Pointer(xmlNode.Ptr), data)
 588 | 		if nodePtrs == nil || err != nil {
 589 | 			return nil, err
 590 | 		}
 591 | 		for _, nodePtr := range nodePtrs {
 592 | 			result = append(result, NewNode(nodePtr, xmlNode.Document))
 593 | 		}
 594 | 	}
 595 | 	return
 596 | }
 597 | 
 598 | // As the Search function, but passing a VariableScope that can be used to reolve variable
 599 | // names or registered function references in the XPath being evaluated.
 600 | func (xmlNode *XmlNode) SearchWithVariables(data interface{}, v xpath.VariableScope) (result []Node, err error) {
 601 | 	switch data := data.(type) {
 602 | 	default:
 603 | 		err = ERR_UNDEFINED_SEARCH_PARAM
 604 | 	case string:
 605 | 		if xpathExpr := xpath.Compile(data); xpathExpr != nil {
 606 | 			defer xpathExpr.Free()
 607 | 			result, err = xmlNode.SearchWithVariables(xpathExpr, v)
 608 | 		} else {
 609 | 			err = errors.New("cannot compile xpath: " + data)
 610 | 		}
 611 | 	case []byte:
 612 | 		result, err = xmlNode.SearchWithVariables(string(data), v)
 613 | 	case *xpath.Expression:
 614 | 		xpathCtx := xmlNode.Document.DocXPathCtx()
 615 | 		xpathCtx.SetResolver(v)
 616 | 		nodePtrs, err := xpathCtx.EvaluateAsNodeset(unsafe.Pointer(xmlNode.Ptr), data)
 617 | 		if nodePtrs == nil || err != nil {
 618 | 			return nil, err
 619 | 		}
 620 | 		for _, nodePtr := range nodePtrs {
 621 | 			result = append(result, NewNode(nodePtr, xmlNode.Document))
 622 | 		}
 623 | 	}
 624 | 	return
 625 | }
 626 | 
 627 | // Evaluate an XPath and return a result of the appropriate type.
 628 | // If a non-nil VariableScope is provided, any variables or functions present
 629 | // in the xpath will be resolved.
 630 | 
 631 | // If the result is a nodeset (or the empty nodeset), a nodeset will be returned.
 632 | 
 633 | // If the result is a number, a float64 will be returned.
 634 | 
 635 | // If the result is a boolean, a bool will be returned.
 636 | 
 637 | // In any other cases, the result will be coerced to a string.
 638 | func (xmlNode *XmlNode) EvalXPath(data interface{}, v xpath.VariableScope) (result interface{}, err error) {
 639 | 	switch data := data.(type) {
 640 | 	case string:
 641 | 		if xpathExpr := xpath.Compile(data); xpathExpr != nil {
 642 | 			defer xpathExpr.Free()
 643 | 			result, err = xmlNode.EvalXPath(xpathExpr, v)
 644 | 		} else {
 645 | 			err = errors.New("cannot compile xpath: " + data)
 646 | 		}
 647 | 	case []byte:
 648 | 		result, err = xmlNode.EvalXPath(string(data), v)
 649 | 	case *xpath.Expression:
 650 | 		xpathCtx := xmlNode.Document.DocXPathCtx()
 651 | 		xpathCtx.SetResolver(v)
 652 | 		err := xpathCtx.Evaluate(unsafe.Pointer(xmlNode.Ptr), data)
 653 | 		if err != nil {
 654 | 			return nil, err
 655 | 		}
 656 | 		rt := xpathCtx.ReturnType()
 657 | 		switch rt {
 658 | 		case xpath.XPATH_NODESET, xpath.XPATH_XSLT_TREE:
 659 | 			nodePtrs, err := xpathCtx.ResultAsNodeset()
 660 | 			if err != nil {
 661 | 				return nil, err
 662 | 			}
 663 | 			var output []Node
 664 | 			for _, nodePtr := range nodePtrs {
 665 | 				output = append(output, NewNode(nodePtr, xmlNode.Document))
 666 | 			}
 667 | 			result = output
 668 | 		case xpath.XPATH_NUMBER:
 669 | 			result, _ = xpathCtx.ResultAsNumber()
 670 | 		case xpath.XPATH_BOOLEAN:
 671 | 			result, _ = xpathCtx.ResultAsBoolean()
 672 | 		default:
 673 | 			result, _ = xpathCtx.ResultAsString()
 674 | 		}
 675 | 	default:
 676 | 		err = ERR_UNDEFINED_SEARCH_PARAM
 677 | 	}
 678 | 	return
 679 | }
 680 | 
 681 | // Evaluate an XPath and coerce the result to a boolean according to the
 682 | // XPath rules. In the presence of an error, this function will return false
 683 | // even if the expression cannot actually be evaluated.
 684 | 
 685 | // In most cases you are better advised to call EvalXPath; this function is
 686 | // intended for packages that implement XML standards and that are fully aware
 687 | // of the consequences of suppressing a compilation error.
 688 | 
 689 | // If a non-nil VariableScope is provided, any variables or registered functions present
 690 | // in the xpath will be resolved.
 691 | func (xmlNode *XmlNode) EvalXPathAsBoolean(data interface{}, v xpath.VariableScope) (result bool) {
 692 | 	switch data := data.(type) {
 693 | 	case string:
 694 | 		if xpathExpr := xpath.Compile(data); xpathExpr != nil {
 695 | 			defer xpathExpr.Free()
 696 | 			result = xmlNode.EvalXPathAsBoolean(xpathExpr, v)
 697 | 		} else {
 698 | 			//err = errors.New("cannot compile xpath: " + data)
 699 | 		}
 700 | 	case []byte:
 701 | 		result = xmlNode.EvalXPathAsBoolean(string(data), v)
 702 | 	case *xpath.Expression:
 703 | 		xpathCtx := xmlNode.Document.DocXPathCtx()
 704 | 		xpathCtx.SetResolver(v)
 705 | 		err := xpathCtx.Evaluate(unsafe.Pointer(xmlNode.Ptr), data)
 706 | 		if err != nil {
 707 | 			return false
 708 | 		}
 709 | 		result, _ = xpathCtx.ResultAsBoolean()
 710 | 	default:
 711 | 		//err = ERR_UNDEFINED_SEARCH_PARAM
 712 | 	}
 713 | 	return
 714 | }
 715 | 
 716 | // The local name of the node. Use Namespace() to get the namespace.
 717 | func (xmlNode *XmlNode) Name() (name string) {
 718 | 	if xmlNode.Ptr.name != nil {
 719 | 		p := unsafe.Pointer(xmlNode.Ptr.name)
 720 | 		name = C.GoString((*C.char)(p))
 721 | 	}
 722 | 	return
 723 | }
 724 | 
 725 | // The namespace of the node. This is the empty string if there
 726 | // no associated namespace.
 727 | func (xmlNode *XmlNode) Namespace() (href string) {
 728 | 	if xmlNode.Ptr.ns != nil {
 729 | 		p := unsafe.Pointer(xmlNode.Ptr.ns.href)
 730 | 		href = C.GoString((*C.char)(p))
 731 | 	}
 732 | 	return
 733 | }
 734 | 
 735 | // Set the local name of the node. The namespace is set via SetNamespace().
 736 | func (xmlNode *XmlNode) SetName(name string) {
 737 | 	if len(name) > 0 {
 738 | 		nameBytes := GetCString([]byte(name))
 739 | 		namePtr := unsafe.Pointer(&nameBytes[0])
 740 | 		C.xmlNodeSetName(xmlNode.Ptr, (*C.xmlChar)(namePtr))
 741 | 	}
 742 | }
 743 | 
 744 | func (xmlNode *XmlNode) Duplicate(level int) Node {
 745 | 	return xmlNode.DuplicateTo(xmlNode.Document, level)
 746 | }
 747 | 
 748 | func (xmlNode *XmlNode) DuplicateTo(doc Document, level int) (dup Node) {
 749 | 	if xmlNode.valid {
 750 | 		dupPtr := C.xmlDocCopyNode(xmlNode.Ptr, (*C.xmlDoc)(doc.DocPtr()), C.int(level))
 751 | 		if dupPtr != nil {
 752 | 			dup = NewNode(unsafe.Pointer(dupPtr), xmlNode.Document)
 753 | 		}
 754 | 	}
 755 | 	return
 756 | }
 757 | 
 758 | func (xmlNode *XmlNode) serialize(format SerializationOption, encoding, outputBuffer []byte) ([]byte, int) {
 759 | 	nodePtr := unsafe.Pointer(xmlNode.Ptr)
 760 | 	var encodingPtr unsafe.Pointer
 761 | 	if len(encoding) == 0 {
 762 | 		encoding = xmlNode.Document.OutputEncoding()
 763 | 	}
 764 | 	if len(encoding) > 0 {
 765 | 		encodingPtr = unsafe.Pointer(&(encoding[0]))
 766 | 	} else {
 767 | 		encodingPtr = nil
 768 | 	}
 769 | 
 770 | 	wbuffer := &WriteBuffer{Node: xmlNode, Buffer: outputBuffer}
 771 | 	wbufferPtr := unsafe.Pointer(wbuffer)
 772 | 
 773 | 	ret := int(C.xmlSaveNode(wbufferPtr, nodePtr, encodingPtr, C.int(format)))
 774 | 	if ret < 0 {
 775 | 		panic("output error in xml node serialization: " + strconv.Itoa(ret))
 776 | 		return nil, 0
 777 | 	}
 778 | 
 779 | 	return wbuffer.Buffer, wbuffer.Offset
 780 | }
 781 | 
 782 | // SerializeWithFormat allows you to control the serialization flags passed to libxml.
 783 | // In most cases ToXml() and ToHtml() provide sensible defaults and should be preferred.
 784 | 
 785 | // The format parameter should be a set of SerializationOption constants or'd together.
 786 | // If encoding is nil, the document's output encoding is used - this defaults to UTF-8.
 787 | // If outputBuffer is nil, one will be created for you.
 788 | func (xmlNode *XmlNode) SerializeWithFormat(format SerializationOption, encoding, outputBuffer []byte) ([]byte, int) {
 789 | 	return xmlNode.serialize(format, encoding, outputBuffer)
 790 | }
 791 | 
 792 | // ToXml generates an indented XML document with an XML declaration.
 793 | // It is not guaranteed to be well formed unless xmlNode is an element node,
 794 | // or a document node with only one element child.
 795 | 
 796 | // If you need finer control over the formatting, call SerializeWithFormat.
 797 | 
 798 | // If encoding is nil, the document's output encoding is used - this defaults to UTF-8.
 799 | // If outputBuffer is nil, one will be created for you.
 800 | func (xmlNode *XmlNode) ToXml(encoding, outputBuffer []byte) ([]byte, int) {
 801 | 	return xmlNode.serialize(XML_SAVE_AS_XML|XML_SAVE_FORMAT, encoding, outputBuffer)
 802 | }
 803 | 
 804 | // ToUnformattedXml generates an unformatted XML document without an XML declaration.
 805 | // This is useful for conforming to various standards and for unit testing, although
 806 | // the output is not guaranteed to be well formed unless xmlNode is an element node.
 807 | func (xmlNode *XmlNode) ToUnformattedXml() string {
 808 | 	var b []byte
 809 | 	var size int
 810 | 	b, size = xmlNode.serialize(XML_SAVE_AS_XML|XML_SAVE_NO_DECL, nil, nil)
 811 | 	if b == nil {
 812 | 		return ""
 813 | 	}
 814 | 	return string(b[:size])
 815 | }
 816 | 
 817 | // ToHtml generates an indented XML document that conforms to HTML 4.0 rules; meaning
 818 | // that some elements may be unclosed or forced to use end tags even when empty.
 819 | 
 820 | // If you want to output XHTML, call SerializeWithFormat and enable the XML_SAVE_XHTML
 821 | // flag as part of the format.
 822 | 
 823 | // If encoding is nil, the document's output encoding is used - this defaults to UTF-8.
 824 | // If outputBuffer is nil, one will be created for you.
 825 | func (xmlNode *XmlNode) ToHtml(encoding, outputBuffer []byte) ([]byte, int) {
 826 | 	return xmlNode.serialize(XML_SAVE_AS_HTML|XML_SAVE_FORMAT, encoding, outputBuffer)
 827 | }
 828 | 
 829 | func (xmlNode *XmlNode) ToBuffer(outputBuffer []byte) []byte {
 830 | 	var b []byte
 831 | 	var size int
 832 | 	if docType := xmlNode.Document.DocType(); docType == XML_HTML_DOCUMENT_NODE {
 833 | 		b, size = xmlNode.ToHtml(nil, outputBuffer)
 834 | 	} else {
 835 | 		b, size = xmlNode.ToXml(nil, outputBuffer)
 836 | 	}
 837 | 	return b[:size]
 838 | }
 839 | 
 840 | func (xmlNode *XmlNode) String() string {
 841 | 	b := xmlNode.ToBuffer(nil)
 842 | 	if b == nil {
 843 | 		return ""
 844 | 	}
 845 | 	return string(b)
 846 | }
 847 | 
 848 | func (xmlNode *XmlNode) Content() string {
 849 | 	contentPtr := C.xmlNodeGetContent(xmlNode.Ptr)
 850 | 	charPtr := (*C.char)(unsafe.Pointer(contentPtr))
 851 | 	defer C.xmlFreeChars(charPtr)
 852 | 	return C.GoString(charPtr)
 853 | }
 854 | 
 855 | func (xmlNode *XmlNode) InnerHtml() string {
 856 | 	out := ""
 857 | 
 858 | 	for child := xmlNode.FirstChild(); child != nil; child = child.NextSibling() {
 859 | 		out += child.String()
 860 | 	}
 861 | 	return out
 862 | }
 863 | 
 864 | func (xmlNode *XmlNode) Unlink() {
 865 | 	if int(C.xmlUnlinkNodeWithCheck(xmlNode.Ptr)) != 0 {
 866 | 		xmlNode.Document.AddUnlinkedNode(unsafe.Pointer(xmlNode.Ptr))
 867 | 	}
 868 | }
 869 | 
 870 | func (xmlNode *XmlNode) Remove() {
 871 | 	if xmlNode.valid && unsafe.Pointer(xmlNode.Ptr) != xmlNode.Document.DocPtr() {
 872 | 		xmlNode.Unlink()
 873 | 		xmlNode.valid = false
 874 | 	}
 875 | }
 876 | 
 877 | func (xmlNode *XmlNode) addChild(node Node) (err error) {
 878 | 	nodeType := node.NodeType()
 879 | 	if nodeType == XML_DOCUMENT_NODE || nodeType == XML_HTML_DOCUMENT_NODE {
 880 | 		err = ERR_CANNOT_MAKE_DUCMENT_AS_CHILD
 881 | 		return
 882 | 	}
 883 | 	nodePtr := node.NodePtr()
 884 | 	if xmlNode.NodePtr() == nodePtr {
 885 | 		return
 886 | 	}
 887 | 	ret := xmlNode.isAccestor(nodePtr)
 888 | 	if ret < 0 {
 889 | 		return
 890 | 	} else if ret == 0 {
 891 | 		if !xmlNode.Document.RemoveUnlinkedNode(nodePtr) {
 892 | 			C.xmlUnlinkNodeWithCheck((*C.xmlNode)(nodePtr))
 893 | 		}
 894 | 		C.xmlAddChild(xmlNode.Ptr, (*C.xmlNode)(nodePtr))
 895 | 	} else if ret > 0 {
 896 | 		node.Remove()
 897 | 	}
 898 | 
 899 | 	return
 900 | }
 901 | 
 902 | func (xmlNode *XmlNode) addPreviousSibling(node Node) (err error) {
 903 | 	nodeType := node.NodeType()
 904 | 	if nodeType == XML_DOCUMENT_NODE || nodeType == XML_HTML_DOCUMENT_NODE {
 905 | 		err = ERR_CANNOT_MAKE_DUCMENT_AS_CHILD
 906 | 		return
 907 | 	}
 908 | 	nodePtr := node.NodePtr()
 909 | 	if xmlNode.NodePtr() == nodePtr {
 910 | 		return
 911 | 	}
 912 | 	ret := xmlNode.isAccestor(nodePtr)
 913 | 	if ret < 0 {
 914 | 		return
 915 | 	} else if ret == 0 {
 916 | 		if !xmlNode.Document.RemoveUnlinkedNode(nodePtr) {
 917 | 			C.xmlUnlinkNodeWithCheck((*C.xmlNode)(nodePtr))
 918 | 		}
 919 | 		C.xmlAddPrevSibling(xmlNode.Ptr, (*C.xmlNode)(nodePtr))
 920 | 	} else if ret > 0 {
 921 | 		node.Remove()
 922 | 	}
 923 | 	return
 924 | }
 925 | 
 926 | func (xmlNode *XmlNode) addNextSibling(node Node) (err error) {
 927 | 	nodeType := node.NodeType()
 928 | 	if nodeType == XML_DOCUMENT_NODE || nodeType == XML_HTML_DOCUMENT_NODE {
 929 | 		err = ERR_CANNOT_MAKE_DUCMENT_AS_CHILD
 930 | 		return
 931 | 	}
 932 | 	nodePtr := node.NodePtr()
 933 | 	if xmlNode.NodePtr() == nodePtr {
 934 | 		return
 935 | 	}
 936 | 	ret := xmlNode.isAccestor(nodePtr)
 937 | 	if ret < 0 {
 938 | 		return
 939 | 	} else if ret == 0 {
 940 | 		if !xmlNode.Document.RemoveUnlinkedNode(nodePtr) {
 941 | 			C.xmlUnlinkNodeWithCheck((*C.xmlNode)(nodePtr))
 942 | 		}
 943 | 		C.xmlAddNextSibling(xmlNode.Ptr, (*C.xmlNode)(nodePtr))
 944 | 	} else if ret > 0 {
 945 | 		node.Remove()
 946 | 	}
 947 | 	return
 948 | }
 949 | 
 950 | func (xmlNode *XmlNode) Wrap(data string) (err error) {
 951 | 	newNodes, err := xmlNode.coerce(data)
 952 | 	if err == nil && len(newNodes) > 0 {
 953 | 		newParent := newNodes[0]
 954 | 		xmlNode.addNextSibling(newParent)
 955 | 		newParent.AddChild(xmlNode)
 956 | 	}
 957 | 	return
 958 | }
 959 | 
 960 | func (xmlNode *XmlNode) ParseFragment(input, url []byte, options ParseOption) (fragment *DocumentFragment, err error) {
 961 | 	fragment, err = parsefragment(xmlNode.Document, xmlNode, input, url, options)
 962 | 	return
 963 | }
 964 | 
 965 | //export xmlNodeWriteCallback
 966 | func xmlNodeWriteCallback(wbufferObj unsafe.Pointer, data unsafe.Pointer, data_len C.int) {
 967 | 	wbuffer := (*WriteBuffer)(wbufferObj)
 968 | 	offset := wbuffer.Offset
 969 | 
 970 | 	if offset > len(wbuffer.Buffer) {
 971 | 		panic("fatal error in xmlNodeWriteCallback")
 972 | 	}
 973 | 
 974 | 	buffer := wbuffer.Buffer[:offset]
 975 | 	dataLen := int(data_len)
 976 | 
 977 | 	if dataLen > 0 {
 978 | 		if len(buffer)+dataLen > cap(buffer) {
 979 | 			newBuffer := grow(buffer, dataLen)
 980 | 			wbuffer.Buffer = newBuffer
 981 | 		}
 982 | 		destBufPtr := unsafe.Pointer(&(wbuffer.Buffer[offset]))
 983 | 		C.memcpy(destBufPtr, data, C.size_t(dataLen))
 984 | 		wbuffer.Offset += dataLen
 985 | 	}
 986 | }
 987 | 
 988 | //export xmlUnlinkNodeCallback
 989 | func xmlUnlinkNodeCallback(nodePtr unsafe.Pointer, gonodePtr unsafe.Pointer) {
 990 | 	xmlNode := (*XmlNode)(gonodePtr)
 991 | 	xmlNode.Document.AddUnlinkedNode(nodePtr)
 992 | }
 993 | 
 994 | func grow(buffer []byte, n int) (newBuffer []byte) {
 995 | 	newBuffer = makeSlice(2*cap(buffer) + n)
 996 | 	copy(newBuffer, buffer)
 997 | 	return
 998 | }
 999 | 
1000 | func makeSlice(n int) []byte {
1001 | 	// If the make fails, give a known error.
1002 | 	defer func() {
1003 | 		if recover() != nil {
1004 | 			panic(ErrTooLarge)
1005 | 		}
1006 | 	}()
1007 | 	return make([]byte, n)
1008 | }
1009 | 
1010 | func (xmlNode *XmlNode) isAccestor(nodePtr unsafe.Pointer) int {
1011 | 	parentPtr := xmlNode.Ptr.parent
1012 | 
1013 | 	if C.xmlNodePtrCheck(unsafe.Pointer(parentPtr)) == C.int(0) {
1014 | 		return -1
1015 | 	}
1016 | 	for ; parentPtr != nil; parentPtr = parentPtr.parent {
1017 | 		if C.xmlNodePtrCheck(unsafe.Pointer(parentPtr)) == C.int(0) {
1018 | 			return -1
1019 | 		}
1020 | 		p := unsafe.Pointer(parentPtr)
1021 | 		if p == nodePtr {
1022 | 			return 1
1023 | 		}
1024 | 	}
1025 | 	return 0
1026 | }
1027 | 
1028 | func (xmlNode *XmlNode) RecursivelyRemoveNamespaces() (err error) {
1029 | 	nodePtr := xmlNode.Ptr
1030 | 	C.xmlSetNs(nodePtr, nil)
1031 | 
1032 | 	for child := xmlNode.FirstChild(); child != nil; {
1033 | 		child.RecursivelyRemoveNamespaces()
1034 | 		child = child.NextSibling()
1035 | 	}
1036 | 
1037 | 	nodeType := xmlNode.NodeType()
1038 | 
1039 | 	if ((nodeType == XML_ELEMENT_NODE) ||
1040 | 		(nodeType == XML_XINCLUDE_START) ||
1041 | 		(nodeType == XML_XINCLUDE_END)) &&
1042 | 		(nodePtr.nsDef != nil) {
1043 | 		C.xmlFreeNsList((*C.xmlNs)(nodePtr.nsDef))
1044 | 		nodePtr.nsDef = nil
1045 | 	}
1046 | 
1047 | 	if nodeType == XML_ELEMENT_NODE && nodePtr.properties != nil {
1048 | 		property := nodePtr.properties
1049 | 		for property != nil {
1050 | 			if property.ns != nil {
1051 | 				property.ns = nil
1052 | 			}
1053 | 			property = property.next
1054 | 		}
1055 | 	}
1056 | 	return
1057 | }
1058 | 
1059 | func (xmlNode *XmlNode) RemoveDefaultNamespace() {
1060 | 	nodePtr := xmlNode.Ptr
1061 | 	C.xmlRemoveDefaultNamespace(nodePtr)
1062 | }
1063 | 
1064 | // Returns a list of all the namespace declarations that exist on this node.
1065 | 
1066 | // You can add a namespace declaration by calling DeclareNamespace.
1067 | // Calling SetNamespace will automatically add a declaration if required.
1068 | 
1069 | // Calling SetNsAttr does *not* automatically create a declaration. This will
1070 | // fixed in a future version.
1071 | func (xmlNode *XmlNode) DeclaredNamespaces() (result []NamespaceDeclaration) {
1072 | 	nodePtr := xmlNode.Ptr
1073 | 	for ns := nodePtr.nsDef; ns != nil; ns = (*C.xmlNs)(ns.next) {
1074 | 		prefixPtr := unsafe.Pointer(ns.prefix)
1075 | 		prefix := C.GoString((*C.char)(prefixPtr))
1076 | 		hrefPtr := unsafe.Pointer(ns.href)
1077 | 		uri := C.GoString((*C.char)(hrefPtr))
1078 | 		decl := NamespaceDeclaration{prefix, uri}
1079 | 		result = append(result, decl)
1080 | 	}
1081 | 	return
1082 | }
1083 | 
1084 | // Add a namespace declaration to an element.
1085 | 
1086 | // This is typically done on the root element or node high up in the tree
1087 | // to avoid duplication. The declaration is not created if the namespace
1088 | // is already declared in this scope with the same prefix.
1089 | func (xmlNode *XmlNode) DeclareNamespace(prefix, href string) {
1090 | 	//can only declare namespaces on elements
1091 | 	if xmlNode.NodeType() != XML_ELEMENT_NODE {
1092 | 		return
1093 | 	}
1094 | 	hrefBytes := GetCString([]byte(href))
1095 | 	hrefPtr := unsafe.Pointer(&hrefBytes[0])
1096 | 
1097 | 	//if the namespace is already declared using this prefix, just return
1098 | 	_ns := C.xmlSearchNsByHref((*C.xmlDoc)(xmlNode.Document.DocPtr()), xmlNode.Ptr, (*C.xmlChar)(hrefPtr))
1099 | 	if _ns != nil {
1100 | 		_prefixPtr := unsafe.Pointer(_ns.prefix)
1101 | 		_prefix := C.GoString((*C.char)(_prefixPtr))
1102 | 		if prefix == _prefix {
1103 | 			return
1104 | 		}
1105 | 	}
1106 | 
1107 | 	prefixBytes := GetCString([]byte(prefix))
1108 | 	prefixPtr := unsafe.Pointer(&prefixBytes[0])
1109 | 	if prefix == "" {
1110 | 		prefixPtr = nil
1111 | 	}
1112 | 
1113 | 	//this adds the namespace declaration to the node
1114 | 	_ = C.xmlNewNs(xmlNode.Ptr, (*C.xmlChar)(hrefPtr), (*C.xmlChar)(prefixPtr))
1115 | }
1116 | 
1117 | // Set the namespace of an element.
1118 | func (xmlNode *XmlNode) SetNamespace(prefix, href string) {
1119 | 	if xmlNode.NodeType() != XML_ELEMENT_NODE {
1120 | 		return
1121 | 	}
1122 | 
1123 | 	prefixBytes := GetCString([]byte(prefix))
1124 | 	prefixPtr := unsafe.Pointer(&prefixBytes[0])
1125 | 	if prefix == "" {
1126 | 		prefixPtr = nil
1127 | 	}
1128 | 
1129 | 	hrefBytes := GetCString([]byte(href))
1130 | 	hrefPtr := unsafe.Pointer(&hrefBytes[0])
1131 | 
1132 | 	// use the existing namespace declaration if there is one
1133 | 	_ns := C.xmlSearchNsByHref((*C.xmlDoc)(xmlNode.Document.DocPtr()), xmlNode.Ptr, (*C.xmlChar)(hrefPtr))
1134 | 	if _ns != nil {
1135 | 		_prefixPtr := unsafe.Pointer(_ns.prefix)
1136 | 		_prefix := C.GoString((*C.char)(_prefixPtr))
1137 | 		if prefix == _prefix {
1138 | 			C.xmlSetNs(xmlNode.Ptr, _ns)
1139 | 			return
1140 | 		}
1141 | 	}
1142 | 
1143 | 	ns := C.xmlNewNs(xmlNode.Ptr, (*C.xmlChar)(hrefPtr), (*C.xmlChar)(prefixPtr))
1144 | 	C.xmlSetNs(xmlNode.Ptr, ns)
1145 | }
1146 | 
1147 | // Returns the line number on which the node appears, or a -1 if the
1148 | // line number cannot be determined.
1149 | func (xmlNode *XmlNode) LineNumber() int {
1150 | 	return int(C.xmlGetLineNo(xmlNode.Ptr))
1151 | }
1152 | 


--------------------------------------------------------------------------------