├── VERSION ├── AUTHOR ├── xml ├── tests │ ├── document │ │ ├── empty │ │ │ ├── input.txt │ │ │ └── output.txt │ │ └── basic │ │ │ ├── input.txt │ │ │ └── output.txt │ └── node │ │ ├── attributes │ │ ├── output.txt~ │ │ ├── input.txt │ │ └── output.txt │ │ ├── add_child │ │ ├── input.txt │ │ └── output.txt │ │ ├── inner │ │ ├── input.txt │ │ └── output.txt │ │ ├── add_next_sibling │ │ ├── input.txt │ │ ├── input.txt~ │ │ └── output.txt │ │ ├── inner_with_attributes │ │ ├── input.txt │ │ └── output.txt │ │ ├── replace │ │ ├── input.txt │ │ └── output.txt │ │ ├── set_content │ │ ├── input.txt │ │ └── output.txt │ │ ├── set_namespace │ │ ├── input.txt │ │ └── output.txt │ │ ├── set_ns_attr │ │ ├── input.txt │ │ └── output.txt │ │ ├── add_previous_sibling │ │ ├── input.txt │ │ └── output.txt │ │ ├── add_previous_sibling2 │ │ ├── input.txt │ │ └── output.txt │ │ ├── declare_namespace │ │ ├── input.txt │ │ └── output.txt │ │ ├── set_children │ │ ├── input.txt │ │ └── output.txt │ │ ├── set_default_namespace │ │ ├── input.txt │ │ └── output.txt │ │ ├── add_ancestor │ │ ├── input.txt │ │ └── output.txt │ │ └── search │ │ ├── input.txt │ │ └── output.txt ├── comment.go ├── element.go ├── pi.go ├── cdata.go ├── text.go ├── attribute.go ├── nodeset.go ├── helper.h ├── fragment.go ├── fragment_test.go ├── attribute_test.go ├── search_test.go ├── utils_test.go ├── helper.c ├── document_test.go ├── node_test.go ├── document.go └── node.go ├── html ├── tests │ └── document │ │ ├── html_fragment_encoding │ │ ├── output.txt │ │ └── input.txt │ │ └── encoding │ │ └── input.html ├── helper.h ├── crash_test.go ├── xpath_test.go ├── helper.c ├── encoding_test.go ├── utils_test.go ├── fragment.go ├── document_test.go ├── document.go ├── fragment_test.go └── node_test.go ├── util ├── util_test.go └── util.go ├── help ├── help_test.go ├── util_test.go └── help.go ├── .gitignore ├── mem ├── libxml.h ├── mem.go ├── mem_test.go └── libxml.c ├── css ├── test │ ├── inputs │ ├── outputs-global 
│ └── outputs-local ├── css_test.go ├── notes.txt └── css.go ├── xpath ├── util_test.go ├── xpath_test.go ├── expression.go ├── util.go └── xpath.go ├── LICENSE ├── Readme.md ├── gokogiri.go └── gokogiri_test.go /VERSION: -------------------------------------------------------------------------------- 1 | 1.0 2 | -------------------------------------------------------------------------------- /AUTHOR: -------------------------------------------------------------------------------- 1 | Zhigang Chen 2 | Hampton Catlin -------------------------------------------------------------------------------- /xml/tests/document/empty/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xml/tests/node/attributes/output.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xml/tests/document/basic/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xml/tests/node/add_child/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xml/tests/node/inner/input.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /xml/tests/node/add_next_sibling/input.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /xml/tests/node/inner_with_attributes/input.txt: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /xml/tests/node/replace/input.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /xml/tests/node/set_content/input.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /xml/tests/node/add_next_sibling/input.txt~: -------------------------------------------------------------------------------- 1 | "" 2 | -------------------------------------------------------------------------------- /xml/tests/node/set_namespace/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xml/tests/node/set_ns_attr/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xml/tests/node/add_previous_sibling/input.txt: -------------------------------------------------------------------------------- 1 | fun 2 | -------------------------------------------------------------------------------- /xml/tests/node/add_previous_sibling2/input.txt: -------------------------------------------------------------------------------- 1 | fun 2 | -------------------------------------------------------------------------------- /xml/tests/node/declare_namespace/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xml/tests/node/set_children/input.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- 
/xml/tests/node/set_default_namespace/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xml/tests/document/empty/output.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /xml/tests/node/add_ancestor/input.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xml/comment.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | type CommentNode struct { 4 | *XmlNode 5 | } 6 | -------------------------------------------------------------------------------- /xml/element.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | type ElementNode struct { 4 | *XmlNode 5 | } 6 | -------------------------------------------------------------------------------- /html/tests/document/html_fragment_encoding/output.txt: -------------------------------------------------------------------------------- 1 | CHUCK FREAKINNORRIS -------------------------------------------------------------------------------- /xml/tests/document/basic/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /xml/tests/node/add_child/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /xml/tests/node/attributes/input.txt: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /html/tests/document/html_fragment_encoding/input.txt: -------------------------------------------------------------------------------- 1 | CHUCK FREAKINNORRIS 2 | -------------------------------------------------------------------------------- /xml/pi.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | type ProcessingInstructionNode struct { 4 | *XmlNode 5 | } 6 | -------------------------------------------------------------------------------- /xml/tests/node/replace/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /xml/tests/node/search/input.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /util/util_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | //please check the search tests in gokogiri/xml and gokogiri/html 4 | -------------------------------------------------------------------------------- /xml/tests/node/add_previous_sibling2/output.txt: -------------------------------------------------------------------------------- 1 | 2 | COOLfun 3 | -------------------------------------------------------------------------------- /xml/tests/node/add_next_sibling/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /xml/tests/node/set_children/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /xml/tests/node/inner/output.txt: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /xml/tests/node/set_content/output.txt: -------------------------------------------------------------------------------- 1 | 2 | <fun></fun> 3 | -------------------------------------------------------------------------------- /html/tests/document/encoding/input.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moovweb/gokogiri/HEAD/html/tests/document/encoding/input.html -------------------------------------------------------------------------------- /xml/tests/node/add_previous_sibling/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | fun 5 | -------------------------------------------------------------------------------- /help/help_test.go: -------------------------------------------------------------------------------- 1 | package help 2 | 3 | import "testing" 4 | 5 | func TestCheckMemoryLeaks(t *testing.T) { 6 | CheckXmlMemoryLeaks(t) 7 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.out 2 | build/* 3 | _* 4 | *.6 5 | *.o 6 | libxml/test/ctest/test 7 | .DS_Store 8 | test_output.txt 9 | .jank 10 | jank.yml -------------------------------------------------------------------------------- /xml/tests/node/attributes/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /xml/tests/node/set_default_namespace/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 
-------------------------------------------------------------------------------- /xml/tests/node/declare_namespace/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /xml/tests/node/search/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /xml/tests/node/set_namespace/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /mem/libxml.h: -------------------------------------------------------------------------------- 1 | #ifndef _GOKOGIRI_LIBXML_H 2 | #define _GOKOGIRI_LIBXML_H 3 | 4 | unsigned long libxmlGoAllocSize(); 5 | void libxmlGoInit(); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /xml/tests/node/inner_with_attributes/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /xml/tests/node/set_ns_attr/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /xml/tests/node/add_ancestor/output.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /mem/mem.go: -------------------------------------------------------------------------------- 1 | package mem 2 | 3 | /* 4 | #cgo pkg-config: libxml-2.0 5 | 6 | #include 7 | #include "libxml.h" 8 | */ 9 | import "C" 10 | 
11 | const LIBXML_VERSION = C.LIBXML_DOTTED_VERSION 12 | 13 | func init() { 14 | C.libxmlGoInit() 15 | } 16 | 17 | func AllocSize() int { 18 | return int(C.libxmlGoAllocSize()) 19 | } 20 | -------------------------------------------------------------------------------- /mem/mem_test.go: -------------------------------------------------------------------------------- 1 | package mem 2 | 3 | import "testing" 4 | 5 | const EXPECTED_VERSION = "2.7.8" 6 | 7 | func TestLibxml(t *testing.T) { 8 | if LIBXML_VERSION != EXPECTED_VERSION { 9 | t.Fatal("Invalid libxml version got:", LIBXML_VERSION, "expected", EXPECTED_VERSION) 10 | } 11 | if AllocSize() != 0 { 12 | t.Fatal(AllocSize(), "remaining allocations") 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /util/util.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | var EmptyStringBytes = []byte{0} 4 | 5 | func AppendCStringTerminator(b []byte) []byte { 6 | if num := len(b); num > 0 { 7 | if b[num-1] != 0 { 8 | return append(b, 0) 9 | } 10 | } 11 | return b 12 | } 13 | 14 | func GetCString(b []byte) []byte { 15 | b = AppendCStringTerminator(b) 16 | if len(b) == 0 { 17 | return EmptyStringBytes 18 | } 19 | return b 20 | } 21 | -------------------------------------------------------------------------------- /css/test/inputs: -------------------------------------------------------------------------------- 1 | div 2 | > div 3 | div, > span 4 | div.foo 5 | div.foo.bar 6 | div#foo 7 | div#foo.bar#hux 8 | > div#foo.bar#hux 9 | .bar 10 | :first-child 11 | div:first-child 12 | div:nth-child(odd) 13 | div:nth-child(even) 14 | div:nth-child(2n + 1) 15 | div:nth-child(-3n-6) 16 | div:nth-of-type(5) 17 | :nth-child(4) 18 | div :nth-child(2) 19 | div[a='b'] 20 | div[a~='b'] 21 | div[a|='b'] 22 | div[a*='b'] 23 | div[a ^= 'b' ] 24 | div [ a $= 'b' ] 25 | > :only-of-type 26 | div[a='b']:first-of-type.foo 27 | 
div.bar:not(#foo:first-child) -------------------------------------------------------------------------------- /xml/cdata.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | /* CDataNode represents a CDATA section. This XML node type allows the embedding of unescaped, verbatim text within an XML document. 4 | 5 | It is otherwise identical to a TextNode. It is most often used to wrap content that is whitespace-sensitive or likely to contain 6 | large numbers of less-than or greater-than signs (such as code snippets or example documents). 7 | 8 | If you use the XML_PARSE_NOCDATA parsing option, the parser will always present the CDATA sections as TextNodes. 9 | */ 10 | type CDataNode struct { 11 | *XmlNode 12 | } 13 | -------------------------------------------------------------------------------- /help/util_test.go: -------------------------------------------------------------------------------- 1 | package help 2 | 3 | import "testing" 4 | 5 | func CheckXmlMemoryLeaks(t *testing.T) { 6 | // LibxmlCleanUpParser() should only be called once during the lifetime of the 7 | // program, but because there's no way to know when the last test of the suite 8 | // runs in go, we can't accurately call it strictly once, so just avoid calling 9 | // it for now because it's known to cause crashes if called multiple times. 
10 | //LibxmlCleanUpParser() 11 | 12 | if !LibxmlCheckMemoryLeak() { 13 | t.Errorf("Memory leaks: %d!!!", LibxmlGetMemoryAllocation()) 14 | LibxmlReportMemoryLeak() 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /xpath/util_test.go: -------------------------------------------------------------------------------- 1 | package xpath 2 | 3 | import "testing" 4 | import "github.com/moovweb/gokogiri/help" 5 | 6 | func CheckXmlMemoryLeaks(t *testing.T) { 7 | // LibxmlCleanUpParser() should only be called once during the lifetime of the 8 | // program, but because there's no way to know when the last test of the suite 9 | // runs in go, we can't accurately call it strictly once, so just avoid calling 10 | // it for now because it's known to cause crashes if called multiple times. 11 | //help.LibxmlCleanUpParser() 12 | 13 | if !help.LibxmlCheckMemoryLeak() { 14 | t.Errorf("Memory leaks: %d!!!", help.LibxmlGetMemoryAllocation()) 15 | help.LibxmlReportMemoryLeak() 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /html/helper.h: -------------------------------------------------------------------------------- 1 | #ifndef __CHELPER_H__ 2 | #define __CHELPER_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | htmlDocPtr htmlParse(void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int errror_buffer_len); 11 | xmlNode* htmlParseFragment(void* doc, void *buffer, int buffer_len, void *url, int options, void *error_buffer, int error_buffer_len); 12 | xmlNode* htmlParseFragmentAsDoc(void *doc, void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len); 13 | 14 | #endif //__CHELPER_H__ 15 | -------------------------------------------------------------------------------- /xpath/xpath_test.go: 
-------------------------------------------------------------------------------- 1 | package xpath 2 | 3 | //please check the search tests in gokogiri/xml and gokogiri/html 4 | import "testing" 5 | 6 | func TestCompileGoodExpr(t *testing.T) { 7 | defer CheckXmlMemoryLeaks(t) 8 | e := Compile(`./*`) 9 | if e == nil { 10 | t.Error("expr should be good") 11 | } 12 | e.Free() 13 | } 14 | 15 | func TestCompileBadExpr(t *testing.T) { 16 | //defer CheckXmlMemoryLeaks(t) 17 | //this test causes memory leaks in libxml 18 | //however, the memory leak is very small and does not grow as more bad expressions are compiled 19 | e := Compile("./") 20 | if e != nil { 21 | t.Error("expr should be bad") 22 | } 23 | e = Compile(".//") 24 | if e != nil { 25 | t.Error("expr should be bad") 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /xml/text.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | /* 4 | #include 5 | 6 | void disable_escaping(xmlNodePtr node) { 7 | node->name = xmlStringTextNoenc; 8 | } 9 | */ 10 | import "C" 11 | 12 | type TextNode struct { 13 | *XmlNode 14 | } 15 | 16 | // DisableOutputEscaping disables the usual safeguards against creating invalid XML and allows the 17 | // characters '<', '>', and '&' to be written out verbatim. Normally they are safely escaped as entities. 18 | // 19 | // This API is intended to provide support for XSLT processors and similar XML manipulation libraries that 20 | // may need to output unsupported entity references or use the XML API for non-XML output. It should never 21 | // be used in the normal course of XML processing. 
22 | func (node *TextNode) DisableOutputEscaping() { 23 | C.disable_escaping(node.Ptr) 24 | } 25 | -------------------------------------------------------------------------------- /html/crash_test.go: -------------------------------------------------------------------------------- 1 | package html 2 | 3 | import "testing" 4 | 5 | func TestCrazyMove(t *testing.T) { 6 | input := ` 7 | 8 | 9 |
10 |
11 |
12 |
13 |
14 | 15 | ` 16 | doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 17 | 18 | if err != nil { 19 | t.Error("Parsing has error:", err) 20 | return 21 | } 22 | 23 | foos, err := doc.Search("//div[@id='foo']") 24 | if err != nil { 25 | t.Error("search has error:", err) 26 | return 27 | } 28 | for _, foo := range foos { 29 | bars, _ := foo.Search("//div[@id='bar']") 30 | for _, bar := range bars { 31 | bar.AddChild(foo) 32 | } 33 | } 34 | 35 | doc.Free() 36 | CheckXmlMemoryLeaks(t) 37 | } 38 | -------------------------------------------------------------------------------- /css/css_test.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | import ( 4 | "io/ioutil" 5 | "strings" 6 | "testing" 7 | ) 8 | 9 | func read(filename string) string { 10 | contents, err := ioutil.ReadFile(filename) 11 | if err != nil { 12 | panic("css2xpath test could not open a test file") 13 | } 14 | return string(contents) 15 | } 16 | 17 | func TestSelectors(t *testing.T) { 18 | cssSelectors := strings.Split(string(read("./test/inputs")), "\n") 19 | localXPaths := strings.Split(string(read("./test/outputs-local")), "\n") 20 | globalXPaths := strings.Split(string(read("./test/outputs-global")), "\n") 21 | 22 | for i, css := range cssSelectors { 23 | xpathG := strings.TrimSpace(Convert(css, GLOBAL)) 24 | xpathL := strings.TrimSpace(Convert(css, LOCAL)) 25 | if xpathG != strings.TrimSpace(globalXPaths[i]) { 26 | t.Errorf("IN:\t%s \nOUT:\t%s\nEXPECTED:\t%s\n", css, xpathG, globalXPaths[i]) 27 | } 28 | if xpathL != strings.TrimSpace(localXPaths[i]) { 29 | t.Errorf("IN:\t%s \nOUT:\t%s\nEXPECT:\t%s\n", css, xpathL, localXPaths[i]) 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2012 Zhigang Chen and Hampton Catlin 2 | 3 | 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /xml/attribute.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | /* 4 | AttributeNode represents an attribute, which has a name and a value. 5 | 6 | AttributeNodes are created by calling SetAttr or SetNsAttr on an element node, 7 | and retrieved by the Attribute and Attributes functions on an element node. 8 | 9 | Note that while namespace declarations resemble attributes, they are a distinct node type 10 | and cannot be used or retrieved as an AttributeNode. 11 | */ 12 | type AttributeNode struct { 13 | *XmlNode 14 | } 15 | 16 | // String returns the value of the attribute. 17 | func (attrNode *AttributeNode) String() string { 18 | return attrNode.Content() 19 | } 20 | 21 | // Value returns the value of the attribute. 
22 | func (attrNode *AttributeNode) Value() string { 23 | return attrNode.Content() 24 | } 25 | 26 | //SetValue sets the value of the attribute. Note that the argument will 27 | // be converted to a string, and automatically XML-escaped when the 28 | // document is serialized. 29 | func (attrNode *AttributeNode) SetValue(val interface{}) { 30 | attrNode.SetContent(val) 31 | } 32 | 33 | /* 34 | alias :value :content 35 | alias :to_s :content 36 | alias :content= :value= 37 | */ 38 | -------------------------------------------------------------------------------- /html/xpath_test.go: -------------------------------------------------------------------------------- 1 | package html 2 | 3 | import "testing" 4 | 5 | func TestUnfoundFuncInXpath(t *testing.T) { 6 | defer CheckXmlMemoryLeaks(t) 7 | 8 | doc, err := Parse([]byte("

"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 9 | 10 | if err != nil { 11 | t.Error("Parsing has error:", err) 12 | return 13 | } 14 | 15 | html := doc.Root().FirstChild() 16 | results, _ := html.Search("./div[matches(text(), 'foo')]") 17 | if results != nil { 18 | t.Error("should return nil because the function is not found") 19 | } 20 | doc.Free() 21 | } 22 | 23 | func TestXpathEmptyResult(t *testing.T) { 24 | defer CheckXmlMemoryLeaks(t) 25 | 26 | doc, err := Parse([]byte("

"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 27 | 28 | if err != nil { 29 | t.Error("Parsing has error:", err) 30 | return 31 | } 32 | 33 | html := doc.Root().FirstChild() 34 | results, err := html.Search("./div[@calass='cool']") 35 | if err != nil { 36 | t.Error("Xpath eval should not return nil") 37 | } 38 | if len(results) > 0 { 39 | t.Error("Xpath should return empty result") 40 | } 41 | doc.Free() 42 | } 43 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | Gokogiri 2 | ======== 3 | LibXML bindings for the Go programming language. 4 | ------------------------------------------------ 5 | By Zhigang Chen and Hampton Catlin 6 | 7 | 8 | This is a major rewrite from v0 in the following places: 9 | 10 | - Separation of XML and HTML 11 | - Put more burden of memory allocation/deallocation on Go 12 | - Fragment parsing -- no more deep-copy 13 | - Serialization 14 | - Some API adjustment 15 | 16 | ## Installation 17 | 18 | ```bash 19 | # Linux 20 | sudo apt-get install libxml2-dev 21 | # Mac 22 | brew install libxml2 23 | 24 | go get github.com/moovweb/gokogiri 25 | ``` 26 | 27 | ## Running tests 28 | 29 | ```bash 30 | go test github.com/moovweb/gokogiri/... 31 | ``` 32 | 33 | ## Basic example 34 | 35 | ```go 36 | package main 37 | 38 | import ( 39 | "net/http" 40 | "io/ioutil" 41 | "github.com/moovweb/gokogiri" 42 | ) 43 | 44 | func main() { 45 | // fetch and read a web page 46 | resp, _ := http.Get("http://www.google.com") 47 | page, _ := ioutil.ReadAll(resp.Body) 48 | 49 | // parse the web page 50 | doc, _ := gokogiri.ParseHtml(page) 51 | 52 | // perform operations on the parsed page -- consult the tests for examples 53 | 54 | // important -- don't forget to free the resources when you're done! 
55 | doc.Free() 56 | } 57 | ``` 58 | -------------------------------------------------------------------------------- /xml/nodeset.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | /* 4 | #cgo pkg-config: libxml-2.0 5 | 6 | #include 7 | #include 8 | 9 | */ 10 | import "C" 11 | 12 | import "unsafe" 13 | 14 | type Nodeset []Node 15 | 16 | // Produce a slice of unsafe.Pointer objects, suitable for passing to a C function 17 | func (n Nodeset) ToPointers() (pointers []unsafe.Pointer) { 18 | for _, node := range n { 19 | pointers = append(pointers, node.NodePtr()) 20 | } 21 | return 22 | } 23 | 24 | // Produce a C.xmlXPathObjectPtr suitable for passing to libxml2 25 | func (n Nodeset) ToXPathNodeset() (ret C.xmlXPathObjectPtr) { 26 | ret = C.xmlXPathNewNodeSet(nil) 27 | for _, node := range n { 28 | C.xmlXPathNodeSetAdd(ret.nodesetval, (*C.xmlNode)(node.NodePtr())) 29 | } 30 | return 31 | } 32 | 33 | // Produce a C.xmlXPathObjectPtr marked as a ResultValueTree, suitable for passing to libxml2 34 | func (n Nodeset) ToXPathValueTree() (ret C.xmlXPathObjectPtr) { 35 | if len(n) == 0 { 36 | ret = C.xmlXPathNewValueTree(nil) 37 | return 38 | } 39 | 40 | ret = C.xmlXPathNewValueTree(nil) 41 | for _, node := range n { 42 | C.xmlXPathNodeSetAdd(ret.nodesetval, (*C.xmlNode)(node.NodePtr())) 43 | } 44 | //this hack-ish looking line tells libxml2 not to free the RVT 45 | //if we don't do this we get horrible double-free crashes everywhere 46 | ret.boolval = 0 47 | return 48 | } 49 | -------------------------------------------------------------------------------- /html/helper.c: -------------------------------------------------------------------------------- 1 | #include "helper.h" 2 | #include "../xml/helper.h" 3 | #include 4 | 5 | htmlDocPtr htmlParse(void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) { 6 | const char *c_buffer = (char*)buffer; 7 | const char 
*c_url = (char*)url; 8 | const char *c_encoding = (char*)encoding; 9 | xmlDoc *doc = NULL; 10 | 11 | xmlResetLastError(); 12 | doc = htmlReadMemory(c_buffer, buffer_len, c_url, c_encoding, options); 13 | 14 | return doc; 15 | } 16 | 17 | xmlNode* htmlParseFragment(void *doc, void *buffer, int buffer_len, void *url, int options, void *error_buffer, int error_buffer_len) { 18 | xmlNode* root_element = NULL; 19 | xmlParserErrors errCode; 20 | errCode = xmlParseInNodeContext((xmlNodePtr)doc, buffer, buffer_len, options, &root_element); 21 | if (errCode != XML_ERR_OK) { 22 | return NULL; 23 | } 24 | return root_element; 25 | } 26 | 27 | xmlNode* htmlParseFragmentAsDoc(void *doc, void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) { 28 | xmlDoc* tmpDoc = NULL; 29 | xmlNode* tmpRoot = NULL; 30 | tmpDoc = htmlReadMemory((char*)buffer, buffer_len, (char*)url, (char*)encoding, options); 31 | if (tmpDoc == NULL) { 32 | return NULL; 33 | } 34 | tmpRoot = xmlDocGetRootElement(tmpDoc); 35 | if (tmpRoot == NULL) { 36 | return NULL; 37 | } 38 | tmpRoot = xmlDocCopyNode(tmpRoot, doc, 1); 39 | xmlFreeDoc(tmpDoc); 40 | return tmpRoot; 41 | } 42 | -------------------------------------------------------------------------------- /xml/helper.h: -------------------------------------------------------------------------------- 1 | #ifndef __CHELPER_H__ 2 | #define __CHELPER_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | xmlDoc* xmlParse(void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int errror_buffer_len); 13 | xmlNode* xmlParseFragment(void* doc, void *buffer, int buffer_len, void *url, int options, void *error_buffer, int error_buffer_len); 14 | xmlNode* xmlParseFragmentAsDoc(void *doc, void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len); 15 | int xmlSaveNode(void 
*wbuffer, void *node, void *encoding, int options); 16 | void xmlRemoveDefaultNamespace(xmlNode *node); 17 | 18 | void xmlSetContent(void *gonode, void *node, void *content); 19 | 20 | xmlDoc* newEmptyXmlDoc(); 21 | xmlElementType getNodeType(xmlNode *node); 22 | char *xmlDocDumpToString(xmlDoc *doc, void *encoding, int format); 23 | char *htmlDocDumpToString(xmlDoc *doc, int format); 24 | void xmlFreeChars(char *buffer); 25 | int xmlUnlinkNodeWithCheck(xmlNode *node); 26 | int xmlNodePtrCheck(void *node); 27 | void xmlNodeWriteCallback(void *buffer, void *data, int data_len); 28 | void xmlUnlinkNodeCallback(void *nodePtr, void *gonodePtr); 29 | 30 | typedef struct XmlBufferContext { 31 | void *obj; 32 | char *buffer; 33 | int buffer_len; 34 | int data_size; 35 | } XmlBufferContext; 36 | 37 | #endif //__CHELPER_H__ 38 | -------------------------------------------------------------------------------- /html/encoding_test.go: -------------------------------------------------------------------------------- 1 | package html 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "testing" 7 | ) 8 | 9 | func TestParseDocument_CP1252(t *testing.T) { 10 | input, err := ioutil.ReadFile("./tests/document/encoding/input.html") 11 | if err != nil { 12 | t.Error("err:", err.Error()) 13 | return 14 | } 15 | doc, err := Parse(input, []byte("windows-1252"), nil, DefaultParseOption, DefaultEncodingBytes) 16 | if err != nil { 17 | t.Error("err:", err.Error()) 18 | return 19 | } 20 | out := doc.String() 21 | if index := bytes.IndexByte([]byte(out), byte(146)); index >= 0 { 22 | t.Error("the output is not properly encoded") 23 | } 24 | doc.Free() 25 | CheckXmlMemoryLeaks(t) 26 | } 27 | 28 | func TestParseDocumentWithInOutEncodings(t *testing.T) { 29 | println("Starting to read input file.") 30 | input, err := ioutil.ReadFile("./tests/document/encoding/input.html") 31 | if err != nil { 32 | t.Error("err:", err.Error()) 33 | return 34 | } 35 | println("Succesfully read input file, beginning 
parsing.") 36 | doc, err := Parse(input, []byte("windows-1252"), nil, DefaultParseOption, []byte("windows-1252")) 37 | if err != nil { 38 | t.Error("err:", err.Error()) 39 | return 40 | } 41 | println("Successfully parsed, getting document as a string...") 42 | out := doc.String() 43 | if index := bytes.IndexByte([]byte(out), byte(146)); index < 0 { 44 | t.Error("the output is not properly encoded") 45 | } 46 | 47 | println("Test complete, about to free document.") 48 | doc.Free() 49 | println("Successfully freed document, checking for memory leaks...") 50 | CheckXmlMemoryLeaks(t) 51 | println("Finished checking for leaks.") 52 | } 53 | -------------------------------------------------------------------------------- /gokogiri.go: -------------------------------------------------------------------------------- 1 | /* 2 | The gokogiri package provides a Go interface to the libxml2 library. 3 | 4 | It is inspired by the Ruby-based Nokogiri API, and allows one to parse, manipulate, and create HTML and XML 5 | documents. Nodes can be selected using either CSS selectors (in much the same fashion as jQuery) or XPath 1.0 expressions, 6 | and a simple DOM-like interface allows for building up documents from scratch. 7 | */ 8 | package gokogiri 9 | 10 | import ( 11 | "github.com/moovweb/gokogiri/html" 12 | "github.com/moovweb/gokogiri/xml" 13 | ) 14 | 15 | /* 16 | ParseHtml parses a UTF-8 encoded byte array and returns an html.HtmlDocument. It uses default parsing options that ignore 17 | errors or warnings, making it suitable for the poorly-formed 'tag soup' often found on the web. 18 | 19 | If the content is not UTF-8 encoded or you want to customize the parsing options, you should call html.Parse directly.
20 | */ 21 | func ParseHtml(content []byte) (doc *html.HtmlDocument, err error) { 22 | return html.Parse(content, html.DefaultEncodingBytes, nil, html.DefaultParseOption, html.DefaultEncodingBytes) 23 | } 24 | 25 | /* 26 | ParseXml parses a UTF-8 encoded byte array and returns an xml.XmlDocument. By default the parsing options ignore validation 27 | and suppress errors and warnings. This allows one to be liberal in accepting badly-formed documents, but is not standards-compliant. 28 | 29 | If the content is not UTF-8 encoded or you want to customize the parsing options, you should call the Parse or ReadFile functions 30 | found in the github.com/moovweb/gokogiri/xml package. The xml.StrictParsingOption is conveniently provided for standards-compliant 31 | behaviour. 32 | */ 33 | func ParseXml(content []byte) (doc *xml.XmlDocument, err error) { 34 | return xml.Parse(content, xml.DefaultEncodingBytes, nil, xml.DefaultParseOption, xml.DefaultEncodingBytes) 35 | } 36 | -------------------------------------------------------------------------------- /css/test/outputs-global: -------------------------------------------------------------------------------- 1 | /descendant-or-self::*/*[self::div] 2 | /child::*[self::div] 3 | /descendant-or-self::*/*[self::div] | /child::*[self::span] 4 | /descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " foo ")] 5 | /descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " foo ") and contains(concat(" ", @class, " "), " bar ")] 6 | /descendant-or-self::*/*[self::div and @id="foo"] 7 | /descendant-or-self::*/*[self::div and @id="foo" and contains(concat(" ", @class, " "), " bar ") and @id="hux"] 8 | /child::*[self::div and @id="foo" and contains(concat(" ", @class, " "), " bar ") and @id="hux"] 9 | /descendant-or-self::*/*[contains(concat(" ", @class, " "), " bar ")] 10 | /descendant-or-self::*/*[position()=1] 11 | /descendant-or-self::*/*[self::div and position()=1] 12 |
/descendant-or-self::*/*[self::div and position() mod 2 = 1] 13 | /descendant-or-self::*/*[self::div and position() mod 2 = 0] 14 | /descendant-or-self::*/*[self::div and (position() - 1) mod 2 = 0] 15 | /descendant-or-self::*/*[self::div and (position() + 6) mod -3 = 0] 16 | /descendant-or-self::*/*[self::div][position() = 5] 17 | /descendant-or-self::*/*[position() = 4] 18 | /descendant-or-self::*/*[self::div]/descendant-or-self::*/*[position() = 2] 19 | /descendant-or-self::*/*[self::div and @a='b'] 20 | /descendant-or-self::*/*[self::div and contains(concat(" ", @a, " "), concat(" ", 'b', " "))] 21 | /descendant-or-self::*/*[self::div and (@a='b' or starts-with(@a, concat('b', "-")))] 22 | /descendant-or-self::*/*[self::div and contains(@a, 'b')] 23 | /descendant-or-self::*/*[self::div and starts-with(@a, 'b')] 24 | /descendant-or-self::*/*[self::div]/descendant-or-self::*/*[substring(@a, string-length(@a) - string-length('b') + 1) = 'b'] 25 | /child::*[position() = 1 and position() = last()] 26 | /descendant-or-self::*/*[self::div and @a='b'][position()=1 and contains(concat(" ", @class, " "), " foo ")] 27 | /descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " bar ") and not(@id="foo" and position()=1)] -------------------------------------------------------------------------------- /css/test/outputs-local: -------------------------------------------------------------------------------- 1 | ./descendant-or-self::*/*[self::div] 2 | ./child::*[self::div] 3 | ./descendant-or-self::*/*[self::div] | ./child::*[self::span] 4 | ./descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " foo ")] 5 | ./descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " foo ") and contains(concat(" ", @class, " "), " bar ")] 6 | ./descendant-or-self::*/*[self::div and @id="foo"] 7 | ./descendant-or-self::*/*[self::div and @id="foo" and contains(concat(" ", @class, " "), " bar ") and @id="hux"] 8 | ./child::*[self::div and 
@id="foo" and contains(concat(" ", @class, " "), " bar ") and @id="hux"] 9 | ./descendant-or-self::*/*[contains(concat(" ", @class, " "), " bar ")] 10 | ./descendant-or-self::*/*[position()=1] 11 | ./descendant-or-self::*/*[self::div and position()=1] 12 | ./descendant-or-self::*/*[self::div and position() mod 2 = 1] 13 | ./descendant-or-self::*/*[self::div and position() mod 2 = 0] 14 | ./descendant-or-self::*/*[self::div and (position() - 1) mod 2 = 0] 15 | ./descendant-or-self::*/*[self::div and (position() + 6) mod -3 = 0] 16 | ./descendant-or-self::*/*[self::div][position() = 5] 17 | ./descendant-or-self::*/*[position() = 4] 18 | ./descendant-or-self::*/*[self::div]/descendant-or-self::*/*[position() = 2] 19 | ./descendant-or-self::*/*[self::div and @a='b'] 20 | ./descendant-or-self::*/*[self::div and contains(concat(" ", @a, " "), concat(" ", 'b', " "))] 21 | ./descendant-or-self::*/*[self::div and (@a='b' or starts-with(@a, concat('b', "-")))] 22 | ./descendant-or-self::*/*[self::div and contains(@a, 'b')] 23 | ./descendant-or-self::*/*[self::div and starts-with(@a, 'b')] 24 | ./descendant-or-self::*/*[self::div]/descendant-or-self::*/*[substring(@a, string-length(@a) - string-length('b') + 1) = 'b'] 25 | ./child::*[position() = 1 and position() = last()] 26 | ./descendant-or-self::*/*[self::div and @a='b'][position()=1 and contains(concat(" ", @class, " "), " foo ")] 27 | ./descendant-or-self::*/*[self::div and contains(concat(" ", @class, " "), " bar ") and not(@id="foo" and position()=1)] -------------------------------------------------------------------------------- /xpath/expression.go: -------------------------------------------------------------------------------- 1 | package xpath 2 | 3 | /* 4 | #include 5 | #include 6 | #include 7 | 8 | void check_xpath_syntax_noop(void *ctx, const char *fmt, ...) 
{ 9 | } 10 | 11 | char *check_xpath_syntax(const char *xpath) { 12 | xmlGenericErrorFunc err_func = check_xpath_syntax_noop; 13 | initGenericErrorDefaultFunc(&err_func); 14 | xmlResetLastError(); 15 | xmlXPathCompile((const xmlChar *)xpath); 16 | xmlErrorPtr err = xmlGetLastError(); 17 | if (err != NULL) { 18 | if (err->code == XML_XPATH_EXPR_ERROR) { 19 | // TODO: Not the cleanest but should scale well 20 | int size = strlen(err->message) + strlen(err->str1) + err->int1 + 16; 21 | char *msg = malloc(size); 22 | sprintf(msg, "%s%s\n%*s^", err->message, err->str1, err->int1, " "); 23 | return msg; 24 | } else { 25 | char *msg = malloc(strlen(err->message)); 26 | sprintf(msg, "%s", err->message); 27 | return msg; 28 | } 29 | } 30 | return NULL; 31 | } 32 | */ 33 | import "C" 34 | import "unsafe" 35 | import . "github.com/moovweb/gokogiri/util" 36 | 37 | //import "runtime" 38 | import "errors" 39 | 40 | type Expression struct { 41 | Ptr *C.xmlXPathCompExpr 42 | xpath string 43 | } 44 | 45 | func Check(path string) (err error) { 46 | str := C.CString(path) 47 | defer C.free(unsafe.Pointer(str)) 48 | cstr := C.check_xpath_syntax(str) 49 | if cstr != nil { 50 | defer C.free(unsafe.Pointer(cstr)) 51 | err = errors.New(C.GoString(cstr)) 52 | } 53 | return 54 | } 55 | 56 | func Compile(path string) (expr *Expression) { 57 | if len(path) == 0 { 58 | return 59 | } 60 | 61 | xpathBytes := GetCString([]byte(path)) 62 | xpathPtr := unsafe.Pointer(&xpathBytes[0]) 63 | ptr := C.xmlXPathCompile((*C.xmlChar)(xpathPtr)) 64 | if ptr == nil { 65 | return 66 | } 67 | expr = &Expression{Ptr: ptr, xpath: path} 68 | //runtime.SetFinalizer(expr, (*Expression).Free) 69 | return 70 | } 71 | 72 | func (exp *Expression) String() string { 73 | return exp.xpath 74 | } 75 | 76 | func (exp *Expression) Free() { 77 | if exp.Ptr != nil { 78 | C.xmlXPathFreeCompExpr(exp.Ptr) 79 | exp.Ptr = nil 80 | } 81 | } 82 | -------------------------------------------------------------------------------- 
/html/utils_test.go: -------------------------------------------------------------------------------- 1 | package html 2 | 3 | import ( 4 | "fmt" 5 | "github.com/moovweb/gokogiri/help" 6 | "io/ioutil" 7 | "path/filepath" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | func badOutput(actual string, expected string) { 13 | fmt.Printf("Got:\n[%v]\n", actual) 14 | fmt.Printf("Expected:\n[%v]\n", expected) 15 | } 16 | 17 | func getTestData(name string) (input []byte, output []byte, error string) { 18 | var errorMessage string 19 | offset := "\t" 20 | inputFile := filepath.Join(name, "input.txt") 21 | 22 | input, err := ioutil.ReadFile(inputFile) 23 | 24 | if err != nil { 25 | errorMessage += fmt.Sprintf("%vCouldn't read test (%v) input:\n%v\n", offset, name, offset+err.Error()) 26 | } 27 | 28 | output, err = ioutil.ReadFile(filepath.Join(name, "output.txt")) 29 | 30 | if err != nil { 31 | errorMessage += fmt.Sprintf("%vCouldn't read test (%v) output:\n%v\n", offset, name, offset+err.Error()) 32 | } 33 | 34 | return input, output, errorMessage 35 | } 36 | 37 | func collectTests(suite string) (names []string, error string) { 38 | testPath := filepath.Join("tests", suite) 39 | entries, err := ioutil.ReadDir(testPath) 40 | 41 | if err != nil { 42 | return nil, fmt.Sprintf("Couldn't read tests:\n%v\n", err.Error()) 43 | } 44 | 45 | for _, entry := range entries { 46 | if strings.HasPrefix(entry.Name(), "_") || strings.HasPrefix(entry.Name(), ".") { 47 | continue 48 | } 49 | 50 | if entry.IsDir() { 51 | names = append(names, filepath.Join(testPath, entry.Name())) 52 | } 53 | } 54 | 55 | return 56 | } 57 | 58 | func CheckXmlMemoryLeaks(t *testing.T) { 59 | // LibxmlCleanUpParser() should only be called once during the lifetime of the 60 | // program, but because there's no way to know when the last test of the suite 61 | // runs in go, we can't accurately call it strictly once, so just avoid calling 62 | // it for now because it's known to cause crashes if called multiple times. 
63 | //println("Cleaning up parser...") 64 | //help.LibxmlCleanUpParser() 65 | 66 | println("Checking for libxml leaks...") 67 | if !help.LibxmlCheckMemoryLeak() { 68 | println("Found memory leaks!") 69 | t.Errorf("Memory leaks: %d!!!", help.LibxmlGetMemoryAllocation()) 70 | help.LibxmlReportMemoryLeak() 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /help/help.go: -------------------------------------------------------------------------------- 1 | package help 2 | 3 | /* 4 | #cgo pkg-config: libxml-2.0 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | void printMemoryLeak() { xmlMemDisplay(stdout); } 13 | */ 14 | import "C" 15 | 16 | import ( 17 | "sync" 18 | "sync/atomic" 19 | ) 20 | 21 | /** 22 | * With regard to Thread Safety 23 | * 24 | * xmlInitParser and xmlCleanupParser need to be called *once* each during the 25 | * lifetime of the program, regardless of how many documents you parse. 26 | * 27 | * xmlInitParser should be called at the very beginning before doing anything 28 | * parser related. Luckily, using the call below, we can guarantee that by 29 | * making sure it gets called exactly once if anyone uses any gokogiri 30 | * related functions. 31 | * 32 | * xmlCleanupParser is trickier because it also can only be called once, but it 33 | * should strictly be called at the very end of program execution, after we're 34 | * sure that no more documents will be parsed. If it's ever called, and a new 35 | * document is parsed, there is a potential for a segfault. 36 | * 37 | * For more information: 38 | * 39 | * http://www.xmlsoft.org/threads.html 40 | * http://www.xmlsoft.org/FAQ.html#Developer (In particular, question #7) 41 | **/ 42 | 43 | var once sync.Once 44 | var cleaned = new(int32) 45 | 46 | func LibxmlInitParser() { 47 | if called_clean := atomic.LoadInt32(cleaned); called_clean != 0 { 48 | panic("LibxmlCleanUpParser has been called.
Please make sure you only " + 49 | "call it if no more document parsing will take place.") 50 | } 51 | once.Do(func() { C.xmlInitParser() }) 52 | } 53 | // LibxmlCleanUpParser calls libxml2's xmlCleanupParser at most once per process; every later call is a no-op. Once it has run, LibxmlInitParser panics. 54 | func LibxmlCleanUpParser() { 55 | // Because of our test structure, this method is called several 56 | // times during a test run (but it should only be called once 57 | // during the lifetime of the program). The compare-and-swap on cleaned lets only the first 58 | // call through; the sync.Once consumed by LibxmlInitParser must not be reused here, or cleanup would never run after init. 59 | if atomic.CompareAndSwapInt32(cleaned, 0, 1) { 60 | C.xmlCleanupParser() 61 | } 62 | } 63 | 64 | func LibxmlGetMemoryAllocation() int { 65 | return (int)(C.xmlMemBlocks()) 66 | } 67 | 68 | func LibxmlCheckMemoryLeak() bool { 69 | return (C.xmlMemBlocks() == 0) 70 | } 71 | 72 | func LibxmlReportMemoryLeak() { 73 | C.printMemoryLeak() 74 | } 75 | -------------------------------------------------------------------------------- /gokogiri_test.go: -------------------------------------------------------------------------------- 1 | package gokogiri 2 | 3 | import ( 4 | "github.com/moovweb/gokogiri/help" 5 | "testing" 6 | ) 7 | 8 | func TestParseHtml(t *testing.T) { 9 | input := "

" 10 | expected := ` 11 |

12 | ` 13 | doc, err := ParseHtml([]byte(input)) 14 | if err != nil { 15 | t.Error("Parsing has error:", err) 16 | return 17 | } 18 | if doc.String() != expected { 19 | t.Error("the output of the html doc does not match the expected") 20 | } 21 | 22 | expected = ` 23 | 24 | 25 |

26 | 27 | ` 28 | doc.Root().FirstChild().AddPreviousSibling("") 29 | 30 | if doc.String() != expected { 31 | println(doc.String()) 32 | t.Error("the output of the html doc does not match the expected") 33 | } 34 | doc.Free() 35 | CheckXmlMemoryLeaks(t) 36 | } 37 | 38 | func TestParseXml(t *testing.T) { 39 | input := "" 40 | expected := ` 41 | 42 | ` 43 | doc, err := ParseXml([]byte(input)) 44 | if err != nil { 45 | t.Error("Parsing has error:", err) 46 | return 47 | } 48 | 49 | if doc.String() != expected { 50 | t.Error("the output of the xml doc does not match the expected") 51 | } 52 | 53 | expected = ` 54 | 55 | 56 | 57 | ` 58 | doc.Root().AddChild("") 59 | if doc.String() != expected { 60 | t.Error("the output of the xml doc does not match the expected") 61 | } 62 | doc.Free() 63 | CheckXmlMemoryLeaks(t) 64 | } 65 | 66 | func CheckXmlMemoryLeaks(t *testing.T) { 67 | // LibxmlCleanUpParser() should only be called once during the lifetime of the 68 | // program, but because there's no way to know when the last test of the suite 69 | // runs in go, we can't accurately call it strictly once, so just avoid calling 70 | // it for now because it's known to cause crashes if called multiple times. 
71 | //help.LibxmlCleanUpParser() 72 | 73 | if !help.LibxmlCheckMemoryLeak() { 74 | t.Errorf("Memory leaks: %d!!!", help.LibxmlGetMemoryAllocation()) 75 | help.LibxmlReportMemoryLeak() 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /mem/libxml.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | //#define TRACE_MEM 9 | //#define CUSTOM_GC 10 | 11 | unsigned long alloc_count = 0; 12 | /* Fallback strdup: returns a malloc'ed copy of str, or NULL when the allocation fails. */ 13 | #ifndef strdup 14 | char *strdup (const char *str) { 15 | char *new = malloc(strlen(str) + 1); /* +1 for the terminating NUL that strcpy writes */ 16 | if (new != NULL) strcpy(new, str); 17 | return new; 18 | } 19 | #endif 20 | 21 | #ifdef CUSTOM_GC 22 | #pragma pack(push) 23 | #pragma pack(1) 24 | typedef struct go_xml_allocation { 25 | size_t size; 26 | struct timespec timestamp; 27 | void *p; 28 | } go_xml_allocation; 29 | #pragma pack(pop) 30 | #endif 31 | 32 | unsigned long libxmlGoAllocSize() { 33 | if (alloc_count > 0) { 34 | xmlCleanupParser(); 35 | } 36 | return alloc_count; 37 | } 38 | 39 | void libxmlGoFree(void *p) { 40 | alloc_count--; 41 | #ifdef CUSTOM_GC 42 | go_xml_allocation *gxa = (go_xml_allocation *)(p - sizeof(go_xml_allocation)); 43 | fprintf(stderr, "Freeing %lu bytes @ %p created at: %lu\n", gxa->size, gxa->p, gxa->timestamp.tv_nsec); 44 | return free(gxa); 45 | #else 46 | #ifdef TRACE_MEM 47 | fprintf(stderr, "%08lu Free %p\n", alloc_count, p); 48 | #endif 49 | return free(p); 50 | #endif 51 | } 52 | 53 | void *libxmlGoMalloc(int size) { 54 | alloc_count++; 55 | #ifdef CUSTOM_GC 56 | go_xml_allocation *gxa = (go_xml_allocation *)malloc(size + sizeof(go_xml_allocation)); 57 | gxa->p = (void *)gxa + sizeof(go_xml_allocation); 58 | gxa->size = size; 59 | clock_gettime(CLOCK_REALTIME, &(gxa->timestamp)); 60 | fprintf(stderr, "Allocated %lu bytes @ %p timestamp: %lu\n", gxa->size, gxa->p, gxa->timestamp.tv_nsec); 61 | return gxa->p; 62 | #else 63 | #ifdef TRACE_MEM 64 |
fprintf(stderr, "%08lu Malloc %d\n", alloc_count, size); 65 | #endif 66 | return malloc(size); 67 | #endif 68 | } 69 | 70 | void *libxmlGoRealloc(void *p, int size) { 71 | #ifdef TRACE_MEM 72 | fprintf(stderr, "Realloc %p, %d\n", p, size); 73 | #endif 74 | return realloc(p, size); 75 | } 76 | 77 | void *libxmlGoStrDup(void *p) { 78 | alloc_count++; 79 | #ifdef TRACE_MEM 80 | fprintf(stderr, "%08lu StrDup %p\n", alloc_count, p); 81 | #endif 82 | return strdup(p); 83 | } 84 | 85 | void libxmlGoInit() { 86 | #ifndef WINDOWS 87 | //fprintf(stderr, "Running xmlMemSetup()...\n"); 88 | xmlMemSetup( 89 | (xmlFreeFunc)libxmlGoFree, 90 | (xmlMallocFunc)libxmlGoMalloc, 91 | (xmlReallocFunc)libxmlGoRealloc, 92 | (xmlStrdupFunc)libxmlGoStrDup 93 | ); 94 | #endif 95 | 96 | //char *_LIBXML_VERSION = strdup(LIBXML_DOTTED_VERSION); 97 | //char *_LIBXML_PARSER_VERSION = strdup(xmlParserVersion); 98 | //fprintf(stderr, "LIBXML_VERSION: %s\n", _LIBXML_VERSION); 99 | //fprintf(stderr, "LIBXML_PARSER_VERSION: %s\n", _LIBXML_PARSER_VERSION); 100 | 101 | #ifdef LIBXML_ICONV_ENABLED 102 | //fprintf(stderr, "LIBXML_ICONV_ENABLED: %s\n", "true"); 103 | #else 104 | //fprintf(stderr, "LIBXML_ICONV_ENABLED: %s\n", "false"); 105 | #endif 106 | 107 | //xmlInitParser(); 108 | } 109 | 110 | -------------------------------------------------------------------------------- /html/fragment.go: -------------------------------------------------------------------------------- 1 | package html 2 | 3 | //#include "helper.h" 4 | import "C" 5 | import ( 6 | "bytes" 7 | "errors" 8 | . "github.com/moovweb/gokogiri/util" 9 | "github.com/moovweb/gokogiri/xml" 10 | "unsafe" 11 | ) 12 | 13 | var fragmentWrapperStart = []byte("
") 14 | var fragmentWrapperEnd = []byte("
") 15 | var fragmentWrapper = []byte("") 16 | var bodySigBytes = []byte(" 0 { 27 | urlPtr = unsafe.Pointer(&url[0]) 28 | } 29 | 30 | var root xml.Node 31 | if node == nil { 32 | containBody := (bytes.Index(content, bodySigBytes) >= 0) 33 | 34 | content = append(fragmentWrapper, content...) 35 | contentPtr = unsafe.Pointer(&content[0]) 36 | contentLen := len(content) 37 | 38 | inEncoding := document.InputEncoding() 39 | var encodingPtr unsafe.Pointer 40 | if len(inEncoding) > 0 { 41 | encodingPtr = unsafe.Pointer(&inEncoding[0]) 42 | } 43 | htmlPtr := C.htmlParseFragmentAsDoc(document.DocPtr(), contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0) 44 | 45 | //Note we've parsed the fragment within the given document 46 | //the root is not the root of the document; rather it's the root of the subtree from the fragment 47 | html := xml.NewNode(unsafe.Pointer(htmlPtr), document) 48 | 49 | if html == nil { 50 | err = ErrFailParseFragment 51 | return 52 | } 53 | root = html 54 | 55 | if !containBody { 56 | root = html.FirstChild() 57 | html.AddPreviousSibling(root) 58 | html.Remove() //remove html otherwise it's leaked 59 | } 60 | } else { 61 | //wrap the content 62 | newContent := append(fragmentWrapperStart, content...) 63 | newContent = append(newContent, fragmentWrapperEnd...) 
64 | contentPtr = unsafe.Pointer(&newContent[0]) 65 | contentLen := len(newContent) 66 | rootElementPtr := C.htmlParseFragment(node.NodePtr(), contentPtr, C.int(contentLen), urlPtr, C.int(options), nil, 0) 67 | if rootElementPtr == nil { 68 | //try to parse it as a doc 69 | fragment, err = parsefragment(document, nil, content, url, options) 70 | return 71 | } 72 | if rootElementPtr == nil { 73 | err = ErrFailParseFragment 74 | return 75 | } 76 | root = xml.NewNode(unsafe.Pointer(rootElementPtr), document) 77 | } 78 | 79 | fragment = &xml.DocumentFragment{} 80 | fragment.Node = root 81 | fragment.InEncoding = document.InputEncoding() 82 | fragment.OutEncoding = document.OutputEncoding() 83 | 84 | document.BookkeepFragment(fragment) 85 | return 86 | } 87 | 88 | func ParseFragment(content, inEncoding, url []byte, options xml.ParseOption, outEncoding []byte) (fragment *xml.DocumentFragment, err error) { 89 | inEncoding = AppendCStringTerminator(inEncoding) 90 | outEncoding = AppendCStringTerminator(outEncoding) 91 | document := CreateEmptyDocument(inEncoding, outEncoding) 92 | fragment, err = parsefragment(document, nil, content, url, options) 93 | return 94 | } 95 | -------------------------------------------------------------------------------- /html/document_test.go: -------------------------------------------------------------------------------- 1 | package html 2 | 3 | import "testing" 4 | import "fmt" 5 | 6 | func TestParseDocument(t *testing.T) { 7 | expected := 8 | ` 9 |

10 | ` 11 | expected_xml := 12 | ` 13 | 14 | 15 | 16 |
17 |

18 |

19 | 20 | 21 | ` 22 | doc, err := Parse([]byte("

"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 23 | 24 | if err != nil { 25 | t.Error("Parsing has error:", err) 26 | return 27 | } 28 | 29 | if doc.String() != expected { 30 | println("got:\n", doc.String()) 31 | println("expected:\n", expected) 32 | t.Error("the output of the html doc does not match") 33 | } 34 | 35 | s, _ := doc.ToXml(nil, nil) 36 | if string(s) != expected_xml { 37 | println("got:\n", string(s)) 38 | println("expected:\n", expected_xml) 39 | t.Error("the xml output of the html doc does not match") 40 | } 41 | 42 | doc.Free() 43 | CheckXmlMemoryLeaks(t) 44 | } 45 | 46 | func TestEmptyDocument(t *testing.T) { 47 | expected := 48 | ` 49 | 50 | ` 51 | doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 52 | 53 | if err != nil { 54 | t.Error("Parsing has error:", err) 55 | return 56 | } 57 | 58 | if doc.String() != expected { 59 | println(doc.String()) 60 | t.Error("the output of the html doc does not match the empty xml") 61 | } 62 | doc.Free() 63 | CheckXmlMemoryLeaks(t) 64 | } 65 | 66 | func TestNodeById(t *testing.T) { 67 | html := "
success
fail
" 68 | doc, _ := Parse([]byte(html), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 69 | p := doc.NodeById("yup") 70 | if p == nil { 71 | t.Errorf("Did not find node by ID!") 72 | return 73 | } 74 | output := fmt.Sprintf("%v", p.Content()) 75 | if output != "success" { 76 | t.Errorf("Incorrect node selected by ID!") 77 | } 78 | } 79 | 80 | /* 81 | func TestHTMLFragmentEncoding(t *testing.T) { 82 | defer CheckXmlMemoryLeaks(t) 83 | 84 | input, output, error := getTestData(filepath.Join("tests", "document", "html_fragment_encoding")) 85 | 86 | if len(error) > 0 { 87 | t.Errorf("Error gathering test data for %v:\n%v\n", "html_fragment_encoding", error) 88 | t.FailNow() 89 | } 90 | 91 | expected := string(output) 92 | 93 | inputEncodingBytes := []byte("utf-8") 94 | 95 | buffer := make([]byte, 100) 96 | fragment, err := ParseFragment([]byte(input), inputEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes, buffer) 97 | 98 | if err != nil { 99 | println("WHAT") 100 | t.Error(err.Error()) 101 | } 102 | 103 | if fragment.String() != expected { 104 | badOutput(fragment.String(), expected) 105 | t.Error("the output of the xml doc does not match") 106 | } 107 | 108 | fragment.Node.MyDocument().Free() 109 | } 110 | */ 111 | -------------------------------------------------------------------------------- /css/notes.txt: -------------------------------------------------------------------------------- 1 | group -> selector (',' selector)* 2 | 3 | selector -> '>'? sequence (combinator sequence)* 4 | 5 | sequence -> spaces? element qualifier* 6 | -> spaces? qualifier+ 7 | 8 | combinator -> spaces? ('+' | '~' | '>') 9 | -> spaces 10 | 11 | element -> universal | type 12 | 13 | universal -> LEXEME ('*') 14 | 15 | type -> LEXEME (\w+) 16 | 17 | qualifier -> attribute 18 | -> class 19 | -> id 20 | -> pseudo-class 21 | 22 | attribute -> LEXEME (complicated regexp) 23 | 24 | class -> LEXEME ('.' 
identifier) 25 | 26 | id -> LEXEME ('#' identifier) 27 | 28 | pseudo-class -> ':first-child' 29 | -> ':first-of-type' 30 | -> ':only-child' 31 | -> ':only-of-type' 32 | -> ':empty' 33 | -> (':nth-child' | ':nth-of-type') '(' predicate ')' 34 | -> ':not' '(' (element | qualifier) ')' 35 | 36 | predicate -> LEXEME(odd|even) 37 | -> LEXEME([-+]?\d+n[-+]\d+) 38 | 39 | 40 | 41 | 42 |

> :first-child 43 |

/*[1][./self:: and ] 44 | #

//*/*[1][./self::] 45 | 46 | :first-child 47 | /*[position() = 1 and ./self:: and ] 48 | //*/*[position = 1 and ./self:: and ] 49 | 50 | 51 | div, DEEP 52 | /descendant-or-self::*/*[./self::div] 53 | 54 | div, FLAT 55 | /child::*[./self::div] 56 | 57 | div span 58 | /descendant-or-self::*/*[./self::div] /descendant-or-self::*/*[./self::span] 59 | 60 | div > span 61 | /descendant-or-self::*/*[./self::div] /child::*[./self::span] 62 | 63 | div ~ span 64 | /descendant-or-self::*/*[./self::div] /following-sibling::*[./self::span] 65 | 66 | div + span 67 | /descendant-or-self::*/*[./self::div] /following-sibling::*[./self::span and position()=1] 68 | 69 | div:first-child, DEEP 70 | /descendant-or-self::*/*[./self::div and position()=1] 71 | 72 | div:first-child, FLAT 73 | /child::*[./self::div and position()=1] 74 | 75 | div:first-of-type, DEEP 76 | /descendant-or-self::*/*[./self::div][position()=1] 77 | 78 | div:last-of-type, DEEP 79 | /descendant-or-self::*/*[./self::div][position()=last()] 80 | 81 | 82 | div:nth-child(odd), DEEP 83 | /child::*[./self::div and position() mod 2 = 1] 84 | 85 | div:nth-child(a), DEEP 86 | /child::*[./self::div and position()=a] 87 | 88 | 89 | 90 | :first-child 91 | descendant-or-self::*/*[./self:: and position()=1] 92 | 93 | 94 | :first-child:last-child 95 | 96 | /*[position() = 1 and ./self::] 97 | 98 | 99 | foo + bar + hux --> foo [+ bar] [+ hux] 100 | foo/following-sibling::*[1]/self::bar/following-sibling::*[1]/self::hux 101 | 102 | foo ~ bar ~ hux 103 | foo/following-sibling::bar/following-sibling::hux 104 | 105 | foo bar 106 | foo//bar 107 | 108 | foo > bar 109 | foo/bar 110 | 111 | div.foo:nth-of-type(3) 112 | div[@class='foo'][3] 113 | 114 | div:nth-child(3).foo 115 | 116 | *[3][./self::div][@class='foo'] 117 | 118 | foo.bar:first-child 119 | *[1][./self::foo[@class='bar']] 120 | 121 | :first-child 122 | *[position()=1][./self::] 123 | 124 | :not(:first-child) 125 | *[not(position()=1)][./self::] 126 | 127 | 128 | 
div:nth-child(3) 129 | *[3][./self::div] 130 | 131 | // div *[3][./self:: 132 | 133 | 134 | :nth-child(an+b) 135 | *[(position() - b) mod a = 0] 136 | 137 | 138 | :not(div) 139 | *[not(./self::div)] 140 | 141 | :first-child 142 | *[position() = 1] 143 | :nth-child(4) 144 | *[position() = 4] 145 | 146 | :not(:first-child) 147 | *[not(position() = 1)] 148 | :not(:nth-child(4)) 149 | *[not(position() = 4)] -------------------------------------------------------------------------------- /xml/fragment.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | //#include "helper.h" 4 | import "C" 5 | import ( 6 | "errors" 7 | . "github.com/moovweb/gokogiri/util" 8 | "unsafe" 9 | ) 10 | 11 | type DocumentFragment struct { 12 | Node 13 | InEncoding []byte 14 | OutEncoding []byte 15 | } 16 | 17 | var ( 18 | fragmentWrapperStart = []byte("") 19 | fragmentWrapperEnd = []byte("") 20 | ) 21 | 22 | var ErrFailParseFragment = errors.New("failed to parse xml fragment") 23 | var ErrEmptyFragment = errors.New("empty xml fragment") 24 | 25 | const initChildrenNumber = 4 26 | 27 | func parsefragment(document Document, node *XmlNode, content, url []byte, options ParseOption) (fragment *DocumentFragment, err error) { 28 | //wrap the content before parsing 29 | content = append(fragmentWrapperStart, content...) 30 | content = append(content, fragmentWrapperEnd...) 
31 | 32 | //set up pointers before calling the C function 33 | var contentPtr, urlPtr unsafe.Pointer 34 | contentPtr = unsafe.Pointer(&content[0]) 35 | contentLen := len(content) 36 | if len(url) > 0 { 37 | url = AppendCStringTerminator(url) 38 | urlPtr = unsafe.Pointer(&url[0]) 39 | } 40 | 41 | var rootElementPtr *C.xmlNode 42 | 43 | if node == nil { 44 | inEncoding := document.InputEncoding() 45 | var encodingPtr unsafe.Pointer 46 | if len(inEncoding) > 0 { 47 | encodingPtr = unsafe.Pointer(&inEncoding[0]) 48 | } 49 | rootElementPtr = C.xmlParseFragmentAsDoc(document.DocPtr(), contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0) 50 | 51 | } else { 52 | rootElementPtr = C.xmlParseFragment(node.NodePtr(), contentPtr, C.int(contentLen), urlPtr, C.int(options), nil, 0) 53 | } 54 | 55 | //Note we've parsed the fragment within the given document 56 | //the root is not the root of the document; rather it's the root of the subtree from the fragment 57 | root := NewNode(unsafe.Pointer(rootElementPtr), document) 58 | 59 | //the fragment was in invalid 60 | if root == nil { 61 | err = ErrFailParseFragment 62 | return 63 | } 64 | 65 | fragment = &DocumentFragment{} 66 | fragment.Node = root 67 | fragment.InEncoding = document.InputEncoding() 68 | fragment.OutEncoding = document.OutputEncoding() 69 | 70 | document.BookkeepFragment(fragment) 71 | return 72 | } 73 | 74 | func ParseFragment(content, inEncoding, url []byte, options ParseOption, outEncoding []byte) (fragment *DocumentFragment, err error) { 75 | inEncoding = AppendCStringTerminator(inEncoding) 76 | outEncoding = AppendCStringTerminator(outEncoding) 77 | document := CreateEmptyDocument(inEncoding, outEncoding) 78 | fragment, err = parsefragment(document, nil, content, url, options) 79 | return 80 | } 81 | 82 | func (fragment *DocumentFragment) Remove() { 83 | fragment.Node.Remove() 84 | } 85 | 86 | func (fragment *DocumentFragment) Children() []Node { 87 | nodes := make([]Node, 0, 
initChildrenNumber) 88 | child := fragment.FirstChild() 89 | for ; child != nil; child = child.NextSibling() { 90 | nodes = append(nodes, child) 91 | } 92 | return nodes 93 | } 94 | 95 | func (fragment *DocumentFragment) ToBuffer(outputBuffer []byte) []byte { 96 | var b []byte 97 | var size int 98 | for _, node := range fragment.Children() { 99 | if docType := node.MyDocument().DocType(); docType == XML_HTML_DOCUMENT_NODE { 100 | b, size = node.ToHtml(fragment.OutEncoding, nil) 101 | } else { 102 | b, size = node.ToXml(fragment.OutEncoding, nil) 103 | } 104 | outputBuffer = append(outputBuffer, b[:size]...) 105 | } 106 | return outputBuffer 107 | } 108 | 109 | func (fragment *DocumentFragment) String() string { 110 | b := fragment.ToBuffer(nil) 111 | if b == nil { 112 | return "" 113 | } 114 | return string(b) 115 | } 116 | -------------------------------------------------------------------------------- /xml/fragment_test.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | import "testing" 4 | 5 | func TestParseDocumentFragmentBasic(t *testing.T) { 6 | defer CheckXmlMemoryLeaks(t) 7 | 8 | doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 9 | if err != nil { 10 | t.Error("parsing error:", err.Error()) 11 | return 12 | } 13 | root := doc.Root() 14 | if root != nil { 15 | println("root:", root.String()) 16 | } 17 | docFragment, err := doc.ParseFragment([]byte("hi"), nil, DefaultParseOption) 18 | if err != nil { 19 | t.Error(err.Error()) 20 | doc.Free() 21 | return 22 | } 23 | if len(docFragment.Children()) != 1 { 24 | t.Error("the number of children from the fragment does not match") 25 | } 26 | doc.Free() 27 | } 28 | 29 | func TestParseDocumentFragment(t *testing.T) { 30 | defer CheckXmlMemoryLeaks(t) 31 | 32 | doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 33 | if err != nil { 34 | t.Error("parsing error:", err.Error()) 35 | 
return 36 | } 37 | docFragment, err := doc.ParseFragment([]byte("fun"), nil, DefaultParseOption) 38 | if err != nil { 39 | t.Error(err.Error()) 40 | doc.Free() 41 | return 42 | } 43 | if docFragment.String() != "fun" { 44 | t.Error("fragment output is wrong\n") 45 | doc.Free() 46 | return 47 | } 48 | if len(docFragment.Children()) != 3 { 49 | t.Error("the number of children from the fragment does not match") 50 | } 51 | doc.Free() 52 | } 53 | 54 | func TestSearchDocumentFragment(t *testing.T) { 55 | defer CheckXmlMemoryLeaks(t) 56 | 57 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 58 | if err != nil { 59 | t.Error("parsing error:", err.Error()) 60 | return 61 | } 62 | docFragment, err := doc.ParseFragment([]byte("fun"), nil, DefaultParseOption) 63 | if err != nil { 64 | t.Error(err.Error()) 65 | doc.Free() 66 | return 67 | } 68 | nodes, err := docFragment.Search(".//*") 69 | if err != nil { 70 | t.Error("fragment search has error") 71 | doc.Free() 72 | return 73 | } 74 | if len(nodes) != 2 { 75 | t.Error("the number of children from the fragment does not match") 76 | } 77 | nodes, err = docFragment.Search("//*") 78 | 79 | if err != nil { 80 | t.Error("fragment search has error") 81 | doc.Free() 82 | return 83 | } 84 | 85 | if len(nodes) != 3 { 86 | t.Error("the number of children from the fragment's document does not match") 87 | } 88 | 89 | doc.Free() 90 | } 91 | 92 | func TestSearchDocumentFragmentWithEmptyDoc(t *testing.T) { 93 | defer CheckXmlMemoryLeaks(t) 94 | 95 | doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 96 | if err != nil { 97 | t.Error("parsing error:", err.Error()) 98 | return 99 | } 100 | docFragment, err := doc.ParseFragment([]byte("fun"), nil, DefaultParseOption) 101 | if err != nil { 102 | t.Error(err.Error()) 103 | doc.Free() 104 | return 105 | } 106 | nodes, err := docFragment.Search(".//*") 107 | if err != nil { 108 | t.Error("fragment search 
has error") 109 | doc.Free() 110 | return 111 | } 112 | if len(nodes) != 2 { 113 | t.Error("the number of children from the fragment does not match") 114 | } 115 | nodes, err = docFragment.Search("//*") 116 | 117 | if err != nil { 118 | t.Error("fragment search has error") 119 | doc.Free() 120 | return 121 | } 122 | 123 | if len(nodes) != 0 { 124 | t.Error("the number of children from the fragment's document does not match") 125 | } 126 | 127 | doc.Free() 128 | } 129 | -------------------------------------------------------------------------------- /xpath/util.go: -------------------------------------------------------------------------------- 1 | package xpath 2 | 3 | /* 4 | #cgo pkg-config: libxml-2.0 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | int getXPathObjectType(xmlXPathObject* o); 11 | 12 | */ 13 | import "C" 14 | 15 | import "unsafe" 16 | import "reflect" 17 | import . "github.com/moovweb/gokogiri/util" 18 | 19 | //export go_resolve_variables 20 | func go_resolve_variables(ctxt unsafe.Pointer, name, ns *C.char) (ret C.xmlXPathObjectPtr) { 21 | variable := C.GoString(name) 22 | namespace := C.GoString(ns) 23 | 24 | context := (*VariableScope)(ctxt) 25 | if context != nil { 26 | val := (*context).ResolveVariable(variable, namespace) 27 | ret = ValueToXPathObject(val) 28 | } 29 | return 30 | } 31 | 32 | // Convert an arbitrary value into a C.xmlXPathObjectPtr 33 | // Unrecognised and nil values are converted to empty node sets. 
34 | func ValueToXPathObject(val interface{}) (ret C.xmlXPathObjectPtr) { 35 | if val == nil { 36 | //return the empty node set 37 | ret = C.xmlXPathNewNodeSet(nil) 38 | return 39 | } 40 | switch v := val.(type) { 41 | case unsafe.Pointer: 42 | return (C.xmlXPathObjectPtr)(v) 43 | case []unsafe.Pointer: 44 | ptrs := v 45 | if len(ptrs) > 0 { 46 | //default - return a node set 47 | ret = C.xmlXPathNewNodeSet(nil) 48 | for _, p := range ptrs { 49 | C.xmlXPathNodeSetAdd(ret.nodesetval, (*C.xmlNode)(p)) 50 | } 51 | } else { 52 | ret = C.xmlXPathNewNodeSet(nil) 53 | return 54 | } 55 | case float64: 56 | ret = C.xmlXPathNewFloat(C.double(v)) 57 | case string: 58 | xpathBytes := GetCString([]byte(v)) 59 | xpathPtr := unsafe.Pointer(&xpathBytes[0]) 60 | ret = C.xmlXPathNewString((*C.xmlChar)(xpathPtr)) 61 | default: 62 | typ := reflect.TypeOf(val) 63 | // if a pointer to a struct is passed, get the type of the dereferenced object 64 | if typ.Kind() == reflect.Ptr { 65 | typ = typ.Elem() 66 | } 67 | //log the unknown type, return an empty node set 68 | //fmt.Println("go-resolve wrong-type", typ.Kind()) 69 | ret = C.xmlXPathNewNodeSet(nil) 70 | } 71 | return 72 | } 73 | 74 | //export exec_xpath_function 75 | func exec_xpath_function(ctxt C.xmlXPathParserContextPtr, nargs C.int) { 76 | function := C.GoString((*C.char)(unsafe.Pointer(ctxt.context.function))) 77 | namespace := C.GoString((*C.char)(unsafe.Pointer(ctxt.context.functionURI))) 78 | context := (*VariableScope)(ctxt.context.funcLookupData) 79 | 80 | argcount := int(nargs) 81 | var args []interface{} 82 | 83 | for i := 0; i < argcount; i = i + 1 { 84 | args = append(args, XPathObjectToValue(C.valuePop(ctxt))) 85 | } 86 | 87 | // arguments are popped off the stack in reverse order, so 88 | // we reverse the slice before invoking our callback 89 | if argcount > 1 { 90 | for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { 91 | args[i], args[j] = args[j], args[i] 92 | } 93 | } 94 | 95 | // push the result onto the stack 
96 | // if for some reason we are unable to resolve the 97 | // function we push an empty nodeset 98 | f := (*context).ResolveFunction(function, namespace) 99 | if f != nil { 100 | retval := f(*context, args) 101 | C.valuePush(ctxt, ValueToXPathObject(retval)) 102 | } else { 103 | ret := C.xmlXPathNewNodeSet(nil) 104 | C.valuePush(ctxt, ret) 105 | } 106 | 107 | } 108 | 109 | //export go_can_resolve_function 110 | func go_can_resolve_function(ctxt unsafe.Pointer, name, ns *C.char) (ret C.int) { 111 | function := C.GoString(name) 112 | namespace := C.GoString(ns) 113 | context := (*VariableScope)(ctxt) 114 | if *context == nil { 115 | return C.int(0) 116 | } 117 | if (*context).IsFunctionRegistered(function, namespace) { 118 | return C.int(1) 119 | } 120 | return C.int(0) 121 | } 122 | -------------------------------------------------------------------------------- /xml/attribute_test.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | import "testing" 4 | import "fmt" 5 | 6 | func TestSetValue(t *testing.T) { 7 | defer CheckXmlMemoryLeaks(t) 8 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 9 | if err != nil { 10 | t.Error("Parsing has error:", err) 11 | return 12 | } 13 | root := doc.Root() 14 | attributes := root.Attributes() 15 | if len(attributes) != 2 || attributes["myname"].String() != "ff" { 16 | fmt.Printf("%v, %q\n", attributes, attributes["myname"].String()) 17 | t.Error("root's attributes do not match") 18 | } 19 | child := root.FirstChild() 20 | childAttributes := child.Attributes() 21 | if len(childAttributes) != 1 || childAttributes["class"].String() != "shine" { 22 | t.Error("child's attributes do not match") 23 | } 24 | attributes["myname"].SetValue("new") 25 | expected := 26 | ` 27 | 28 | ` 29 | if root.String() != expected { 30 | println("got:\n", root.String()) 31 | println("expected:\n", expected) 32 | t.Error("root's new attr do not 
match") 33 | } 34 | attributes["id"].Remove() 35 | expected = 36 | ` 37 | 38 | ` 39 | 40 | if root.String() != expected { 41 | println("got:\n", root.String()) 42 | println("expected:\n", expected) 43 | t.Error("root's remove attr do not match") 44 | } 45 | doc.Free() 46 | } 47 | 48 | func TestSetAttribute(t *testing.T) { 49 | defer CheckXmlMemoryLeaks(t) 50 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 51 | if err != nil { 52 | t.Error("Parsing has error:", err) 53 | return 54 | } 55 | root := doc.Root() 56 | attributes := root.Attributes() 57 | if len(attributes) != 2 || attributes["myname"].String() != "ff" { 58 | fmt.Printf("%v, %q\n", attributes, attributes["myname"].String()) 59 | t.Error("root's attributes do not match") 60 | } 61 | 62 | root.SetAttr("id", "cooler") 63 | root.SetAttr("id2", "hot") 64 | root.SetAttr("id3", "") 65 | expected := 66 | ` 67 | 68 | ` 69 | if root.String() != expected { 70 | println("got:\n", root.String()) 71 | println("expected:\n", expected) 72 | t.Error("root's new attr do not match") 73 | } 74 | if root.Attr("id3") != "" { 75 | println("got:\n", root.Attr("id3")) 76 | println("expected:\n", "") 77 | t.Error("root's attr should have empty val") 78 | } 79 | if root.Attribute("id3") == nil { 80 | t.Error("root's attr should not be nil") 81 | } 82 | doc.Free() 83 | } 84 | 85 | func TestSetEmptyAttribute(t *testing.T) { 86 | defer CheckXmlMemoryLeaks(t) 87 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 88 | if err != nil { 89 | t.Error("Parsing has error:", err) 90 | return 91 | } 92 | root := doc.Root() 93 | attributes := root.Attributes() 94 | if len(attributes) != 2 || attributes["myname"].String() != "ff" { 95 | fmt.Printf("%v, %q\n", attributes, attributes["myname"].String()) 96 | t.Error("root's attributes do not match") 97 | } 98 | 99 | root.SetAttr("", "cool") 100 | expected := 101 | ` 102 | 103 | ` 104 | if 
root.String() != expected { 105 | println("got:\n", root.String()) 106 | println("expected:\n", expected) 107 | t.Error("root's new attr do not match") 108 | } 109 | 110 | root.SetAttr("", "") 111 | expected = 112 | ` 113 | 114 | ` 115 | if root.String() != expected { 116 | println("got:\n", root.String()) 117 | println("expected:\n", expected) 118 | t.Error("root's new attr do not match") 119 | } 120 | doc.Free() 121 | } 122 | -------------------------------------------------------------------------------- /html/document.go: -------------------------------------------------------------------------------- 1 | package html 2 | 3 | /* 4 | #cgo pkg-config: libxml-2.0 5 | 6 | #include 7 | #include 8 | #include "helper.h" 9 | */ 10 | import "C" 11 | 12 | import ( 13 | "errors" 14 | "github.com/moovweb/gokogiri/help" 15 | . "github.com/moovweb/gokogiri/util" 16 | "github.com/moovweb/gokogiri/xml" 17 | //"runtime" 18 | "unsafe" 19 | ) 20 | 21 | //xml parse option 22 | const ( 23 | HTML_PARSE_RECOVER xml.ParseOption = 1 << 0 /* Relaxed parsing */ 24 | HTML_PARSE_NODEFDTD xml.ParseOption = 1 << 2 /* do not default a doctype if not found */ 25 | HTML_PARSE_NOERROR xml.ParseOption = 1 << 5 /* suppress error reports */ 26 | HTML_PARSE_NOWARNING xml.ParseOption = 1 << 6 /* suppress warning reports */ 27 | HTML_PARSE_PEDANTIC xml.ParseOption = 1 << 7 /* pedantic error reporting */ 28 | HTML_PARSE_NOBLANKS xml.ParseOption = 1 << 8 /* remove blank nodes */ 29 | HTML_PARSE_NONET xml.ParseOption = 1 << 11 /* Forbid network access */ 30 | HTML_PARSE_NOIMPLIED xml.ParseOption = 1 << 13 /* Do not add implied html/body... 
elements */ 31 | HTML_PARSE_COMPACT xml.ParseOption = 1 << 16 /* compact small text nodes */ 32 | ) 33 | 34 | const EmptyHtmlDoc = "" 35 | 36 | //default parsing option: relax parsing 37 | var DefaultParseOption xml.ParseOption = HTML_PARSE_RECOVER | 38 | HTML_PARSE_NONET | 39 | HTML_PARSE_NOERROR | 40 | HTML_PARSE_NOWARNING 41 | 42 | type HtmlDocument struct { 43 | *xml.XmlDocument 44 | } 45 | 46 | //default encoding in byte slice 47 | var DefaultEncodingBytes = []byte(xml.DefaultEncoding) 48 | var emptyHtmlDocBytes = []byte(EmptyHtmlDoc) 49 | 50 | var ErrSetMetaEncoding = errors.New("Set Meta Encoding failed") 51 | var ERR_FAILED_TO_PARSE_HTML = errors.New("failed to parse html input") 52 | var emptyStringBytes = []byte{0} 53 | 54 | //create a document 55 | func NewDocument(p unsafe.Pointer, contentLen int, inEncoding, outEncoding []byte) (doc *HtmlDocument) { 56 | doc = &HtmlDocument{} 57 | doc.XmlDocument = xml.NewDocument(p, contentLen, inEncoding, outEncoding) 58 | doc.Me = doc 59 | node := doc.Node.(*xml.XmlNode) 60 | node.Document = doc 61 | //runtime.SetFinalizer(doc, (*HtmlDocument).Free) 62 | return 63 | } 64 | 65 | //parse a string to document 66 | func Parse(content, inEncoding, url []byte, options xml.ParseOption, outEncoding []byte) (doc *HtmlDocument, err error) { 67 | inEncoding = AppendCStringTerminator(inEncoding) 68 | outEncoding = AppendCStringTerminator(outEncoding) 69 | 70 | var docPtr *C.xmlDoc 71 | contentLen := len(content) 72 | 73 | if contentLen > 0 { 74 | var contentPtr, urlPtr, encodingPtr unsafe.Pointer 75 | 76 | contentPtr = unsafe.Pointer(&content[0]) 77 | if len(url) > 0 { 78 | url = AppendCStringTerminator(url) 79 | urlPtr = unsafe.Pointer(&url[0]) 80 | } 81 | if len(inEncoding) > 0 { 82 | encodingPtr = unsafe.Pointer(&inEncoding[0]) 83 | } 84 | 85 | docPtr = C.htmlParse(contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0) 86 | 87 | if docPtr == nil { 88 | err = ERR_FAILED_TO_PARSE_HTML 89 | } else { 90 | 
doc = NewDocument(unsafe.Pointer(docPtr), contentLen, inEncoding, outEncoding) 91 | } 92 | } 93 | if docPtr == nil { 94 | doc = CreateEmptyDocument(inEncoding, outEncoding) 95 | } 96 | return 97 | } 98 | 99 | func CreateEmptyDocument(inEncoding, outEncoding []byte) (doc *HtmlDocument) { 100 | help.LibxmlInitParser() 101 | docPtr := C.htmlNewDoc(nil, nil) 102 | doc = NewDocument(unsafe.Pointer(docPtr), 0, inEncoding, outEncoding) 103 | return 104 | } 105 | 106 | func (document *HtmlDocument) ParseFragment(input, url []byte, options xml.ParseOption) (fragment *xml.DocumentFragment, err error) { 107 | root := document.Root() 108 | if root == nil { 109 | fragment, err = parsefragment(document, nil, input, url, options) 110 | } else { 111 | fragment, err = parsefragment(document, root.XmlNode, input, url, options) 112 | } 113 | return 114 | } 115 | 116 | func (doc *HtmlDocument) MetaEncoding() string { 117 | metaEncodingXmlCharPtr := C.htmlGetMetaEncoding((*C.xmlDoc)(doc.DocPtr())) 118 | return C.GoString((*C.char)(unsafe.Pointer(metaEncodingXmlCharPtr))) 119 | } 120 | 121 | func (doc *HtmlDocument) SetMetaEncoding(encoding string) (err error) { 122 | var encodingPtr unsafe.Pointer = nil 123 | if len(encoding) > 0 { 124 | encodingBytes := AppendCStringTerminator([]byte(encoding)) 125 | encodingPtr = unsafe.Pointer(&encodingBytes[0]) 126 | } 127 | ret := int(C.htmlSetMetaEncoding((*C.xmlDoc)(doc.DocPtr()), (*C.xmlChar)(encodingPtr))) 128 | if ret == -1 { 129 | err = ErrSetMetaEncoding 130 | } 131 | return 132 | } 133 | -------------------------------------------------------------------------------- /html/fragment_test.go: -------------------------------------------------------------------------------- 1 | package html 2 | 3 | import "testing" 4 | 5 | func TestParseDocumentFragmentText(t *testing.T) { 6 | doc, err := Parse(nil, []byte("iso-8859-1"), nil, DefaultParseOption, []byte("iso-8859-1")) 7 | if err != nil { 8 | println(err.Error()) 9 | } 10 | docFragment, err := 
doc.ParseFragment([]byte("ok\r\n"), nil, DefaultParseOption) 11 | if err != nil { 12 | t.Error(err.Error()) 13 | return 14 | } 15 | if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "ok\r\n" { 16 | println(docFragment.String()) 17 | t.Error("the children from the fragment text do not match") 18 | } 19 | doc.Free() 20 | CheckXmlMemoryLeaks(t) 21 | } 22 | 23 | func TestParseDocumentFragment(t *testing.T) { 24 | doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 25 | if err != nil { 26 | println(err.Error()) 27 | } 28 | docFragment, err := doc.ParseFragment([]byte("

"), nil, DefaultParseOption) 29 | if err != nil { 30 | t.Error(err.Error()) 31 | return 32 | } 33 | if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "

" { 34 | t.Error("the of children from the fragment do not match") 35 | } 36 | 37 | doc.Free() 38 | CheckXmlMemoryLeaks(t) 39 | 40 | } 41 | 42 | func TestParseDocumentFragment2(t *testing.T) { 43 | docStr := ` 44 | 45 | 46 | 47 | ` 48 | doc, err := Parse([]byte(docStr), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 49 | if err != nil { 50 | println(err.Error()) 51 | } 52 | docFragment, err := doc.ParseFragment([]byte(""), nil, DefaultParseOption) 53 | if err != nil { 54 | t.Error(err.Error()) 55 | return 56 | } 57 | if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "" { 58 | t.Error("the of children from the fragment do not match") 59 | } 60 | 61 | doc.Free() 62 | CheckXmlMemoryLeaks(t) 63 | } 64 | 65 | func TestSearchDocumentFragment(t *testing.T) { 66 | doc, err := Parse([]byte("
"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 67 | if err != nil { 68 | println(err.Error()) 69 | } 70 | docFragment, err := doc.ParseFragment([]byte("

"), nil, DefaultParseOption) 71 | if err != nil { 72 | t.Error(err.Error()) 73 | return 74 | } 75 | if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "

" { 76 | t.Error("the of children from the fragment do not match") 77 | } 78 | 79 | nodes, err := docFragment.Search(".//*") 80 | if err != nil { 81 | t.Error("fragment search has error") 82 | return 83 | } 84 | if len(nodes) != 2 { 85 | t.Error("the number of children from the fragment does not match") 86 | } 87 | nodes, err = docFragment.Search("//div[@class='cool']") 88 | 89 | if err != nil { 90 | t.Error("fragment search has error") 91 | return 92 | } 93 | 94 | if len(nodes) != 1 { 95 | println(len(nodes)) 96 | for _, node := range nodes { 97 | println(node.String()) 98 | } 99 | t.Error("the number of children from the fragment's document does not match") 100 | } 101 | 102 | doc.Free() 103 | CheckXmlMemoryLeaks(t) 104 | } 105 | 106 | func TestAddFragmentWithNamespace(t *testing.T) { 107 | doc, err := Parse([]byte("
"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 108 | if err != nil { 109 | println(err.Error()) 110 | } 111 | defer doc.Free() 112 | docFragment, err := doc.ParseFragment([]byte("

"), nil, DefaultParseOption) 113 | if err != nil { 114 | t.Error(err.Error()) 115 | return 116 | } 117 | if docFragment.String() != `

` { 118 | t.Errorf("doc fragment does not match\n") 119 | } 120 | doc2, err := Parse([]byte("
"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 121 | if err != nil { 122 | println(err.Error()) 123 | return 124 | } 125 | defer doc2.Free() 126 | body := doc2.Root().FirstChild() 127 | body.AddChild(docFragment) 128 | if doc2.String() != ` 129 | 130 |
131 |

132 | 133 | ` { 134 | t.Errorf("document does not match after adding a fragment with namespace\n") 135 | } 136 | CheckXmlMemoryLeaks(t) 137 | } 138 | -------------------------------------------------------------------------------- /xml/search_test.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | import "testing" 4 | 5 | func TestSearch(t *testing.T) { 6 | 7 | testLogic := func(t *testing.T, doc *XmlDocument) { 8 | root := doc.Root() 9 | result, _ := root.Search(".//*[@class]") 10 | if len(result) != 2 { 11 | t.Error("search at root does not match") 12 | } 13 | result, _ = root.Search("//*[@class]") 14 | if len(result) != 3 { 15 | t.Error("search at root does not match") 16 | } 17 | result, _ = doc.Search(".//*[@class]") 18 | if len(result) != 3 { 19 | t.Error("search at doc does not match") 20 | } 21 | result, _ = doc.Search(".//*[@class='shine']") 22 | if len(result) != 2 { 23 | t.Error("search with value at doc does not match") 24 | } 25 | } 26 | 27 | RunTest(t, "node", "search", testLogic) 28 | } 29 | 30 | func BenchmarkSearch(b *testing.B) { 31 | 32 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 33 | root := doc.Root() 34 | 35 | for i := 0; i < b.N; i++ { 36 | root.Search(".//*[@class]") 37 | } 38 | } 39 | 40 | RunBenchmark(b, "node", "search", benchmarkLogic) 41 | } 42 | 43 | func BenchmarkBigDocDeepSearchyTagName(b *testing.B) { 44 | 45 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 46 | 47 | for i := 0; i < b.N; i++ { 48 | doc.Search("//div") 49 | } 50 | } 51 | 52 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 53 | } 54 | 55 | func BenchmarkBigDocPunctuatedDeepSearch(b *testing.B) { 56 | 57 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 58 | 59 | for i := 0; i < b.N; i++ { 60 | doc.Search("//*[@class='filters']//div") 61 | } 62 | } 63 | 64 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 65 | } 66 | 67 | func BenchmarkBigDocDeepSearchByID(b 
*testing.B) { 68 | 69 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 70 | 71 | for i := 0; i < b.N; i++ { 72 | doc.Search("//*[@id='ppp']") 73 | //nodes, _ := doc.Search("//*[@id='ppp']") 74 | //fmt.Printf("%v\t", len(nodes)) 75 | } 76 | } 77 | 78 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 79 | } 80 | 81 | func BenchmarkBigDocDeepSearchByClass(b *testing.B) { 82 | 83 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 84 | 85 | for i := 0; i < b.N; i++ { 86 | doc.Search("//*[@class]") 87 | //nodes, _ := doc.Search("//*[@class]") 88 | //fmt.Printf("%v\t", len(nodes)) 89 | } 90 | } 91 | 92 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 93 | } 94 | 95 | func BenchmarkBigDocDeepSearchByClassContains(b *testing.B) { 96 | 97 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 98 | 99 | for i := 0; i < b.N; i++ { 100 | doc.Search("//*[contains(@class, 'header')]") 101 | } 102 | } 103 | 104 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 105 | } 106 | 107 | func BenchmarkBigDocDeepSearchBySemanticClass(b *testing.B) { 108 | 109 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 110 | 111 | for i := 0; i < b.N; i++ { 112 | doc.Search("//*[contains(concat(concat(' ', @class), ' '), concat(concat(' ','header'), ' '))]") 113 | } 114 | } 115 | 116 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 117 | } 118 | 119 | func BenchmarkBigDocDeepSearchByText(b *testing.B) { 120 | 121 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 122 | 123 | for i := 0; i < b.N; i++ { 124 | doc.Search("//*[text()='hey']") 125 | } 126 | } 127 | 128 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 129 | } 130 | 131 | func BenchmarkBigDocDeepSearchByTextContains(b *testing.B) { 132 | 133 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 134 | 135 | for i := 0; i < b.N; i++ { 136 | doc.Search("//*[contains(text(),'hey')]") 137 | } 138 | } 139 | 140 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 141 
| } 142 | 143 | func BenchmarkBigDocSearchAncestorAxes(b *testing.B) { 144 | 145 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 146 | elem, _ := doc.Search("//*[@id='ppp']") 147 | for i := 0; i < b.N; i++ { 148 | elem[0].Search("ancestor::html") 149 | } 150 | } 151 | 152 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 153 | } 154 | 155 | func BenchmarkBigDocSearchLongTraverseUpToRoot(b *testing.B) { 156 | 157 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 158 | elem, _ := doc.Search("//*[@id='ppp']") 159 | 160 | for i := 0; i < b.N; i++ { 161 | elem[0].Search("../../../../../../../../..") 162 | } 163 | } 164 | 165 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 166 | } 167 | 168 | func BenchmarkBigDocSearchShortTraverseUpToRoot(b *testing.B) { 169 | 170 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 171 | elem, _ := doc.Search("//*[@id='ppp']") 172 | 173 | for i := 0; i < b.N; i++ { 174 | elem[0].Search("../../../..") 175 | } 176 | } 177 | 178 | RunBenchmark(b, "document", "big_un", benchmarkLogic) 179 | } 180 | -------------------------------------------------------------------------------- /xml/utils_test.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "github.com/moovweb/gokogiri/help" 7 | "github.com/moovweb/gokogiri/xpath" 8 | "io/ioutil" 9 | "path/filepath" 10 | "strings" 11 | "testing" 12 | ) 13 | 14 | func badOutput(actual string, expected string) { 15 | fmt.Printf("Got:\n[%v]\n", actual) 16 | fmt.Printf("Expected:\n[%v]\n", expected) 17 | } 18 | 19 | func RunTest(t *testing.T, suite string, name string, specificLogic func(t *testing.T, doc *XmlDocument), extraAssertions ...func(doc *XmlDocument) (string, string, string)) { 20 | defer CheckXmlMemoryLeaks(t) 21 | 22 | //println("Initiating test:" + suite + ":" + name) 23 | 24 | input, output, error := getTestData(filepath.Join("tests", suite, name)) 25 | 26 | if 
len(error) > 0 { 27 | t.Errorf("Error gathering test data for %v:\n%v\n", name, error) 28 | t.FailNow() 29 | } 30 | 31 | expected := string(output) 32 | 33 | //println("Got raw input/output") 34 | 35 | doc, err := parseInput(input) 36 | 37 | if err != nil { 38 | t.Error(err.Error()) 39 | } 40 | 41 | //println("parsed input") 42 | 43 | if specificLogic != nil { 44 | specificLogic(t, doc) 45 | } 46 | if doc.String() != expected { 47 | badOutput(doc.String(), expected) 48 | t.Error("the output of the xml doc does not match") 49 | } 50 | for _, extraAssertion := range extraAssertions { 51 | actual, expected, message := extraAssertion(doc) 52 | 53 | if actual != expected { 54 | badOutput(actual, expected) 55 | t.Error(message) 56 | } 57 | } 58 | 59 | doc.Free() 60 | } 61 | 62 | func RunBenchmark(b *testing.B, suite string, name string, specificLogic func(b *testing.B, doc *XmlDocument)) { 63 | b.StopTimer() 64 | 65 | // defer CheckXmlMemoryLeaks(b) 66 | 67 | input, _, error := getTestData(filepath.Join("tests", suite, name)) 68 | 69 | if len(error) > 0 { 70 | panic(fmt.Sprintf("Error gathering test data for %v:\n%v\n", name, error)) 71 | } 72 | 73 | doc, err := parseInput(input) 74 | 75 | if err != nil { 76 | panic("Error:" + err.Error()) 77 | } 78 | 79 | b.StartTimer() 80 | 81 | if specificLogic != nil { 82 | specificLogic(b, doc) 83 | } 84 | 85 | doc.Free() 86 | 87 | // println("----------- END OF BENCHMARK -----------") 88 | } 89 | 90 | func parseInput(input interface{}) (*XmlDocument, error) { 91 | var realInput []byte 92 | 93 | switch thisInput := input.(type) { 94 | case []byte: 95 | realInput = thisInput 96 | case string: 97 | realInput = []byte(thisInput) 98 | default: 99 | return nil, errors.New("Unrecognized parsing input!") 100 | } 101 | 102 | doc, err := Parse(realInput, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 103 | 104 | if err != nil { 105 | return nil, errors.New(fmt.Sprintf("parsing error:%v\n", err.Error())) 106 | } 107 | 
108 | return doc, nil 109 | } 110 | 111 | func getTestData(name string) (input []byte, output []byte, error string) { 112 | var errorMessage string 113 | offset := "\t" 114 | inputFile := filepath.Join(name, "input.txt") 115 | 116 | input, err := ioutil.ReadFile(inputFile) 117 | 118 | if err != nil { 119 | errorMessage += fmt.Sprintf("%vCouldn't read test (%v) input:\n%v\n", offset, name, offset+err.Error()) 120 | } 121 | 122 | output, err = ioutil.ReadFile(filepath.Join(name, "output.txt")) 123 | 124 | if err != nil { 125 | errorMessage += fmt.Sprintf("%vCouldn't read test (%v) output:\n%v\n", offset, name, offset+err.Error()) 126 | } 127 | 128 | return input, output, errorMessage 129 | } 130 | 131 | func collectTests(suite string) (names []string, error string) { 132 | testPath := filepath.Join("tests", suite) 133 | entries, err := ioutil.ReadDir(testPath) 134 | 135 | if err != nil { 136 | return nil, fmt.Sprintf("Couldn't read tests:\n%v\n", err.Error()) 137 | } 138 | 139 | for _, entry := range entries { 140 | if strings.HasPrefix(entry.Name(), "_") || strings.HasPrefix(entry.Name(), ".") { 141 | continue 142 | } 143 | 144 | if entry.IsDir() { 145 | names = append(names, filepath.Join(testPath, entry.Name())) 146 | } 147 | } 148 | 149 | return 150 | } 151 | 152 | func CheckXmlMemoryLeaks(t *testing.T) { 153 | // LibxmlCleanUpParser() should only be called once during the lifetime of the 154 | // program, but because there's no way to know when the last test of the suite 155 | // runs in go, we can't accurately call it strictly once, so just avoid calling 156 | // it for now because it's known to cause crashes if called multiple times. 157 | //help.LibxmlCleanUpParser() 158 | 159 | if !help.LibxmlCheckMemoryLeak() { 160 | t.Errorf("Memory leaks: %d!!!", help.LibxmlGetMemoryAllocation()) 161 | help.LibxmlReportMemoryLeak() 162 | } 163 | } 164 | 165 | // This is a simple test implementation of the VariableScope interface. 
166 | // Currently it's os simple it ignores the namespace argument. 167 | type SimpleVariableScope struct { 168 | variables map[string]interface{} 169 | functions map[string]xpath.XPathFunction 170 | } 171 | 172 | func (s *SimpleVariableScope) ResolveVariable(name, ns string) interface{} { 173 | v, _ := s.variables[name] 174 | return v 175 | } 176 | 177 | func (s *SimpleVariableScope) IsFunctionRegistered(name, ns string) bool { 178 | _, ok := s.functions[name] 179 | return ok 180 | } 181 | func (s *SimpleVariableScope) ResolveFunction(name, ns string) xpath.XPathFunction { 182 | f, _ := s.functions[name] 183 | return f 184 | } 185 | 186 | func newSimpleVariableScope() *SimpleVariableScope { 187 | s := &SimpleVariableScope{make(map[string]interface{}), make(map[string]xpath.XPathFunction)} 188 | return s 189 | } 190 | -------------------------------------------------------------------------------- /xml/helper.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "helper.h" 3 | 4 | //internal callback functions 5 | int xml_write_callback(void *ctx, char *buffer, int len) { 6 | if (len > 0) { 7 | xmlNodeWriteCallback(ctx, buffer, len); 8 | } 9 | return len; 10 | } 11 | 12 | int close_callback(void * ctx) { 13 | return 0; 14 | } 15 | 16 | xmlDoc* newEmptyXmlDoc() { 17 | return xmlNewDoc(BAD_CAST XML_DEFAULT_VERSION); 18 | } 19 | 20 | xmlElementType getNodeType(xmlNode *node) { return node->type; } 21 | 22 | void xmlFreeChars(char *buffer) { 23 | if (buffer) { 24 | xmlFree((xmlChar*)buffer); 25 | } 26 | } 27 | 28 | char *xmlDocDumpToString(xmlDoc *doc, void *encoding, int format) { 29 | xmlChar *buff; 30 | int buffersize; 31 | xmlDocDumpFormatMemoryEnc(doc, &buff, &buffersize, (char*)encoding, format); 32 | return (char*)buff; 33 | } 34 | 35 | char *htmlDocDumpToString(htmlDocPtr doc, int format) { 36 | xmlChar *buff; 37 | int buffersize; 38 | htmlDocDumpMemoryFormat(doc, &buff, &buffersize, format); 39 | return 
(char*)buff; 40 | } 41 | 42 | xmlDoc* xmlParse(void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) { 43 | const char *c_buffer = (char*)buffer; 44 | const char *c_url = (char*)url; 45 | const char *c_encoding = (char*)encoding; 46 | xmlDoc *doc = NULL; 47 | 48 | xmlResetLastError(); 49 | doc = xmlReadMemory(c_buffer, buffer_len, c_url, c_encoding, options); 50 | 51 | if(doc == NULL) { 52 | xmlErrorPtr error; 53 | xmlFreeDoc(doc); 54 | error = xmlGetLastError(); 55 | if(error != NULL && error_buffer != NULL && error->level >= XML_ERR_ERROR) { 56 | char *c_error_buffer = (char*)error_buffer; 57 | if (error->message != NULL) { 58 | strncpy(c_error_buffer, error->message, error_buffer_len-1); 59 | c_error_buffer[error_buffer_len-1] = '\0'; 60 | } 61 | else { 62 | //snprintf(c_error_buffer, error_buffer_len, "xml parsing error:%d", error->code); 63 | } 64 | } 65 | } 66 | return doc; 67 | } 68 | 69 | xmlNode* xmlParseFragment(void *doc, void *buffer, int buffer_len, void *url, int options, void *error_buffer, int error_buffer_len) { 70 | xmlNodePtr root_element = NULL; 71 | xmlParserErrors errCode; 72 | errCode = xmlParseInNodeContext((xmlNodePtr)doc, buffer, buffer_len, options, &root_element); 73 | if (errCode != XML_ERR_OK) { 74 | if (error_buffer != NULL && error_buffer_len > 0) { 75 | //char *c_error_buffer = (char*)error_buffer; 76 | //snprintf(c_error_buffer, error_buffer_len, "xml fragemnt parsing error (xmlParserErrors):%d", errCode); 77 | } 78 | printf("errorcode %d\n", errCode); 79 | return NULL; 80 | } 81 | return root_element; 82 | } 83 | 84 | xmlNode* xmlParseFragmentAsDoc(void *doc, void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) { 85 | xmlDoc* tmpDoc = NULL; 86 | xmlNode* tmpRoot = NULL; 87 | tmpDoc = xmlReadMemory((char*)buffer, buffer_len, (char*)url, (char*)encoding, options); 88 | if (tmpDoc == NULL) { 89 | return NULL; 90 | } 91 
| tmpRoot = xmlDocGetRootElement(tmpDoc); 92 | if (tmpRoot == NULL) { 93 | return NULL; 94 | } 95 | tmpRoot = xmlDocCopyNode(tmpRoot, doc, 1); 96 | xmlFreeDoc(tmpDoc); 97 | return tmpRoot; 98 | } 99 | 100 | void xmlSetContent(void *gonode, void *n, void *content) { 101 | xmlNode *node = (xmlNode*)n; 102 | xmlNode *child = node->children; 103 | xmlNode *next = NULL; 104 | unsigned char *encoded = xmlEncodeSpecialChars(node->doc, content); 105 | if (encoded) { 106 | while (child) { 107 | next = child->next ; 108 | xmlUnlinkNode(child); 109 | //xmlFreeNode(child); 110 | xmlUnlinkNodeCallback(child, gonode); 111 | child = next ; 112 | } 113 | xmlNodeSetContent(node, (xmlChar*)encoded); 114 | xmlFree(encoded); 115 | } 116 | } 117 | 118 | int xmlUnlinkNodeWithCheck(xmlNode *node) { 119 | if (xmlNodePtrCheck(node->parent)) { 120 | xmlUnlinkNode(node); 121 | return 1; 122 | } 123 | return 0; 124 | } 125 | 126 | int xmlNodePtrCheck(void *node) { 127 | if (node == (void*)(-1)) 128 | return 0; 129 | return 1; 130 | } 131 | 132 | int xmlSaveNode(void *wbuffer, void *node, void *encoding, int options) { 133 | xmlSaveCtxtPtr savectx; 134 | const char *c_encoding = (char*)encoding; 135 | 136 | savectx = xmlSaveToIO( 137 | (xmlOutputWriteCallback)xml_write_callback, 138 | (xmlOutputCloseCallback)close_callback, 139 | wbuffer, 140 | encoding, 141 | options 142 | ); 143 | xmlSaveTree(savectx, (xmlNode*)node); 144 | return xmlSaveClose(savectx); 145 | } 146 | 147 | void removeNamespace(xmlNs **source, xmlNs *target) { 148 | xmlNs *ns, *prevns = NULL; 149 | 150 | for (ns = *source; ns; ns = ns->next) { 151 | if (ns == target) { 152 | if (!prevns) { 153 | // we are the first element 154 | *source = ns->next; 155 | } else { 156 | prevns->next = ns->next; 157 | } 158 | 159 | break; 160 | } 161 | 162 | prevns = ns; 163 | } 164 | } 165 | 166 | void removeDefaultNamespace(xmlNs *ns, xmlNode *node) { 167 | removeNamespace(&node->nsDef, ns); 168 | 169 | xmlAttr *attr; 170 | 171 | for (attr = 
node->properties; attr; attr = attr->next) { 172 | if (!attr->ns) 173 | continue; 174 | 175 | removeNamespace(&attr->ns, ns); 176 | } 177 | 178 | if (node->ns == ns) 179 | node->ns = NULL; 180 | 181 | xmlNode *child; 182 | 183 | for (child = xmlFirstElementChild(node); child; child = xmlNextElementSibling(child)) { 184 | removeDefaultNamespace(ns, child); 185 | } 186 | } 187 | 188 | void xmlRemoveDefaultNamespace(xmlNode *node) { 189 | if (node->ns && node->ns->prefix) { 190 | // not a default namespace 191 | return; 192 | } 193 | 194 | removeDefaultNamespace(node->ns, node); 195 | } 196 | -------------------------------------------------------------------------------- /html/node_test.go: -------------------------------------------------------------------------------- 1 | package html 2 | 3 | import "testing" 4 | 5 | func TestInnerScript(t *testing.T) { 6 | defer CheckXmlMemoryLeaks(t) 7 | 8 | doc, err := Parse([]byte("

"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 9 | 10 | if err != nil { 11 | t.Error("Parsing has error:", err) 12 | return 13 | } 14 | 15 | h1 := doc.Root().FirstChild().FirstChild().FirstChild() 16 | h1.SetInnerHtml("") 17 | if h1.String() != "

" { 18 | t.Error("script does not match") 19 | } 20 | doc.Free() 21 | } 22 | 23 | func TestInnerScript2(t *testing.T) { 24 | defer CheckXmlMemoryLeaks(t) 25 | script := `` 56 | 57 | doc, err := Parse([]byte("

"), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 58 | 59 | if err != nil { 60 | t.Error("Parsing has error:", err) 61 | return 62 | } 63 | 64 | h1 := doc.Root().FirstChild().FirstChild().FirstChild() 65 | h1.SetInnerHtml(script) 66 | if h1.String() != "

"+script+"

" { 67 | t.Error("script does not match") 68 | } 69 | doc.Free() 70 | } 71 | 72 | func TestInsertMyselfBefore(t *testing.T) { 73 | input := ` 74 | 75 | Title 76 | 77 | 78 | 79 |

Welcome to Tritium Tester

80 | 81 | 82 | ` 83 | doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 84 | 85 | if err != nil { 86 | t.Error("Parsing has error:", err) 87 | return 88 | } 89 | 90 | divs, _ := doc.Search("//div") 91 | if len(divs) != 1 { 92 | t.Error("should have 1 div") 93 | return 94 | } 95 | 96 | div := divs[0] 97 | div.InsertBefore(div) 98 | 99 | expected := ` 100 | 101 | 102 | 103 | Title 104 | 105 | 106 | 107 |

Welcome to Tritium Tester

108 | 109 | 110 | ` 111 | if expected != doc.String() { 112 | t.Error("doc is not expected:\n", doc.String(), "\n", expected) 113 | } 114 | doc.Free() 115 | CheckXmlMemoryLeaks(t) 116 | } 117 | 118 | func TestInsertMyselfAfter(t *testing.T) { 119 | input := ` 120 | 121 | Title 122 | 123 | 124 | 125 |

Welcome to Tritium Tester

126 | 127 | 128 | ` 129 | doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 130 | 131 | if err != nil { 132 | t.Error("Parsing has error:", err) 133 | return 134 | } 135 | 136 | divs, _ := doc.Search("//div") 137 | if len(divs) != 1 { 138 | t.Error("should have 1 div") 139 | return 140 | } 141 | 142 | div := divs[0] 143 | div.InsertAfter(div) 144 | 145 | expected := ` 146 | 147 | 148 | 149 | Title 150 | 151 | 152 | 153 |

Welcome to Tritium Tester

154 | 155 | 156 | ` 157 | if expected != doc.String() { 158 | t.Error("doc is not expected:\n", doc.String(), "\n", expected) 159 | } 160 | doc.Free() 161 | CheckXmlMemoryLeaks(t) 162 | } 163 | 164 | func TestAddMyselfChild(t *testing.T) { 165 | input := ` 166 | 167 | Title 168 | 169 | 170 | 171 |

Welcome to Tritium Tester

172 | 173 | 174 | ` 175 | doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 176 | 177 | if err != nil { 178 | t.Error("Parsing has error:", err) 179 | return 180 | } 181 | 182 | divs, _ := doc.Search("//div") 183 | if len(divs) != 1 { 184 | t.Error("should have 1 div") 185 | return 186 | } 187 | 188 | div := divs[0] 189 | div.AddChild(div) 190 | 191 | expected := ` 192 | 193 | 194 | 195 | Title 196 | 197 | 198 | 199 |

Welcome to Tritium Tester

200 | 201 | 202 | ` 203 | if expected != doc.String() { 204 | t.Error("doc is not expected:\n", doc.String(), "\n", expected) 205 | } 206 | doc.Free() 207 | CheckXmlMemoryLeaks(t) 208 | } 209 | 210 | func TestRemoveMeRemoveParent(t *testing.T) { 211 | input := ` 212 | 213 | Title 214 | 215 | 216 | 217 | 218 | 219 | ` 220 | doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 221 | 222 | if err != nil { 223 | t.Error("Parsing has error:", err) 224 | return 225 | } 226 | 227 | divs, _ := doc.Search("//div") 228 | if len(divs) != 1 { 229 | t.Error("should have 1 div") 230 | return 231 | } 232 | 233 | div := divs[0] 234 | h1 := div.FirstChild() 235 | nodes, _ := h1.Search("..") 236 | h1.Remove() 237 | nodes, _ = h1.Search("..") 238 | if len(nodes) != 1 { 239 | t.Error("removed node should have a parent , i.e. its document") 240 | } 241 | nodes[0].Remove() 242 | doc.Free() 243 | CheckXmlMemoryLeaks(t) 244 | } 245 | -------------------------------------------------------------------------------- /xpath/xpath.go: -------------------------------------------------------------------------------- 1 | package xpath 2 | 3 | /* 4 | #cgo pkg-config: libxml-2.0 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | xmlNode* fetchNode(xmlNodeSet *nodeset, int index) { 11 | return nodeset->nodeTab[index]; 12 | } 13 | 14 | xmlXPathObjectPtr go_resolve_variables(void* ctxt, char* name, char* ns); 15 | int go_can_resolve_function(void* ctxt, char* name, char* ns); 16 | void exec_xpath_function(xmlXPathParserContextPtr ctxt, int nargs); 17 | 18 | xmlXPathFunction go_resolve_function(void* ctxt, char* name, char* ns) { 19 | if (go_can_resolve_function(ctxt, name, ns)) 20 | return exec_xpath_function; 21 | 22 | return 0; 23 | } 24 | 25 | static void set_var_lookup(xmlXPathContext* c, void* data) { 26 | c->varLookupFunc = (void *)go_resolve_variables; 27 | c->varLookupData = data; 28 | } 29 | 30 | static void 
set_function_lookup(xmlXPathContext* c, void* data) { 31 | c->funcLookupFunc = (void *)go_resolve_function; 32 | c->funcLookupData = data; 33 | } 34 | 35 | int getXPathObjectType(xmlXPathObject* o) { 36 | if(o == 0) 37 | return 0; 38 | return o->type; 39 | } 40 | */ 41 | import "C" 42 | 43 | import "unsafe" 44 | import . "github.com/moovweb/gokogiri/util" 45 | import "runtime" 46 | import "errors" 47 | 48 | type XPath struct { 49 | ContextPtr *C.xmlXPathContext 50 | ResultPtr *C.xmlXPathObject 51 | } 52 | 53 | type XPathObjectType int 54 | 55 | const ( 56 | XPATH_UNDEFINED XPathObjectType = 0 57 | XPATH_NODESET = 1 58 | XPATH_BOOLEAN = 2 59 | XPATH_NUMBER = 3 60 | XPATH_STRING = 4 61 | XPATH_POINT = 5 62 | XPATH_RANGE = 6 63 | XPATH_LOCATIONSET = 7 64 | XPATH_USERS = 8 65 | XPATH_XSLT_TREE = 9 // An XSLT value tree, non modifiable 66 | ) 67 | 68 | type XPathFunction func(context VariableScope, args []interface{}) interface{} 69 | 70 | // Types that provide the VariableScope interface know how to resolve 71 | // XPath variable names into values. 72 | 73 | //This interface exist primarily for the benefit of XSLT processors. 
74 | type VariableScope interface { 75 | ResolveVariable(string, string) interface{} 76 | IsFunctionRegistered(string, string) bool 77 | ResolveFunction(string, string) XPathFunction 78 | } 79 | 80 | func NewXPath(docPtr unsafe.Pointer) (xpath *XPath) { 81 | if docPtr == nil { 82 | return 83 | } 84 | xpath = &XPath{ContextPtr: C.xmlXPathNewContext((*C.xmlDoc)(docPtr)), ResultPtr: nil} 85 | runtime.SetFinalizer(xpath, (*XPath).Free) 86 | return 87 | } 88 | 89 | func (xpath *XPath) RegisterNamespace(prefix, href string) bool { 90 | var prefixPtr unsafe.Pointer = nil 91 | if len(prefix) > 0 { 92 | prefixBytes := AppendCStringTerminator([]byte(prefix)) 93 | prefixPtr = unsafe.Pointer(&prefixBytes[0]) 94 | } 95 | 96 | var hrefPtr unsafe.Pointer = nil 97 | if len(href) > 0 { 98 | hrefBytes := AppendCStringTerminator([]byte(href)) 99 | hrefPtr = unsafe.Pointer(&hrefBytes[0]) 100 | } 101 | 102 | result := C.xmlXPathRegisterNs(xpath.ContextPtr, (*C.xmlChar)(prefixPtr), (*C.xmlChar)(hrefPtr)) 103 | return result == 0 104 | } 105 | 106 | // Evaluate an XPath and attempt to consume the result as a nodeset. 107 | func (xpath *XPath) EvaluateAsNodeset(nodePtr unsafe.Pointer, xpathExpr *Expression) (nodes []unsafe.Pointer, err error) { 108 | if nodePtr == nil { 109 | //evaluating xpath on a nil node returns no result. 110 | return 111 | } 112 | 113 | err = xpath.Evaluate(nodePtr, xpathExpr) 114 | if err != nil { 115 | return 116 | } 117 | 118 | nodes, err = xpath.ResultAsNodeset() 119 | return 120 | } 121 | 122 | // Evaluate an XPath. The returned result is stored in the struct. Call ReturnType to 123 | // discover the type of result, and call one of the ResultAs* functions to return a 124 | // copy of the result as a particular type. 125 | func (xpath *XPath) Evaluate(nodePtr unsafe.Pointer, xpathExpr *Expression) (err error) { 126 | if nodePtr == nil { 127 | //evaluating xpath on a nil node returns no result. 
128 | return 129 | } 130 | 131 | oldXPContextDoc := xpath.ContextPtr.doc 132 | oldXPContextNode := xpath.ContextPtr.node 133 | oldXPProximityPosition := xpath.ContextPtr.proximityPosition 134 | oldXPContextSize := xpath.ContextPtr.contextSize 135 | oldXPNsNr := xpath.ContextPtr.nsNr 136 | oldXPNamespaces := xpath.ContextPtr.namespaces 137 | 138 | xpath.ContextPtr.node = (*C.xmlNode)(nodePtr) 139 | if xpath.ResultPtr != nil { 140 | C.xmlXPathFreeObject(xpath.ResultPtr) 141 | } 142 | xpath.ResultPtr = C.xmlXPathCompiledEval(xpathExpr.Ptr, xpath.ContextPtr) 143 | 144 | xpath.ContextPtr.doc = oldXPContextDoc 145 | xpath.ContextPtr.node = oldXPContextNode 146 | xpath.ContextPtr.proximityPosition = oldXPProximityPosition 147 | xpath.ContextPtr.contextSize = oldXPContextSize 148 | xpath.ContextPtr.nsNr = oldXPNsNr 149 | xpath.ContextPtr.namespaces = oldXPNamespaces 150 | 151 | if xpath.ResultPtr == nil { 152 | err = errors.New("err in evaluating xpath: " + xpathExpr.String()) 153 | return 154 | } 155 | return 156 | } 157 | 158 | // Determine the actual return type of the XPath evaluation. 159 | func (xpath *XPath) ReturnType() XPathObjectType { 160 | return XPathObjectType(C.getXPathObjectType(xpath.ResultPtr)) 161 | } 162 | 163 | // Get the XPath result as a nodeset. 
164 | func (xpath *XPath) ResultAsNodeset() (nodes []unsafe.Pointer, err error) { 165 | if xpath.ResultPtr == nil { 166 | return 167 | } 168 | 169 | if xpath.ReturnType() != XPATH_NODESET { 170 | err = errors.New("Cannot convert XPath result to nodeset") 171 | } 172 | 173 | if nodesetPtr := xpath.ResultPtr.nodesetval; nodesetPtr != nil { 174 | if nodesetSize := int(nodesetPtr.nodeNr); nodesetSize > 0 { 175 | nodes = make([]unsafe.Pointer, nodesetSize) 176 | for i := 0; i < nodesetSize; i++ { 177 | nodes[i] = unsafe.Pointer(C.fetchNode(nodesetPtr, C.int(i))) 178 | } 179 | } 180 | } 181 | return 182 | } 183 | 184 | // Coerce the result into a string 185 | func (xpath *XPath) ResultAsString() (val string, err error) { 186 | if xpath.ReturnType() != XPATH_STRING { 187 | xpath.ResultPtr = C.xmlXPathConvertString(xpath.ResultPtr) 188 | } 189 | val = C.GoString((*C.char)(unsafe.Pointer(xpath.ResultPtr.stringval))) 190 | return 191 | } 192 | 193 | // Coerce the result into a number 194 | func (xpath *XPath) ResultAsNumber() (val float64, err error) { 195 | if xpath.ReturnType() != XPATH_NUMBER { 196 | xpath.ResultPtr = C.xmlXPathConvertNumber(xpath.ResultPtr) 197 | } 198 | val = float64(xpath.ResultPtr.floatval) 199 | return 200 | } 201 | 202 | // Coerce the result into a boolean 203 | func (xpath *XPath) ResultAsBoolean() (val bool, err error) { 204 | xpath.ResultPtr = C.xmlXPathConvertBoolean(xpath.ResultPtr) 205 | val = xpath.ResultPtr.boolval != 0 206 | return 207 | } 208 | 209 | // Add a variable resolver. 210 | func (xpath *XPath) SetResolver(v VariableScope) { 211 | C.set_var_lookup(xpath.ContextPtr, unsafe.Pointer(&v)) 212 | C.set_function_lookup(xpath.ContextPtr, unsafe.Pointer(&v)) 213 | } 214 | 215 | // SetContextPosition sets the internal values needed to 216 | // determine the values of position() and last() for the 217 | // current context node. 
218 | func (xpath *XPath) SetContextPosition(position, size int) { 219 | xpath.ContextPtr.proximityPosition = C.int(position) 220 | xpath.ContextPtr.contextSize = C.int(size) 221 | } 222 | 223 | // GetContextPosition retrieves the internal values used to 224 | // determine the values of position() and last() for the 225 | // current context node. 226 | 227 | // This allows values to saved and restored during processing 228 | // of a document. 229 | func (xpath *XPath) GetContextPosition() (position, size int) { 230 | position = int(xpath.ContextPtr.proximityPosition) 231 | size = int(xpath.ContextPtr.contextSize) 232 | return 233 | } 234 | 235 | func (xpath *XPath) Free() { 236 | if xpath.ContextPtr != nil { 237 | C.xmlXPathFreeContext(xpath.ContextPtr) 238 | xpath.ContextPtr = nil 239 | } 240 | if xpath.ResultPtr != nil { 241 | C.xmlXPathFreeObject(xpath.ResultPtr) 242 | xpath.ResultPtr = nil 243 | } 244 | } 245 | 246 | func XPathObjectToValue(obj C.xmlXPathObjectPtr) (result interface{}) { 247 | rt := XPathObjectType(C.getXPathObjectType(obj)) 248 | switch rt { 249 | case XPATH_NODESET, XPATH_XSLT_TREE: 250 | if nodesetPtr := obj.nodesetval; nodesetPtr != nil { 251 | if nodesetSize := int(nodesetPtr.nodeNr); nodesetSize > 0 { 252 | nodes := make([]unsafe.Pointer, nodesetSize) 253 | for i := 0; i < nodesetSize; i++ { 254 | nodes[i] = unsafe.Pointer(C.fetchNode(nodesetPtr, C.int(i))) 255 | } 256 | result = nodes 257 | return 258 | } 259 | } 260 | result = nil 261 | case XPATH_NUMBER: 262 | obj = C.xmlXPathConvertNumber(obj) 263 | result = float64(obj.floatval) 264 | case XPATH_BOOLEAN: 265 | obj = C.xmlXPathConvertBoolean(obj) 266 | result = obj.boolval != 0 267 | default: 268 | obj = C.xmlXPathConvertString(obj) 269 | result = C.GoString((*C.char)(unsafe.Pointer(obj.stringval))) 270 | } 271 | return 272 | } 273 | -------------------------------------------------------------------------------- /xml/document_test.go: 
-------------------------------------------------------------------------------- 1 | package xml 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | "path/filepath" 8 | "runtime" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | func TestDocuments(t *testing.T) { 14 | if runtime.GOOS == "windows" { 15 | return 16 | } 17 | tests, err := collectTests("document") 18 | 19 | if len(err) > 0 { 20 | t.Errorf(err) 21 | } 22 | 23 | errors := make([]string, 0) 24 | 25 | print("\nTesting: Basic Parsing [") 26 | 27 | for _, test := range tests { 28 | error := RunDocumentParseTest(t, test) 29 | 30 | if error != nil { 31 | errors = append(errors, fmt.Sprintf("Test %v failed:\n%v\n", test, *error)) 32 | print("F") 33 | } else { 34 | print(".") 35 | } 36 | } 37 | 38 | println("]") 39 | 40 | if len(errors) > 0 { 41 | errorMessage := "\t" + strings.Join(strings.Split(strings.Join(errors, "\n\n"), "\n"), "\n\t") 42 | t.Errorf("\nSome tests failed! (%d passed / %d total) :\n%v", len(tests)-len(errors), len(tests), errorMessage) 43 | } else { 44 | fmt.Printf("\nAll (%d) tests passed!\n", len(tests)) 45 | } 46 | } 47 | 48 | func TestBufferedDocuments(t *testing.T) { 49 | if runtime.GOOS == "windows" { 50 | return 51 | } 52 | tests, err := collectTests("document") 53 | 54 | if len(err) > 0 { 55 | t.Errorf(err) 56 | } 57 | 58 | errors := make([]string, 0) 59 | 60 | print("\nTesting: Buffered Parsing [") 61 | 62 | for _, test := range tests { 63 | error := RunParseDocumentWithBufferTest(t, test) 64 | 65 | if error != nil { 66 | errors = append(errors, fmt.Sprintf("Test %v failed:\n%v\n", test, *error)) 67 | print("F") 68 | } else { 69 | print(".") 70 | } 71 | } 72 | 73 | println("]") 74 | 75 | if len(errors) > 0 { 76 | errorMessage := "\t" + strings.Join(strings.Split(strings.Join(errors, "\n\n"), "\n"), "\n\t") 77 | t.Errorf("\nSome tests failed! 
(%d passed / %d total) :\n%v", len(tests)-len(errors), len(tests), errorMessage) 78 | } else { 79 | fmt.Printf("\nAll (%d) tests passed!\n", len(tests)) 80 | } 81 | } 82 | 83 | func RunParseDocumentWithBufferTest(t *testing.T, name string) (error *string) { 84 | var errorMessage string 85 | offset := "\t" 86 | 87 | defer CheckXmlMemoryLeaks(t) 88 | 89 | input, output, dataError := getTestData(name) 90 | 91 | if len(dataError) > 0 { 92 | errorMessage += dataError 93 | } 94 | 95 | buffer := make([]byte, 500000) 96 | 97 | doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 98 | 99 | if err != nil { 100 | errorMessage = fmt.Sprintf("parsing error:%v\n", err) 101 | } 102 | 103 | if string(doc.ToBuffer(buffer)) != string(output) { 104 | formattedOutput := offset + strings.Join(strings.Split("["+doc.String()+"]", "\n"), "\n"+offset) 105 | formattedExpectedOutput := offset + strings.Join(strings.Split("["+string(output)+"]", "\n"), "\n"+offset) 106 | errorMessage = fmt.Sprintf("%v-- Got --\n%v\n%v-- Expected --\n%v\n", offset, formattedOutput, offset, formattedExpectedOutput) 107 | } 108 | doc.Free() 109 | 110 | if len(errorMessage) > 0 { 111 | return &errorMessage 112 | } 113 | return nil 114 | 115 | } 116 | 117 | func RunDocumentParseTest(t *testing.T, name string) (error *string) { 118 | 119 | var errorMessage string 120 | offset := "\t" 121 | 122 | defer CheckXmlMemoryLeaks(t) 123 | 124 | input, output, dataError := getTestData(name) 125 | 126 | if len(dataError) > 0 { 127 | errorMessage += dataError 128 | } 129 | 130 | doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 131 | 132 | if err != nil { 133 | errorMessage = fmt.Sprintf("parsing error:%v\n", err) 134 | } 135 | 136 | if doc.String() != string(output) { 137 | formattedOutput := offset + strings.Join(strings.Split("["+doc.String()+"]", "\n"), "\n"+offset) 138 | formattedExpectedOutput := offset + 
strings.Join(strings.Split("["+string(output)+"]", "\n"), "\n"+offset) 139 | errorMessage = fmt.Sprintf("%v-- Got --\n%v\n%v-- Expected --\n%v\n", offset, formattedOutput, offset, formattedExpectedOutput) 140 | testOutput := filepath.Join(name, "test_output.txt") 141 | ioutil.WriteFile(testOutput, []byte(doc.String()), os.FileMode(0666)) 142 | errorMessage += fmt.Sprintf("%v Output test output to: %v\n", offset, testOutput) 143 | } 144 | doc.Free() 145 | 146 | if len(errorMessage) > 0 { 147 | return &errorMessage 148 | } 149 | return nil 150 | 151 | } 152 | 153 | func BenchmarkDocOutput(b *testing.B) { 154 | b.StopTimer() 155 | 156 | tests, err := collectTests("document") 157 | 158 | if len(err) > 0 { 159 | fmt.Printf(err) 160 | return 161 | } 162 | 163 | docs := make([]*XmlDocument, 0) 164 | 165 | for _, testName := range tests { 166 | 167 | input, _, dataError := getTestData(testName) 168 | 169 | if len(dataError) > 0 { 170 | fmt.Printf(dataError) 171 | return 172 | } 173 | doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 174 | 175 | if err != nil { 176 | fmt.Printf("parsing error:%v\n", err) 177 | return 178 | } 179 | docs = append(docs, doc) 180 | } 181 | 182 | b.StartTimer() 183 | 184 | for i := 0; i < b.N; i++ { 185 | for index, _ := range tests { 186 | _ = docs[index].String() 187 | } 188 | } 189 | 190 | } 191 | 192 | func BenchmarkDocOutputToBuffer(b *testing.B) { 193 | b.StopTimer() 194 | 195 | tests, err := collectTests("document") 196 | 197 | if len(err) > 0 { 198 | fmt.Printf(err) 199 | return 200 | } 201 | 202 | docs := make([]*XmlDocument, 0) 203 | 204 | for _, testName := range tests { 205 | 206 | input, _, dataError := getTestData(testName) 207 | 208 | if len(dataError) > 0 { 209 | fmt.Printf(dataError) 210 | return 211 | } 212 | doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 213 | 214 | if err != nil { 215 | fmt.Printf("parsing error:%v\n", err) 216 | return 
217 | } 218 | docs = append(docs, doc) 219 | } 220 | 221 | buffer := make([]byte, 500*1024) 222 | 223 | b.StartTimer() 224 | 225 | for i := 0; i < b.N; i++ { 226 | 227 | for index, _ := range tests { 228 | 229 | _ = docs[index].ToBuffer(buffer) 230 | 231 | } 232 | } 233 | 234 | } 235 | 236 | func TestRemoveNamespaces(t *testing.T) { 237 | xml := "" 238 | xml_no_namespace := "" 239 | 240 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 241 | doc.Root().RecursivelyRemoveNamespaces() 242 | doc2, _ := Parse([]byte(xml_no_namespace), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 243 | 244 | output := fmt.Sprintf("%v", doc) 245 | output_no_namespace := fmt.Sprintf("%v", doc2) 246 | if output != output_no_namespace { 247 | t.Errorf("Xml namespaces not removed!") 248 | } 249 | } 250 | 251 | func TestRemoveDefaultNamespace(t *testing.T) { 252 | xml := ` 253 | 254 | 255 | 256 | xyz 257 | 258 | 259 | 260 | ` 261 | 262 | xml_no_namespace := ` 263 | 264 | 265 | 266 | xyz 267 | 268 | 269 | 270 | ` 271 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 272 | doc.Root().RemoveDefaultNamespace() 273 | doc2, _ := Parse([]byte(xml_no_namespace), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 274 | 275 | output := fmt.Sprintf("%v", doc) 276 | output_no_namespace := fmt.Sprintf("%v", doc2) 277 | if output != output_no_namespace { 278 | t.Errorf("Default namespace not removed!") 279 | } 280 | } 281 | 282 | func TestNodeById(t *testing.T) { 283 | xml := "\n\n]>\nFailedSuccess" 284 | 285 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 286 | p := doc.NodeById("W11") 287 | 288 | if p == nil { 289 | t.Errorf("Did not find node by ID!") 290 | return 291 | } 292 | 293 | output := fmt.Sprintf("%v", p.Content()) 294 | if output != "Success" { 295 | t.Errorf("Incorrect node selected by ID!") 296 | } 297 
| } 298 | 299 | func TestUnparsedEntityURI(t *testing.T) { 300 | xml := "\n\n\n\n]>\n" 301 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 302 | expected := "test.jpg" 303 | actual := doc.UnparsedEntityURI("test") 304 | 305 | if actual == "" { 306 | t.Errorf("Did not find unparsed entity 'test'") 307 | return 308 | } 309 | 310 | if actual != expected { 311 | t.Errorf("Expected '%s', but got '%s' calling doc.UnparsedEntityURI", expected, actual) 312 | } 313 | } 314 | -------------------------------------------------------------------------------- /xml/node_test.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | import "testing" 4 | import "fmt" 5 | 6 | func TestAddChild(t *testing.T) { 7 | 8 | docAssertion := func(doc *XmlDocument) (string, string, string) { 9 | expectedDocAfterAdd := 10 | ` 11 | 12 | 13 | 14 | ` 15 | doc.Root().AddChild("") 16 | 17 | return doc.String(), expectedDocAfterAdd, "output of the xml doc after AddChild does not match" 18 | } 19 | 20 | nodeAssertion := func(doc *XmlDocument) (string, string, string) { 21 | expectedNodeAfterAdd := 22 | ` 23 | 24 | ` 25 | 26 | return doc.Root().String(), expectedNodeAfterAdd, "the output of the xml root after AddChild does not match" 27 | } 28 | 29 | RunTest(t, "node", "add_child", nil, docAssertion, nodeAssertion) 30 | 31 | } 32 | 33 | func TestAddAncestorAsChild(t *testing.T) { 34 | docAssertion := func(doc *XmlDocument) (string, string, string) { 35 | expectedDocAfterAdd := 36 | ` 37 | 38 | ` 39 | 40 | foo := doc.Root() 41 | bar := foo.FirstChild() 42 | holiday := bar.FirstChild() 43 | fun := holiday.FirstChild() 44 | fun.AddChild(bar) 45 | 46 | return doc.String(), expectedDocAfterAdd, "output of the xml doc after AddChild does not match" 47 | } 48 | 49 | nodeAssertion := func(doc *XmlDocument) (string, string, string) { 50 | expectedNodeAfterAdd := 51 | `` 52 | 53 | return doc.Root().String(), 
expectedNodeAfterAdd, "the output of the xml root after AddChild does not match" 54 | } 55 | 56 | RunTest(t, "node", "add_ancestor", nil, docAssertion, nodeAssertion) 57 | 58 | } 59 | 60 | func addChildBenchLogic(b *testing.B, doc *XmlDocument) { 61 | root := doc.Root() 62 | 63 | for i := 0; i < b.N; i++ { 64 | root.AddChild("") 65 | } 66 | } 67 | 68 | func BenchmarkAddChild(b *testing.B) { 69 | RunBenchmark(b, "document", "big_un", addChildBenchLogic) // Run against big doc 70 | } 71 | 72 | func BenchmarkAddChildBigDoc(b *testing.B) { 73 | RunBenchmark(b, "node", "add_child", addChildBenchLogic) 74 | } 75 | 76 | func TestAddPreviousSibling(t *testing.T) { 77 | 78 | testLogic := func(t *testing.T, doc *XmlDocument) { 79 | err := doc.Root().AddPreviousSibling("") 80 | 81 | if err != nil { 82 | t.Errorf("Error adding previous sibling:\n%v\n", err.Error()) 83 | } 84 | } 85 | 86 | RunTest(t, "node", "add_previous_sibling", testLogic) 87 | } 88 | 89 | func TestAddPreviousSibling2(t *testing.T) { 90 | 91 | testLogic := func(t *testing.T, doc *XmlDocument) { 92 | err := doc.Root().FirstChild().AddPreviousSibling("COOL") 93 | 94 | if err != nil { 95 | t.Errorf("Error adding previous sibling:\n%v\n", err.Error()) 96 | } 97 | } 98 | 99 | RunTest(t, "node", "add_previous_sibling2", testLogic) 100 | } 101 | 102 | func TestAddNextSibling(t *testing.T) { 103 | 104 | testLogic := func(t *testing.T, doc *XmlDocument) { 105 | doc.Root().AddNextSibling("") 106 | } 107 | 108 | RunTest(t, "node", "add_next_sibling", testLogic) 109 | } 110 | 111 | func TestSetContent(t *testing.T) { 112 | 113 | testLogic := func(t *testing.T, doc *XmlDocument) { 114 | root := doc.Root() 115 | root.SetContent("") 116 | } 117 | 118 | RunTest(t, "node", "set_content", testLogic) 119 | } 120 | 121 | func BenchmarkSetContent(b *testing.B) { 122 | 123 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 124 | root := doc.Root() 125 | for i := 0; i < b.N; i++ { 126 | root.SetContent("") 127 | } 128 | } 
129 | 130 | RunBenchmark(b, "node", "set_content", benchmarkLogic) 131 | } 132 | 133 | func TestSetChildren(t *testing.T) { 134 | testLogic := func(t *testing.T, doc *XmlDocument) { 135 | root := doc.Root() 136 | root.SetChildren("") 137 | } 138 | 139 | RunTest(t, "node", "set_children", testLogic) 140 | } 141 | 142 | func BenchmarkSetChildren(b *testing.B) { 143 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 144 | root := doc.Root() 145 | for i := 0; i < b.N; i++ { 146 | root.SetChildren("") 147 | } 148 | } 149 | 150 | RunBenchmark(b, "node", "set_children", benchmarkLogic) 151 | } 152 | 153 | func TestReplace(t *testing.T) { 154 | 155 | testLogic := func(t *testing.T, doc *XmlDocument) { 156 | root := doc.Root() 157 | root.Replace("") 158 | } 159 | 160 | rootAssertion := func(doc *XmlDocument) (string, string, string) { 161 | root := doc.Root() 162 | return root.String(), "", "the output of the xml root does not match" 163 | } 164 | 165 | RunTest(t, "node", "replace", testLogic, rootAssertion) 166 | } 167 | 168 | func BenchmarkReplace(b *testing.B) { 169 | 170 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 171 | root := doc.Root() 172 | for i := 0; i < b.N; i++ { 173 | root.Replace("") 174 | root = doc.Root() //once the node has been replaced, we need to get a new node 175 | } 176 | } 177 | 178 | RunBenchmark(b, "node", "replace", benchmarkLogic) 179 | } 180 | 181 | func TestAttributes(t *testing.T) { 182 | 183 | testLogic := func(t *testing.T, doc *XmlDocument) { 184 | 185 | root := doc.Root() 186 | attributes := root.Attributes() 187 | 188 | if len(attributes) != 2 || attributes["myname"].String() != "ff" { 189 | fmt.Printf("%v, %q\n", attributes, attributes["myname"].String()) 190 | t.Error("root's attributes do not match") 191 | } 192 | 193 | child := root.FirstChild() 194 | childAttributes := child.Attributes() 195 | 196 | if len(childAttributes) != 1 || childAttributes["class"].String() != "shine" { 197 | t.Error("child's attributes 
do not match") 198 | } 199 | } 200 | 201 | RunTest(t, "node", "attributes", testLogic) 202 | 203 | } 204 | 205 | func BenchmarkAttributes(b *testing.B) { 206 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) { 207 | 208 | root := doc.Root() 209 | 210 | for i := 0; i < b.N; i++ { 211 | root.SetAttr("garfield", "spaghetti") 212 | } 213 | } 214 | 215 | RunBenchmark(b, "node", "attributes", benchmarkLogic) 216 | } 217 | 218 | func TestInner(t *testing.T) { 219 | 220 | testLogic := func(t *testing.T, doc *XmlDocument) { 221 | root := doc.Root() 222 | root.SetInnerHtml("") 223 | } 224 | 225 | RunTest(t, "node", "inner", testLogic) 226 | } 227 | func TestInnerWithAttributes(t *testing.T) { 228 | 229 | testLogic := func(t *testing.T, doc *XmlDocument) { 230 | root := doc.Root() 231 | root.SetInnerHtml("") 232 | } 233 | 234 | RunTest(t, "node", "inner_with_attributes", testLogic) 235 | } 236 | 237 | func TestSetNamespace(t *testing.T) { 238 | testLogic := func(t *testing.T, doc *XmlDocument) { 239 | root := doc.Root() 240 | root.SetNamespace("foo", "bar") 241 | } 242 | 243 | RunTest(t, "node", "set_namespace", testLogic) 244 | } 245 | 246 | func TestSetDefaultNamespace(t *testing.T) { 247 | testLogic := func(t *testing.T, doc *XmlDocument) { 248 | root := doc.Root() 249 | root.SetNamespace("", "bar") 250 | } 251 | 252 | RunTest(t, "node", "set_default_namespace", testLogic) 253 | } 254 | 255 | func TestDeclareNamespace(t *testing.T) { 256 | testLogic := func(t *testing.T, doc *XmlDocument) { 257 | root := doc.Root() 258 | root.DeclareNamespace("foo", "bar") 259 | child := root.FirstChild() 260 | child.SetNamespace("foo", "bar") 261 | } 262 | 263 | RunTest(t, "node", "declare_namespace", testLogic) 264 | } 265 | 266 | func TestNamespaceAttribute(t *testing.T) { 267 | testLogic := func(t *testing.T, doc *XmlDocument) { 268 | root := doc.Root() 269 | root.DeclareNamespace("foo", "bar") 270 | root.SetNsAttr("bar", "hello", "world") 271 | } 272 | 273 | RunTest(t, "node", 
"set_ns_attr", testLogic) 274 | } 275 | 276 | func TestUnformattedXml(t *testing.T) { 277 | xml := "\n\n\tTest\n" 278 | expected := "\n\tTest\n" 279 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 280 | root := doc.Root() 281 | out := root.ToUnformattedXml() 282 | if out != expected { 283 | t.Errorf("TestUnformattedXml Expected: %v\nActual: %v", expected, out) 284 | } 285 | 286 | } 287 | 288 | func TestSerializewithFomat(t *testing.T) { 289 | xml := "\n\n\tTest\n" 290 | expected := "\n\tTest\n" 291 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 292 | root := doc.Root() 293 | b, size := root.SerializeWithFormat(XML_SAVE_AS_XML|XML_SAVE_NO_DECL, nil, nil) 294 | if b == nil { 295 | t.Errorf("SerializeWithFormat Expected: %v\nActual: (nil)", expected) 296 | return 297 | } 298 | out := string(b[:size]) 299 | if out != expected { 300 | t.Errorf("SerializeWithFormat Expected: %v\nActual: %v", expected, out) 301 | } 302 | 303 | } 304 | 305 | func TestEvalVariableExpr(t *testing.T) { 306 | xml := "" 307 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 308 | s := newSimpleVariableScope() 309 | root := doc.Root() 310 | s.variables["spec"] = "XSLT 1.0" 311 | s.variables["number"] = 7 312 | v, err := root.EvalXPath("$spec", s) 313 | if err != nil { 314 | t.Errorf("%v", err) 315 | } 316 | out := v.(string) 317 | if out != "XSLT 1.0" { 318 | t.Errorf("TestEvalVariableExpr Expected: %v\nActual: %v", "XSLT 1.0", out) 319 | } 320 | } 321 | 322 | func TestEvalStringExpr(t *testing.T) { 323 | xml := "" 324 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 325 | root := doc.Root() 326 | v, err := root.EvalXPath("\"Hello\"", nil) 327 | if err != nil { 328 | t.Errorf("%v", err) 329 | } 330 | out := v.(string) 331 | if out != "Hello" { 332 | t.Errorf("TestEvalStringExpr Expected: 
%v\nActual: %v", "Hello", out) 333 | } 334 | } 335 | 336 | func TestEvalNumericExpr(t *testing.T) { 337 | xml := "" 338 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes) 339 | root := doc.Root() 340 | v, err := root.EvalXPath("7", nil) 341 | if err != nil { 342 | t.Errorf("%v", err) 343 | } 344 | out := v.(float64) 345 | if out != 7 { 346 | t.Errorf("TestEvalNumericExpr Expected: %v\nActual: %v", 7, out) 347 | } 348 | } 349 | 350 | func TestDisableOutputEscaping(t *testing.T) { 351 | doc := CreateEmptyDocument(DefaultEncodingBytes, DefaultEncodingBytes) 352 | n := doc.CreateTextNode("
") 353 | 354 | //normal usage escapes the output 355 | escaped := "<br/>" 356 | if n.String() != escaped { 357 | t.Errorf("TestDisableOutputEscaping (escaping enabled) Expected: %v\nActual: %v", escaped, n.String()) 358 | } 359 | 360 | //now we disable the output escaping 361 | unescaped := "
" 362 | n.DisableOutputEscaping() 363 | if n.String() != unescaped { 364 | t.Errorf("TestDisableOutputEscaping (escaping disabled) Expected: %v\nActual: %v", unescaped, n.String()) 365 | } 366 | } 367 | -------------------------------------------------------------------------------- /css/css.go: -------------------------------------------------------------------------------- 1 | package css 2 | 3 | // package main 4 | 5 | import ( 6 | "fmt" 7 | "github.com/moovweb/rubex" 8 | "strings" 9 | ) 10 | 11 | type Lexeme int 12 | 13 | const ( 14 | SPACES Lexeme = iota 15 | COMMA 16 | UNIVERSAL 17 | TYPE 18 | ELEMENT 19 | CLASS 20 | ID 21 | LBRACKET 22 | RBRACKET 23 | ATTR_NAME 24 | ATTR_VALUE 25 | EQUALS 26 | CONTAINS_CLASS 27 | DASH_PREFIXED 28 | STARTS_WITH 29 | ENDS_WITH 30 | CONTAINS 31 | MATCH_OP 32 | PSEUDO_CLASS 33 | FIRST_CHILD 34 | FIRST_OF_TYPE 35 | NTH_CHILD 36 | NTH_OF_TYPE 37 | ONLY_CHILD 38 | ONLY_OF_TYPE 39 | LAST_CHILD 40 | LAST_OF_TYPE 41 | NOT 42 | LPAREN 43 | RPAREN 44 | COEFFICIENT 45 | SIGNED 46 | UNSIGNED 47 | ODD 48 | EVEN 49 | N 50 | OPERATOR 51 | PLUS 52 | MINUS 53 | BINOMIAL 54 | ADJACENT_TO 55 | PRECEDES 56 | PARENT_OF 57 | ANCESTOR_OF 58 | // and a counter ... 
I can't believe I didn't think of this sooner 59 | NUM_LEXEMES 60 | ) 61 | 62 | var pattern [NUM_LEXEMES]string 63 | 64 | func init() { 65 | pattern[SPACES] = `\s+` 66 | pattern[COMMA] = `\s*,` 67 | pattern[UNIVERSAL] = `\*` 68 | pattern[TYPE] = `[_a-zA-Z]\w*` 69 | pattern[ELEMENT] = `(\*|[_a-zA-Z]\w*)` 70 | pattern[CLASS] = `\.[-\w]+` 71 | pattern[ID] = `\#[-\w]+` 72 | pattern[LBRACKET] = `\[` 73 | pattern[RBRACKET] = `\]` 74 | pattern[ATTR_NAME] = `[-_:a-zA-Z][-\w:.]*` 75 | pattern[ATTR_VALUE] = `("(\\.|[^"\\])*"|'(\\.|[^'\\])*')` 76 | pattern[EQUALS] = `=` 77 | pattern[CONTAINS_CLASS] = `~=` 78 | pattern[DASH_PREFIXED] = `\|=` 79 | pattern[STARTS_WITH] = `\^=` 80 | pattern[ENDS_WITH] = `\$=` 81 | pattern[CONTAINS] = `\*=` 82 | pattern[MATCH_OP] = "(" + strings.Join([]string{pattern[EQUALS], pattern[CONTAINS_CLASS], pattern[DASH_PREFIXED], pattern[STARTS_WITH], pattern[ENDS_WITH], pattern[CONTAINS]}, "|") + ")" 83 | pattern[PSEUDO_CLASS] = `:[-a-z]+` 84 | pattern[FIRST_CHILD] = `:first-child` 85 | pattern[FIRST_OF_TYPE] = `:first-of-type` 86 | pattern[NTH_CHILD] = `:nth-child` 87 | pattern[NTH_OF_TYPE] = `:nth-of-type` 88 | pattern[ONLY_CHILD] = `:only-child` 89 | pattern[ONLY_OF_TYPE] = `:only-of-type` 90 | pattern[LAST_CHILD] = `:last-child` 91 | pattern[LAST_OF_TYPE] = `:last-of-type` 92 | pattern[NOT] = `:not` 93 | pattern[LPAREN] = `\s*\(` 94 | pattern[RPAREN] = `\s*\)` 95 | pattern[COEFFICIENT] = `[-+]?(\d+)?` 96 | pattern[SIGNED] = `[-+]?\d+` 97 | pattern[UNSIGNED] = `\d+` 98 | pattern[ODD] = `odd` 99 | pattern[EVEN] = `even` 100 | pattern[N] = `[nN]` 101 | pattern[OPERATOR] = `[-+]` 102 | pattern[PLUS] = `\+` 103 | pattern[MINUS] = `-` 104 | pattern[BINOMIAL] = strings.Join([]string{pattern[COEFFICIENT], pattern[N], `\s*`, pattern[OPERATOR], `\s*`, pattern[UNSIGNED]}, "") 105 | pattern[ADJACENT_TO] = `\s*\+` 106 | pattern[PRECEDES] = `\s*~` 107 | pattern[PARENT_OF] = `\s*>` 108 | pattern[ANCESTOR_OF] = `\s+` 109 | } 110 | 111 | type Scope int 112 | 113 | 
const ( 114 | GLOBAL Scope = iota 115 | LOCAL 116 | ) 117 | 118 | func Convert(css string, scope Scope) string { 119 | matchers := allocate() 120 | defer deallocate(matchers) 121 | xpath, _ := selectors(matchers, []byte(css), scope) 122 | return xpath 123 | } 124 | 125 | func allocate() []*rubex.Regexp { 126 | // some overlap in here, but it'll make the parsing functions clearer 127 | matchers := make([]*rubex.Regexp, 0, NUM_LEXEMES) 128 | for _, p := range pattern { 129 | matchers = append(matchers, rubex.MustCompile(`\A`+p)) 130 | } 131 | return matchers 132 | } 133 | 134 | func deallocate(matchers []*rubex.Regexp) { 135 | for _, m := range matchers { 136 | m.Free() 137 | } 138 | } 139 | 140 | func selectors(matchers []*rubex.Regexp, input []byte, scope Scope) (string, []byte) { 141 | x, input := selector(matchers, input, scope) 142 | xs := []string{x} 143 | for peek(matchers, COMMA, input) { 144 | _, input = token(matchers, COMMA, input) 145 | x, input = selector(matchers, input, scope) 146 | xs = append(xs, x) 147 | } 148 | return strings.Join(xs, " | "), input 149 | } 150 | 151 | func selector(matchers []*rubex.Regexp, input []byte, scope Scope) (string, []byte) { 152 | var combinator Lexeme 153 | var xs []string 154 | if scope == LOCAL { 155 | xs = []string{"."} 156 | } 157 | if matched, remainder := token(matchers, PARENT_OF, input); matched != nil { 158 | combinator, input = PARENT_OF, remainder 159 | } else { 160 | combinator = ANCESTOR_OF 161 | } 162 | x, input := sequence(matchers, input, combinator) 163 | xs = append(xs, x) 164 | for { 165 | if matched, remainder := token(matchers, ADJACENT_TO, input); matched != nil { 166 | combinator, input = ADJACENT_TO, remainder 167 | } else if matched, remainder := token(matchers, PRECEDES, input); matched != nil { 168 | combinator, input = PRECEDES, remainder 169 | } else if matched, remainder := token(matchers, PARENT_OF, input); matched != nil { 170 | combinator, input = PARENT_OF, remainder 171 | } else if 
matched, remainder := token(matchers, ANCESTOR_OF, input); matched != nil { 172 | combinator, input = ANCESTOR_OF, remainder 173 | } else { 174 | break 175 | } 176 | x, input = sequence(matchers, input, combinator) 177 | xs = append(xs, x) 178 | } 179 | return strings.Join(xs, ""), input 180 | } 181 | 182 | func sequence(matchers []*rubex.Regexp, input []byte, combinator Lexeme) (string, []byte) { 183 | _, input = token(matchers, SPACES, input) 184 | x, ps := "", []string{} 185 | 186 | switch combinator { 187 | case ANCESTOR_OF: 188 | x = "/descendant-or-self::*/*" 189 | case PARENT_OF: 190 | x = "/child::*" 191 | case PRECEDES: 192 | x = "/following-sibling::*" 193 | case ADJACENT_TO: 194 | x = "/following-sibling::*" 195 | ps = append(ps, "position()=1") 196 | } 197 | 198 | if e, remainder := token(matchers, ELEMENT, input); e != nil { 199 | input = remainder 200 | if len(ps) > 0 { 201 | ps = append(ps, " and ") 202 | } 203 | ps = append(ps, "self::"+string(e)) 204 | if !(peek(matchers, ID, input) || peek(matchers, CLASS, input) || peek(matchers, PSEUDO_CLASS, input) || peek(matchers, LBRACKET, input)) { 205 | pstr := strings.Join(ps, "") 206 | if pstr != "" { 207 | pstr = fmt.Sprintf("[%s]", pstr) 208 | } 209 | return x + pstr, input 210 | } 211 | } 212 | q, input, connective := qualifier(matchers, input) 213 | if q == "" { 214 | panic("Invalid CSS selector") 215 | } 216 | if len(ps) > 0 { 217 | ps = append(ps, connective) 218 | } 219 | ps = append(ps, q) 220 | for q, r, c := qualifier(matchers, input); q != ""; q, r, c = qualifier(matchers, input) { 221 | ps, input = append(ps, c, q), r 222 | } 223 | pstr := strings.Join(ps, "") 224 | if combinator != NOT { 225 | pstr = fmt.Sprintf("[%s]", pstr) 226 | } 227 | return x + pstr, input 228 | } 229 | 230 | func qualifier(matchers []*rubex.Regexp, input []byte) (string, []byte, string) { 231 | p, connective := "", "" 232 | if t, remainder := token(matchers, CLASS, input); t != nil { 233 | p = 
fmt.Sprintf(`contains(concat(" ", @class, " "), " %s ")`, string(t[1:])) 234 | input = remainder 235 | connective = " and " 236 | } else if t, remainder := token(matchers, ID, input); t != nil { 237 | p, input, connective = fmt.Sprintf(`@id="%s"`, string(t[1:])), remainder, " and " 238 | } else if peek(matchers, PSEUDO_CLASS, input) { 239 | p, input, connective = pseudoClass(matchers, input) 240 | } else if peek(matchers, LBRACKET, input) { 241 | p, input = attribute(matchers, input) 242 | connective = " and " 243 | } 244 | return p, input, connective 245 | } 246 | 247 | func pseudoClass(matchers []*rubex.Regexp, input []byte) (string, []byte, string) { 248 | class, input := token(matchers, PSEUDO_CLASS, input) 249 | var p, connective string 250 | switch string(class) { 251 | case ":first-child": 252 | p, connective = "position()=1", " and " 253 | case ":first-of-type": 254 | p, connective = "position()=1", "][" 255 | case ":last-child": 256 | p, connective = "position()=last()", " and " 257 | case ":last-of-type": 258 | p, connective = "position()=last()", "][" 259 | case ":only-child": 260 | p, connective = "position() = 1 and position() = last()", " and " 261 | case ":only-of-type": 262 | p, connective = "position() = 1 and position() = last()", "][" 263 | case ":nth-child": 264 | p, input = nth(matchers, input) 265 | connective = " and " 266 | case ":nth-of-type": 267 | p, input = nth(matchers, input) 268 | connective = "][" 269 | case ":not": 270 | p, input = negate(matchers, input) 271 | connective = " and " 272 | default: 273 | panic(`Cannot convert CSS pseudo-class "` + string(class) + `" to XPath.`) 274 | } 275 | return p, input, connective 276 | } 277 | 278 | func nth(matchers []*rubex.Regexp, input []byte) (string, []byte) { 279 | lparen, input := token(matchers, LPAREN, input) 280 | if lparen == nil { 281 | panic(":nth-child and :nth-of-type require an parenthesized argument") 282 | } 283 | _, input = token(matchers, SPACES, input) 284 | var expr string 
285 | if e, rem := token(matchers, EVEN, input); e != nil { 286 | expr, input = "position() mod 2 = 0", rem 287 | } else if e, rem := token(matchers, ODD, input); e != nil { 288 | expr, input = "position() mod 2 = 1", rem 289 | } else if e, _ := token(matchers, BINOMIAL, input); e != nil { 290 | var coefficient, operator, constant []byte 291 | coefficient, input = token(matchers, COEFFICIENT, input) 292 | switch string(coefficient) { 293 | case "", "+": 294 | coefficient = []byte("1") 295 | case "-": 296 | coefficient = []byte("-1") 297 | } 298 | _, input = token(matchers, N, input) 299 | _, input = token(matchers, SPACES, input) 300 | operator, input = token(matchers, OPERATOR, input) 301 | _, input = token(matchers, SPACES, input) 302 | constant, input = token(matchers, UNSIGNED, input) 303 | expr = fmt.Sprintf("(position() %s %s) mod %s = 0", invert(string(operator)), string(constant), string(coefficient)) 304 | } else if e, rem := token(matchers, SIGNED, input); e != nil { 305 | expr, input = "position() = "+string(e), rem 306 | } else { 307 | panic("Invalid argument to :nth-child or :nth-of-type.") 308 | } 309 | fmt.Println(string(input)) 310 | _, input = token(matchers, SPACES, input) 311 | rparen, input := token(matchers, RPAREN, input) 312 | if rparen == nil { 313 | panic("Unterminated argument to :nth-child or :nth-of-type.") 314 | } 315 | return expr, input 316 | } 317 | 318 | func invert(op string) string { 319 | op = strings.TrimSpace(op) 320 | if op == "+" { 321 | op = "-" 322 | } else { 323 | op = "+" 324 | } 325 | return op 326 | } 327 | 328 | func negate(matchers []*rubex.Regexp, input []byte) (string, []byte) { 329 | _, input = token(matchers, SPACES, input) 330 | lparen, input := token(matchers, LPAREN, input) 331 | if lparen == nil { 332 | panic(":not requires a parenthesized argument.") 333 | } 334 | _, input = token(matchers, SPACES, input) 335 | p, input := sequence(matchers, input, NOT) 336 | _, input = token(matchers, SPACES, input) 337 | 
rparen, input := token(matchers, RPAREN, input) 338 | if rparen == nil { 339 | panic("Unterminated argument to :not.") 340 | } 341 | return fmt.Sprintf("not(%s)", p), input 342 | } 343 | 344 | func attribute(matchers []*rubex.Regexp, input []byte) (string, []byte) { 345 | _, input = token(matchers, LBRACKET, input) 346 | _, input = token(matchers, SPACES, input) 347 | name, input := token(matchers, ATTR_NAME, input) 348 | if name == nil { 349 | panic("Attribute selector requires an attribute name.") 350 | } 351 | _, input = token(matchers, SPACES, input) 352 | if rbracket, remainder := token(matchers, RBRACKET, input); rbracket != nil { 353 | return "@" + string(name), remainder 354 | } 355 | op, input := token(matchers, MATCH_OP, input) 356 | if op == nil { 357 | panic("Missing operator in attribute selector.") 358 | } 359 | _, input = token(matchers, SPACES, input) 360 | val, input := token(matchers, ATTR_VALUE, input) 361 | if val == nil { 362 | panic("Missing value in attribute selector.") 363 | } 364 | _, input = token(matchers, SPACES, input) 365 | rbracket, input := token(matchers, RBRACKET, input) 366 | if rbracket == nil { 367 | panic("Unterminated attribute selector.") 368 | } 369 | var expr string 370 | n, v := string(name), string(val) 371 | switch string(op) { 372 | case "=": 373 | expr = fmt.Sprintf("@%s=%s", n, v) 374 | case "~=": 375 | expr = fmt.Sprintf(`contains(concat(" ", @%s, " "), concat(" ", %s, " "))`, n, v) 376 | case "|=": 377 | expr = fmt.Sprintf(`(@%s=%s or starts-with(@%s, concat(%s, "-")))`, n, v, n, v) 378 | case "^=": 379 | expr = fmt.Sprintf("starts-with(@%s, %s)", n, v) 380 | case "$=": 381 | // oy, libxml doesn't support ends-with 382 | // generate something like: div[substring(@class, string-length(@class) - string-length('foo') + 1) = 'foo'] 383 | expr = fmt.Sprintf("substring(@%s, string-length(@%s) - string-length(%s) + 1) = %s", n, n, v, v) 384 | case "*=": 385 | expr = fmt.Sprintf("contains(@%s, %s)", n, v) 386 | } 387 | 
return expr, input 388 | } 389 | 390 | func token(matchers []*rubex.Regexp, lexeme Lexeme, input []byte) ([]byte, []byte) { 391 | matched := matchers[lexeme].Find(input) 392 | length := len(matched) 393 | if length == 0 { 394 | matched = nil 395 | } 396 | return matched, input[length:] 397 | } 398 | 399 | func peek(matchers []*rubex.Regexp, lexeme Lexeme, input []byte) bool { 400 | matched, _ := token(matchers, lexeme, input) 401 | return matched != nil 402 | } 403 | -------------------------------------------------------------------------------- /xml/document.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | /* 4 | #cgo pkg-config: libxml-2.0 5 | 6 | #include "helper.h" 7 | */ 8 | import "C" 9 | 10 | import ( 11 | "errors" 12 | "github.com/moovweb/gokogiri/help" 13 | . "github.com/moovweb/gokogiri/util" 14 | "github.com/moovweb/gokogiri/xpath" 15 | "os" 16 | "unsafe" 17 | ) 18 | 19 | type Document interface { 20 | /* Nokogiri APIs */ 21 | CreateElementNode(string) *ElementNode 22 | CreateCDataNode(string) *CDataNode 23 | CreateTextNode(string) *TextNode 24 | CreateCommentNode(string) *CommentNode 25 | CreatePINode(string, string) *ProcessingInstructionNode 26 | ParseFragment([]byte, []byte, ParseOption) (*DocumentFragment, error) 27 | 28 | DocPtr() unsafe.Pointer 29 | DocType() NodeType 30 | DocRef() Document 31 | InputEncoding() []byte 32 | OutputEncoding() []byte 33 | DocXPathCtx() *xpath.XPath 34 | AddUnlinkedNode(unsafe.Pointer) 35 | RemoveUnlinkedNode(unsafe.Pointer) bool 36 | Free() 37 | String() string 38 | Root() *ElementNode 39 | NodeById(string) *ElementNode 40 | BookkeepFragment(*DocumentFragment) 41 | 42 | RecursivelyRemoveNamespaces() error 43 | UnparsedEntityURI(string) string 44 | Uri() string 45 | } 46 | 47 | // ParseOption values allow you to tune the behaviour of the parsing engine. 
48 | type ParseOption int 49 | 50 | const ( 51 | XML_PARSE_RECOVER ParseOption = 1 << iota // recover on errors 52 | XML_PARSE_NOENT // substitute entities 53 | XML_PARSE_DTDLOAD // load the external subset 54 | XML_PARSE_DTDATTR // default DTD attributes 55 | XML_PARSE_DTDVALID // validate with the DTD 56 | XML_PARSE_NOERROR // suppress error reports 57 | XML_PARSE_NOWARNING // suppress warning reports 58 | XML_PARSE_PEDANTIC // pedantic error reporting 59 | XML_PARSE_NOBLANKS // remove blank nodes 60 | XML_PARSE_SAX1 // use the SAX1 interface internally 61 | XML_PARSE_XINCLUDE // Implement XInclude substitution 62 | XML_PARSE_NONET // Forbid network access 63 | XML_PARSE_NODICT // Do not reuse the context dictionary 64 | XML_PARSE_NSCLEAN // remove redundant namespace declarations 65 | XML_PARSE_NOCDATA // merge CDATA as text nodes 66 | XML_PARSE_NOXINCNODE // do not generate XINCLUDE START/END nodes 67 | XML_PARSE_COMPACT // compact small text nodes; makes tree read-only 68 | XML_PARSE_OLD10 // parse using XML-1.0 before update 5 69 | XML_PARSE_NOBASEFIX // do not fixup XINCLUDE xml:base uris 70 | XML_PARSE_HUGE // relax any hardcoded limit from the parser 71 | XML_PARSE_OLDSAX // parse using SAX2 interface before 2.7.0 72 | XML_PARSE_IGNORE_ENC // ignore internal document encoding hint 73 | XML_PARSE_BIG_LINES // Store big line numbers in text PSVI field 74 | ) 75 | 76 | //DefaultParseOption provides liberal parsing highly tolerant of invalid documents. Errors and warnings 77 | // are suppressed and the DTD is not processed. 78 | const DefaultParseOption ParseOption = XML_PARSE_RECOVER | 79 | XML_PARSE_NONET | 80 | XML_PARSE_NOERROR | 81 | XML_PARSE_NOWARNING 82 | 83 | //StrictParseOption provides standard-compliant parsing. The DTD is processed, entity 84 | // substitutions are made, and errors and warnings are reported back. 
85 | const StrictParseOption ParseOption = XML_PARSE_NOENT | 86 | XML_PARSE_DTDLOAD | 87 | XML_PARSE_DTDATTR | 88 | XML_PARSE_NOCDATA 89 | 90 | //DefaultEncoding is UTF-8, which is also the default for both libxml2 and Go strings. 91 | const DefaultEncoding = "utf-8" 92 | 93 | var ERR_FAILED_TO_PARSE_XML = errors.New("failed to parse xml input") 94 | 95 | /* 96 | XmlDocument is the primary interface for working with XML documents. 97 | */ 98 | type XmlDocument struct { 99 | Ptr *C.xmlDoc 100 | Me Document 101 | Node 102 | InEncoding []byte 103 | OutEncoding []byte 104 | UnlinkedNodes map[*C.xmlNode]bool 105 | XPathCtx *xpath.XPath 106 | Type NodeType 107 | InputLen int 108 | 109 | fragments []*DocumentFragment //save the pointers to free them when the doc is freed 110 | } 111 | 112 | //DefaultEncodingBytes allows us to conveniently pass the DefaultEncoding to various functions that 113 | // expect the encoding as a byte slice. 114 | var DefaultEncodingBytes = []byte(DefaultEncoding) 115 | 116 | const initialFragments = 2 117 | 118 | //NewDocument wraps the pointer to the C struct. 119 | // 120 | // TODO: this should probably not be exported. 
121 | func NewDocument(p unsafe.Pointer, contentLen int, inEncoding, outEncoding []byte) (doc *XmlDocument) { 122 | inEncoding = AppendCStringTerminator(inEncoding) 123 | outEncoding = AppendCStringTerminator(outEncoding) 124 | 125 | xmlNode := &XmlNode{Ptr: (*C.xmlNode)(p)} 126 | docPtr := (*C.xmlDoc)(p) 127 | doc = &XmlDocument{Ptr: docPtr, Node: xmlNode, InEncoding: inEncoding, OutEncoding: outEncoding, InputLen: contentLen} 128 | doc.UnlinkedNodes = make(map[*C.xmlNode]bool) 129 | doc.XPathCtx = xpath.NewXPath(p) 130 | doc.Type = xmlNode.NodeType() 131 | doc.fragments = make([]*DocumentFragment, 0, initialFragments) 132 | doc.Me = doc 133 | xmlNode.Document = doc 134 | //runtime.SetFinalizer(doc, (*XmlDocument).Free) 135 | return 136 | } 137 | 138 | // Parse creates an XmlDocument from some pre-existing content where the input encoding is known. Byte arrays created from 139 | // a Go string are utf-8 encoded (you can pass DefaultEncodingBytes in this scenario). 140 | // 141 | // If you want to build up a document programatically, calling CreateEmptyDocument and building it up using the xml.Node 142 | // interface is a better approach than building a string and calling Parse. 143 | // 144 | // If you have an XML file, then ReadFile will automatically determine the encoding according to the XML specification. 
145 | func Parse(content, inEncoding, url []byte, options ParseOption, outEncoding []byte) (doc *XmlDocument, err error) { 146 | inEncoding = AppendCStringTerminator(inEncoding) 147 | outEncoding = AppendCStringTerminator(outEncoding) 148 | 149 | var docPtr *C.xmlDoc 150 | contentLen := len(content) 151 | 152 | if contentLen > 0 { 153 | var contentPtr, urlPtr, encodingPtr unsafe.Pointer 154 | contentPtr = unsafe.Pointer(&content[0]) 155 | 156 | if len(url) > 0 { 157 | url = AppendCStringTerminator(url) 158 | urlPtr = unsafe.Pointer(&url[0]) 159 | } 160 | if len(inEncoding) > 0 { 161 | encodingPtr = unsafe.Pointer(&inEncoding[0]) 162 | } 163 | 164 | docPtr = C.xmlParse(contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0) 165 | 166 | if docPtr == nil { 167 | err = ERR_FAILED_TO_PARSE_XML 168 | } else { 169 | doc = NewDocument(unsafe.Pointer(docPtr), contentLen, inEncoding, outEncoding) 170 | } 171 | 172 | } else { 173 | doc = CreateEmptyDocument(inEncoding, outEncoding) 174 | } 175 | return 176 | } 177 | 178 | // ReadFile loads an XmlDocument from a filename. The encoding declared in the document will be 179 | // used as the input encoding. If no encoding is declared, the library will use the alogrithm 180 | // in the XML standard to determine if the document is encoded with UTF-8 or UTF-16. 
181 | func ReadFile(filename string, options ParseOption) (doc *XmlDocument, err error) { 182 | // verify the file exists and can be read before we invoke C API 183 | _, err = os.Stat(filename) 184 | if err != nil { 185 | return 186 | } 187 | 188 | dataBytes := GetCString([]byte(filename)) 189 | dataPtr := unsafe.Pointer(&dataBytes[0]) 190 | var docPtr *C.xmlDoc 191 | docPtr = C.xmlReadFile((*C.char)(dataPtr), nil, C.int(options)) 192 | if docPtr == nil { 193 | err = ERR_FAILED_TO_PARSE_XML 194 | } else { 195 | var encoding []byte 196 | // capture the detected input encoding 197 | p := docPtr.encoding 198 | if p != nil { 199 | encoding = []byte(C.GoString((*C.char)(unsafe.Pointer(p)))) 200 | } 201 | doc = NewDocument(unsafe.Pointer(docPtr), 0, encoding, DefaultEncodingBytes) 202 | } 203 | return 204 | } 205 | 206 | // Create an empty XML document and return an XmlDocument. The root element, along with 207 | // any top-level comments or processing instructions, can be added by calling 208 | // AddChild() on the document itself. 209 | func CreateEmptyDocument(inEncoding, outEncoding []byte) (doc *XmlDocument) { 210 | help.LibxmlInitParser() 211 | docPtr := C.newEmptyXmlDoc() 212 | doc = NewDocument(unsafe.Pointer(docPtr), 0, inEncoding, outEncoding) 213 | return 214 | } 215 | 216 | // DocPtr provides access to the libxml2 structure underlying the document. 217 | func (document *XmlDocument) DocPtr() (ptr unsafe.Pointer) { 218 | ptr = unsafe.Pointer(document.Ptr) 219 | return 220 | } 221 | 222 | // DocType returns one of the node type constants, usually XML_DOCUMENT_NODE. This 223 | // may be of use if you are working with the C API. 224 | func (document *XmlDocument) DocType() (t NodeType) { 225 | t = document.Type 226 | return 227 | } 228 | 229 | // DocRef returns the embedded Document interface. 230 | func (document *XmlDocument) DocRef() (d Document) { 231 | d = document.Me 232 | return 233 | } 234 | 235 | // InputEncoding is the original encoding of the document. 
236 | func (document *XmlDocument) InputEncoding() (encoding []byte) { 237 | encoding = document.InEncoding 238 | return 239 | } 240 | 241 | // OutputEncoding is the encoding that will be used when the document is written out. 242 | // This can be overridden by explicitly specifying an encoding as an argument to any of the 243 | // output functions. 244 | func (document *XmlDocument) OutputEncoding() (encoding []byte) { 245 | encoding = document.OutEncoding 246 | return 247 | } 248 | 249 | // Returns an XPath context that can be used to compile and evaluate XPath 250 | // expressions. 251 | // 252 | // In most cases, you should call the Search or EvalXPath functions instead of 253 | // handling the context directly. 254 | func (document *XmlDocument) DocXPathCtx() (ctx *xpath.XPath) { 255 | ctx = document.XPathCtx 256 | return 257 | } 258 | 259 | func (document *XmlDocument) AddUnlinkedNode(nodePtr unsafe.Pointer) { 260 | p := (*C.xmlNode)(nodePtr) 261 | document.UnlinkedNodes[p] = true 262 | } 263 | 264 | func (document *XmlDocument) RemoveUnlinkedNode(nodePtr unsafe.Pointer) bool { 265 | p := (*C.xmlNode)(nodePtr) 266 | if document.UnlinkedNodes[p] { 267 | delete(document.UnlinkedNodes, p) 268 | return true 269 | } 270 | return false 271 | } 272 | 273 | func (document *XmlDocument) BookkeepFragment(fragment *DocumentFragment) { 274 | document.fragments = append(document.fragments, fragment) 275 | } 276 | 277 | // Root returns the root node of the document. Newly created documents do not 278 | // have a root node until an element node is added a child of the document. 279 | // 280 | // Documents that have multiple root nodes are invalid adn the behaviour is 281 | // not well defined. 
282 | func (document *XmlDocument) Root() (element *ElementNode) { 283 | nodePtr := C.xmlDocGetRootElement(document.Ptr) 284 | if nodePtr != nil { 285 | element = NewNode(unsafe.Pointer(nodePtr), document).(*ElementNode) 286 | } 287 | return 288 | } 289 | 290 | // Get an element node by the value of its ID attribute. By convention this attribute 291 | // is named id, but the actual name of the attribute is set by the document's DTD or schema. 292 | // 293 | // The value for an ID attribute is guaranteed to be unique within a valid document. 294 | func (document *XmlDocument) NodeById(id string) (element *ElementNode) { 295 | dataBytes := GetCString([]byte(id)) 296 | dataPtr := unsafe.Pointer(&dataBytes[0]) 297 | nodePtr := C.xmlGetID(document.Ptr, (*C.xmlChar)(dataPtr)) 298 | if nodePtr != nil { 299 | idattr := NewNode(unsafe.Pointer(nodePtr), document).(*AttributeNode) 300 | element = idattr.Parent().(*ElementNode) 301 | } 302 | return 303 | } 304 | 305 | /* 306 | CreateElementNode creates an element node with the specified tag name. It can be 307 | added as a child of any other element, or as a child of the document itself. 308 | 309 | Use SetNamespace if the element node needs to be in a namespace. 310 | 311 | Note that valid documents have only one child element, referred to as the root node. 312 | */ 313 | func (document *XmlDocument) CreateElementNode(tag string) (element *ElementNode) { 314 | tagBytes := GetCString([]byte(tag)) 315 | tagPtr := unsafe.Pointer(&tagBytes[0]) 316 | newNodePtr := C.xmlNewNode(nil, (*C.xmlChar)(tagPtr)) 317 | newNode := NewNode(unsafe.Pointer(newNodePtr), document) 318 | element = newNode.(*ElementNode) 319 | return 320 | } 321 | 322 | //CreateTextNode creates a text node. It can be added as a child of an element. 323 | // 324 | // The data argument is XML-escaped and used as the content of the node. 
325 | func (document *XmlDocument) CreateTextNode(data string) (text *TextNode) { 326 | dataBytes := GetCString([]byte(data)) 327 | dataPtr := unsafe.Pointer(&dataBytes[0]) 328 | nodePtr := C.xmlNewText((*C.xmlChar)(dataPtr)) 329 | if nodePtr != nil { 330 | nodePtr.doc = (*_Ctype_struct__xmlDoc)(document.DocPtr()) 331 | text = NewNode(unsafe.Pointer(nodePtr), document).(*TextNode) 332 | } 333 | return 334 | } 335 | 336 | //CreateCDataNode creates a CDATA node. CDATA nodes can 337 | // only be children of an element. 338 | // 339 | // The data argument will become the content of the newly created node. 340 | func (document *XmlDocument) CreateCDataNode(data string) (cdata *CDataNode) { 341 | dataLen := len(data) 342 | dataBytes := GetCString([]byte(data)) 343 | dataPtr := unsafe.Pointer(&dataBytes[0]) 344 | nodePtr := C.xmlNewCDataBlock(document.Ptr, (*C.xmlChar)(dataPtr), C.int(dataLen)) 345 | if nodePtr != nil { 346 | cdata = NewNode(unsafe.Pointer(nodePtr), document).(*CDataNode) 347 | } 348 | return 349 | } 350 | 351 | //CreateCommentNode creates a comment node. Comment nodes can 352 | // be children of an element or of the document itself. 353 | // 354 | // The data argument will become the content of the comment. 355 | func (document *XmlDocument) CreateCommentNode(data string) (comment *CommentNode) { 356 | dataBytes := GetCString([]byte(data)) 357 | dataPtr := unsafe.Pointer(&dataBytes[0]) 358 | nodePtr := C.xmlNewComment((*C.xmlChar)(dataPtr)) 359 | if nodePtr != nil { 360 | comment = NewNode(unsafe.Pointer(nodePtr), document).(*CommentNode) 361 | } 362 | return 363 | } 364 | 365 | //CreatePINode creates a processing instruction node with the specified name and data. 366 | // Processing instruction nodes can be children of an element or of the document itself. 367 | // 368 | // While it's common to use an attribute-like syntax for processing instructions, the data 369 | // is actually an arbitrary string that you will need to generate or parse yourself. 
370 | func (document *XmlDocument) CreatePINode(name, data string) (pi *ProcessingInstructionNode) { 371 | nameBytes := GetCString([]byte(name)) 372 | namePtr := unsafe.Pointer(&nameBytes[0]) 373 | dataBytes := GetCString([]byte(data)) 374 | dataPtr := unsafe.Pointer(&dataBytes[0]) 375 | nodePtr := C.xmlNewDocPI(document.Ptr, (*C.xmlChar)(namePtr), (*C.xmlChar)(dataPtr)) 376 | if nodePtr != nil { 377 | pi = NewNode(unsafe.Pointer(nodePtr), document).(*ProcessingInstructionNode) 378 | } 379 | return 380 | } 381 | 382 | func (document *XmlDocument) ParseFragment(input, url []byte, options ParseOption) (fragment *DocumentFragment, err error) { 383 | root := document.Root() 384 | if root == nil { 385 | fragment, err = parsefragment(document, nil, input, url, options) 386 | } else { 387 | fragment, err = parsefragment(document, root.XmlNode, input, url, options) 388 | } 389 | return 390 | } 391 | 392 | // Return the value of an NDATA entity declared in the DTD. If there is no such entity or 393 | // the value cannot be encoded as a valid URI, an empty string is returned. 394 | // 395 | // Note that this library assumes you already know the name of entity and does not 396 | // expose any way of getting the list of entities. 397 | func (document *XmlDocument) UnparsedEntityURI(name string) (val string) { 398 | if name == "" { 399 | return 400 | } 401 | 402 | nameBytes := GetCString([]byte(name)) 403 | namePtr := unsafe.Pointer(&nameBytes[0]) 404 | entity := C.xmlGetDocEntity(document.Ptr, (*C.xmlChar)(namePtr)) 405 | if entity == nil { 406 | return 407 | } 408 | 409 | // unlike entity.content (which returns the raw, unprocessed string value of the entity), 410 | // it looks like entity.URI includes any escaping needed to treat the value as a URI. 411 | valPtr := unsafe.Pointer(entity.URI) 412 | if valPtr == nil { 413 | return 414 | } 415 | 416 | val = C.GoString((*C.char)(valPtr)) 417 | return 418 | } 419 | 420 | // Free the C structures associated with this document. 
421 | func (document *XmlDocument) Free() { 422 | //must free the xpath context before freeing the fragments or unlinked nodes 423 | //otherwise, it causes memory leaks and crashes when dealing with very large documents (a few MB) 424 | if document.XPathCtx != nil { 425 | document.XPathCtx.Free() 426 | document.XPathCtx = nil 427 | } 428 | //must clear the fragments first 429 | //because the nodes are put in the unlinked list 430 | if document.fragments != nil { 431 | for _, fragment := range document.fragments { 432 | fragment.Remove() 433 | } 434 | } 435 | document.fragments = nil 436 | var p *C.xmlNode 437 | if document.UnlinkedNodes != nil { 438 | for p, _ = range document.UnlinkedNodes { 439 | C.xmlFreeNode(p) 440 | } 441 | } 442 | document.UnlinkedNodes = nil 443 | if document.Ptr != nil { 444 | C.xmlFreeDoc(document.Ptr) 445 | document.Ptr = nil 446 | } 447 | } 448 | 449 | /* Uri returns the URI of the document - typically this is the filename if ReadFile was used to parse 450 | the document. 451 | */ 452 | func (document *XmlDocument) Uri() (val string) { 453 | val = C.GoString((*C.char)(unsafe.Pointer(document.Ptr.URL))) 454 | return 455 | } 456 | -------------------------------------------------------------------------------- /xml/node.go: -------------------------------------------------------------------------------- 1 | package xml 2 | 3 | //#include "helper.h" 4 | //#include 5 | import "C" 6 | 7 | import ( 8 | "errors" 9 | . 
"github.com/moovweb/gokogiri/util" 10 | "github.com/moovweb/gokogiri/xpath" 11 | "strconv" 12 | "unsafe" 13 | ) 14 | 15 | var ( 16 | ERR_UNDEFINED_COERCE_PARAM = errors.New("unexpected parameter type in coerce") 17 | ERR_UNDEFINED_SET_CONTENT_PARAM = errors.New("unexpected parameter type in SetContent") 18 | ERR_UNDEFINED_SEARCH_PARAM = errors.New("unexpected parameter type in Search") 19 | ERR_CANNOT_MAKE_DUCMENT_AS_CHILD = errors.New("cannot add a document node as a child") 20 | ERR_CANNOT_COPY_TEXT_NODE_WHEN_ADD_CHILD = errors.New("cannot copy a text node when adding it") 21 | ) 22 | 23 | // NodeType is an enumeration that indicates the type of XmlNode. 24 | type NodeType int 25 | 26 | const ( 27 | XML_ELEMENT_NODE NodeType = iota + 1 28 | XML_ATTRIBUTE_NODE 29 | XML_TEXT_NODE 30 | XML_CDATA_SECTION_NODE 31 | XML_ENTITY_REF_NODE 32 | XML_ENTITY_NODE 33 | XML_PI_NODE 34 | XML_COMMENT_NODE 35 | XML_DOCUMENT_NODE 36 | XML_DOCUMENT_TYPE_NODE 37 | XML_DOCUMENT_FRAG_NODE 38 | XML_NOTATION_NODE 39 | XML_HTML_DOCUMENT_NODE 40 | XML_DTD_NODE 41 | XML_ELEMENT_DECL 42 | XML_ATTRIBUTE_DECL 43 | XML_ENTITY_DECL 44 | XML_NAMESPACE_DECL 45 | XML_XINCLUDE_START 46 | XML_XINCLUDE_END 47 | XML_DOCB_DOCUMENT_NODE 48 | ) 49 | 50 | // SerializationOption is a set of flags used to control how a node is written out. 51 | type SerializationOption int 52 | 53 | const ( 54 | XML_SAVE_FORMAT SerializationOption = 1 << iota // format save output 55 | XML_SAVE_NO_DECL //drop the xml declaration 56 | XML_SAVE_NO_EMPTY //no empty tags 57 | XML_SAVE_NO_XHTML //disable XHTML1 specific rules 58 | XML_SAVE_XHTML //force XHTML1 specific rules 59 | XML_SAVE_AS_XML //force XML serialization on HTML doc 60 | XML_SAVE_AS_HTML //force HTML serialization on XML doc 61 | XML_SAVE_WSNONSIG //format with non-significant whitespace 62 | ) 63 | 64 | // NamespaceDeclaration represents a namespace declaration, providing both the prefix and the URI of the namespace. 
65 | // It is returned by the DeclaredNamespaces function. 66 | type NamespaceDeclaration struct { 67 | Prefix string 68 | Uri string 69 | } 70 | 71 | type Node interface { 72 | NodePtr() unsafe.Pointer 73 | ResetNodePtr() 74 | MyDocument() Document 75 | 76 | IsValid() bool 77 | 78 | ParseFragment([]byte, []byte, ParseOption) (*DocumentFragment, error) 79 | LineNumber() int 80 | 81 | // 82 | NodeType() NodeType 83 | NextSibling() Node 84 | PreviousSibling() Node 85 | 86 | Parent() Node 87 | FirstChild() Node 88 | LastChild() Node 89 | CountChildren() int 90 | Attributes() map[string]*AttributeNode 91 | 92 | Coerce(interface{}) ([]Node, error) 93 | 94 | AddChild(interface{}) error 95 | AddPreviousSibling(interface{}) error 96 | AddNextSibling(interface{}) error 97 | InsertBefore(interface{}) error 98 | InsertAfter(interface{}) error 99 | InsertBegin(interface{}) error 100 | InsertEnd(interface{}) error 101 | SetInnerHtml(interface{}) error 102 | SetChildren(interface{}) error 103 | Replace(interface{}) error 104 | Wrap(string) error 105 | 106 | SetContent(interface{}) error 107 | 108 | Name() string 109 | SetName(string) 110 | 111 | Attr(string) string 112 | SetAttr(string, string) string 113 | SetNsAttr(string, string, string) string 114 | Attribute(string) *AttributeNode 115 | 116 | Path() string 117 | 118 | Duplicate(int) Node 119 | DuplicateTo(Document, int) Node 120 | 121 | Search(interface{}) ([]Node, error) 122 | SearchWithVariables(interface{}, xpath.VariableScope) ([]Node, error) 123 | EvalXPath(interface{}, xpath.VariableScope) (interface{}, error) 124 | EvalXPathAsBoolean(interface{}, xpath.VariableScope) bool 125 | 126 | Unlink() 127 | Remove() 128 | ResetChildren() 129 | 130 | SerializeWithFormat(SerializationOption, []byte, []byte) ([]byte, int) 131 | ToXml([]byte, []byte) ([]byte, int) 132 | ToUnformattedXml() string 133 | ToHtml([]byte, []byte) ([]byte, int) 134 | ToBuffer([]byte) []byte 135 | String() string 136 | Content() string 137 | InnerHtml() 
string 138 | 139 | RecursivelyRemoveNamespaces() error 140 | Namespace() string 141 | SetNamespace(string, string) 142 | DeclareNamespace(string, string) 143 | RemoveDefaultNamespace() 144 | DeclaredNamespaces() []NamespaceDeclaration 145 | } 146 | 147 | //run out of memory 148 | var ErrTooLarge = errors.New("Output buffer too large") 149 | 150 | //pre-allocate a buffer for serializing the document 151 | const initialOutputBufferSize = 10 //100K 152 | 153 | /* 154 | XmlNode implements the Node interface, and as such is the heart of the API. 155 | */ 156 | type XmlNode struct { 157 | Ptr *C.xmlNode 158 | Document 159 | valid bool 160 | } 161 | 162 | type WriteBuffer struct { 163 | Node *XmlNode 164 | Buffer []byte 165 | Offset int 166 | } 167 | 168 | // NewNode takes a C pointer from the libxml2 library and returns a Node instance of 169 | // the appropriate type. 170 | func NewNode(nodePtr unsafe.Pointer, document Document) (node Node) { 171 | if nodePtr == nil { 172 | return nil 173 | } 174 | xmlNode := &XmlNode{ 175 | Ptr: (*C.xmlNode)(nodePtr), 176 | Document: document, 177 | valid: true, 178 | } 179 | nodeType := NodeType(C.getNodeType((*C.xmlNode)(nodePtr))) 180 | 181 | switch nodeType { 182 | default: 183 | node = xmlNode 184 | case XML_ATTRIBUTE_NODE: 185 | node = &AttributeNode{XmlNode: xmlNode} 186 | case XML_ELEMENT_NODE: 187 | node = &ElementNode{XmlNode: xmlNode} 188 | case XML_CDATA_SECTION_NODE: 189 | node = &CDataNode{XmlNode: xmlNode} 190 | case XML_COMMENT_NODE: 191 | node = &CommentNode{XmlNode: xmlNode} 192 | case XML_PI_NODE: 193 | node = &ProcessingInstructionNode{XmlNode: xmlNode} 194 | case XML_TEXT_NODE: 195 | node = &TextNode{XmlNode: xmlNode} 196 | } 197 | return 198 | } 199 | 200 | func (xmlNode *XmlNode) coerce(data interface{}) (nodes []Node, err error) { 201 | switch t := data.(type) { 202 | default: 203 | err = ERR_UNDEFINED_COERCE_PARAM 204 | case []Node: 205 | nodes = t 206 | case *DocumentFragment: 207 | nodes = t.Children() 208 | 
case string: 209 | f, err := xmlNode.MyDocument().ParseFragment([]byte(t), nil, DefaultParseOption) 210 | if err == nil { 211 | nodes = f.Children() 212 | } 213 | case []byte: 214 | f, err := xmlNode.MyDocument().ParseFragment(t, nil, DefaultParseOption) 215 | if err == nil { 216 | nodes = f.Children() 217 | } 218 | } 219 | return 220 | } 221 | 222 | func (xmlNode *XmlNode) Coerce(data interface{}) (nodes []Node, err error) { 223 | return xmlNode.coerce(data) 224 | } 225 | 226 | // Add a node as a child of the current node. 227 | // Passing in a nodeset will add all the nodes as children of the current node. 228 | func (xmlNode *XmlNode) AddChild(data interface{}) (err error) { 229 | switch t := data.(type) { 230 | default: 231 | if nodes, err := xmlNode.coerce(data); err == nil { 232 | for _, node := range nodes { 233 | if err = xmlNode.addChild(node); err != nil { 234 | break 235 | } 236 | } 237 | } 238 | case *DocumentFragment: 239 | if nodes, err := xmlNode.coerce(data); err == nil { 240 | for _, node := range nodes { 241 | if err = xmlNode.addChild(node); err != nil { 242 | break 243 | } 244 | } 245 | } 246 | case Node: 247 | err = xmlNode.addChild(t) 248 | } 249 | return 250 | } 251 | 252 | // Insert a node immediately before this node in the document. 253 | // Passing in a nodeset will add all the nodes, in order. 
254 | func (xmlNode *XmlNode) AddPreviousSibling(data interface{}) (err error) { 255 | switch t := data.(type) { 256 | default: 257 | if nodes, err := xmlNode.coerce(data); err == nil { 258 | for _, node := range nodes { 259 | if err = xmlNode.addPreviousSibling(node); err != nil { 260 | break 261 | } 262 | } 263 | } 264 | case *DocumentFragment: 265 | if nodes, err := xmlNode.coerce(data); err == nil { 266 | for _, node := range nodes { 267 | if err = xmlNode.addPreviousSibling(node); err != nil { 268 | break 269 | } 270 | } 271 | } 272 | case Node: 273 | err = xmlNode.addPreviousSibling(t) 274 | } 275 | return 276 | } 277 | 278 | // Insert a node immediately after this node in the document. 279 | // Passing in a nodeset will add all the nodes, in order. 280 | func (xmlNode *XmlNode) AddNextSibling(data interface{}) (err error) { 281 | switch t := data.(type) { 282 | default: 283 | if nodes, err := xmlNode.coerce(data); err == nil { 284 | for i := len(nodes) - 1; i >= 0; i-- { 285 | node := nodes[i] 286 | if err = xmlNode.addNextSibling(node); err != nil { 287 | break 288 | } 289 | } 290 | } 291 | case *DocumentFragment: 292 | if nodes, err := xmlNode.coerce(data); err == nil { 293 | for i := len(nodes) - 1; i >= 0; i-- { 294 | node := nodes[i] 295 | if err = xmlNode.addNextSibling(node); err != nil { 296 | break 297 | } 298 | } 299 | } 300 | case Node: 301 | err = xmlNode.addNextSibling(t) 302 | } 303 | return 304 | } 305 | 306 | func (xmlNode *XmlNode) ResetNodePtr() { 307 | xmlNode.Ptr = nil 308 | return 309 | } 310 | 311 | // Returns true if the node is valid. Nodes become 312 | // invalid when Remove() is called. 313 | func (xmlNode *XmlNode) IsValid() bool { 314 | return xmlNode.valid 315 | } 316 | 317 | // Return the document containing this node. Removed or unlinked 318 | // nodes still have a document associated with them. 
319 | func (xmlNode *XmlNode) MyDocument() (document Document) { 320 | document = xmlNode.Document.DocRef() 321 | return 322 | } 323 | 324 | // NodePtr returns a pointer to the underlying C struct. 325 | func (xmlNode *XmlNode) NodePtr() (p unsafe.Pointer) { 326 | p = unsafe.Pointer(xmlNode.Ptr) 327 | return 328 | } 329 | 330 | func (xmlNode *XmlNode) NodeType() (nodeType NodeType) { 331 | nodeType = NodeType(C.getNodeType(xmlNode.Ptr)) 332 | return 333 | } 334 | 335 | // Path returns an XPath expression that can be used to 336 | // select this node in the document. 337 | func (xmlNode *XmlNode) Path() (path string) { 338 | pathPtr := C.xmlGetNodePath(xmlNode.Ptr) 339 | if pathPtr != nil { 340 | p := (*C.char)(unsafe.Pointer(pathPtr)) 341 | defer C.xmlFreeChars(p) 342 | path = C.GoString(p) 343 | } 344 | return 345 | } 346 | 347 | // NextSibling returns the next sibling (if any) of the current node. 348 | // It is often used when iterating over the children of a node. 349 | func (xmlNode *XmlNode) NextSibling() Node { 350 | siblingPtr := (*C.xmlNode)(xmlNode.Ptr.next) 351 | return NewNode(unsafe.Pointer(siblingPtr), xmlNode.Document) 352 | } 353 | 354 | // PreviousSibling returns the previous sibling (if any) of the current node. 355 | // It is often used when iterating over the children of a node in reverse. 356 | func (xmlNode *XmlNode) PreviousSibling() Node { 357 | siblingPtr := (*C.xmlNode)(xmlNode.Ptr.prev) 358 | return NewNode(unsafe.Pointer(siblingPtr), xmlNode.Document) 359 | } 360 | 361 | // CountChildren returns the number of child nodes. 
362 | func (xmlNode *XmlNode) CountChildren() int { 363 | return int(C.xmlLsCountNode(xmlNode.Ptr)) 364 | } 365 | 366 | func (xmlNode *XmlNode) FirstChild() Node { 367 | return NewNode(unsafe.Pointer(xmlNode.Ptr.children), xmlNode.Document) 368 | } 369 | 370 | func (xmlNode *XmlNode) LastChild() Node { 371 | return NewNode(unsafe.Pointer(xmlNode.Ptr.last), xmlNode.Document) 372 | } 373 | 374 | /* 375 | Parent returns the parent of the current node (or nil if there isn't one). 376 | This will always be an element or document node, as those are the only node types 377 | that can have children. 378 | */ 379 | func (xmlNode *XmlNode) Parent() Node { 380 | if C.xmlNodePtrCheck(unsafe.Pointer(xmlNode.Ptr.parent)) == C.int(0) { 381 | return nil 382 | } 383 | return NewNode(unsafe.Pointer(xmlNode.Ptr.parent), xmlNode.Document) 384 | } 385 | 386 | func (xmlNode *XmlNode) ResetChildren() { 387 | var p unsafe.Pointer 388 | for childPtr := xmlNode.Ptr.children; childPtr != nil; { 389 | nextPtr := childPtr.next 390 | p = unsafe.Pointer(childPtr) 391 | C.xmlUnlinkNodeWithCheck((*C.xmlNode)(p)) 392 | xmlNode.Document.AddUnlinkedNode(p) 393 | childPtr = nextPtr 394 | } 395 | } 396 | 397 | func (xmlNode *XmlNode) SetContent(content interface{}) (err error) { 398 | switch data := content.(type) { 399 | default: 400 | err = ERR_UNDEFINED_SET_CONTENT_PARAM 401 | case string: 402 | err = xmlNode.SetContent([]byte(data)) 403 | case []byte: 404 | contentBytes := GetCString(data) 405 | contentPtr := unsafe.Pointer(&contentBytes[0]) 406 | C.xmlSetContent(unsafe.Pointer(xmlNode), unsafe.Pointer(xmlNode.Ptr), contentPtr) 407 | } 408 | return 409 | } 410 | 411 | func (xmlNode *XmlNode) InsertBefore(data interface{}) (err error) { 412 | err = xmlNode.AddPreviousSibling(data) 413 | return 414 | } 415 | 416 | func (xmlNode *XmlNode) InsertAfter(data interface{}) (err error) { 417 | err = xmlNode.AddNextSibling(data) 418 | return 419 | } 420 | 421 | func (xmlNode *XmlNode) InsertBegin(data 
interface{}) (err error) { 422 | if parent := xmlNode.Parent(); parent != nil { 423 | if last := parent.LastChild(); last != nil { 424 | err = last.AddPreviousSibling(data) 425 | } 426 | } 427 | return 428 | } 429 | 430 | func (xmlNode *XmlNode) InsertEnd(data interface{}) (err error) { 431 | if parent := xmlNode.Parent(); parent != nil { 432 | if first := parent.FirstChild(); first != nil { 433 | err = first.AddPreviousSibling(data) 434 | } 435 | } 436 | return 437 | } 438 | 439 | func (xmlNode *XmlNode) SetChildren(data interface{}) (err error) { 440 | nodes, err := xmlNode.coerce(data) 441 | if err != nil { 442 | return 443 | } 444 | xmlNode.ResetChildren() 445 | err = xmlNode.AddChild(nodes) 446 | return nil 447 | } 448 | 449 | func (xmlNode *XmlNode) SetInnerHtml(data interface{}) (err error) { 450 | err = xmlNode.SetChildren(data) 451 | return 452 | } 453 | 454 | func (xmlNode *XmlNode) Replace(data interface{}) (err error) { 455 | err = xmlNode.AddPreviousSibling(data) 456 | if err != nil { 457 | return 458 | } 459 | xmlNode.Remove() 460 | return 461 | } 462 | 463 | func (xmlNode *XmlNode) Attributes() (attributes map[string]*AttributeNode) { 464 | attributes = make(map[string]*AttributeNode) 465 | for prop := xmlNode.Ptr.properties; prop != nil; prop = prop.next { 466 | if prop.name != nil { 467 | namePtr := unsafe.Pointer(prop.name) 468 | name := C.GoString((*C.char)(namePtr)) 469 | attrPtr := unsafe.Pointer(prop) 470 | attributeNode := NewNode(attrPtr, xmlNode.Document) 471 | if attr, ok := attributeNode.(*AttributeNode); ok { 472 | attributes[name] = attr 473 | } 474 | } 475 | } 476 | return 477 | } 478 | 479 | // Return the attribute node, or nil if the attribute does not exist. 
480 | func (xmlNode *XmlNode) Attribute(name string) (attribute *AttributeNode) { 481 | if xmlNode.NodeType() != XML_ELEMENT_NODE { 482 | return 483 | } 484 | nameBytes := GetCString([]byte(name)) 485 | namePtr := unsafe.Pointer(&nameBytes[0]) 486 | attrPtr := C.xmlHasNsProp(xmlNode.Ptr, (*C.xmlChar)(namePtr), nil) 487 | if attrPtr == nil { 488 | return 489 | } else { 490 | node := NewNode(unsafe.Pointer(attrPtr), xmlNode.Document) 491 | if node, ok := node.(*AttributeNode); ok { 492 | attribute = node 493 | } 494 | } 495 | return 496 | } 497 | 498 | // Attr returns the value of an attribute. 499 | 500 | // If you need to check for the existence of an attribute, 501 | // use Attribute. 502 | func (xmlNode *XmlNode) Attr(name string) (val string) { 503 | if xmlNode.NodeType() != XML_ELEMENT_NODE { 504 | return 505 | } 506 | nameBytes := GetCString([]byte(name)) 507 | namePtr := unsafe.Pointer(&nameBytes[0]) 508 | valPtr := C.xmlGetProp(xmlNode.Ptr, (*C.xmlChar)(namePtr)) 509 | if valPtr == nil { 510 | return 511 | } 512 | p := unsafe.Pointer(valPtr) 513 | defer C.xmlFreeChars((*C.char)(p)) 514 | val = C.GoString((*C.char)(p)) 515 | return 516 | } 517 | 518 | // SetAttr sets the value of an attribute. If the attribute is in a namespace, 519 | // use SetNsAttr instead. 520 | 521 | // While this call accepts QNames for the name parameter, it does not check 522 | // their validity. 523 | 524 | // Attributes such as "xml:lang" or "xml:space" are not is a formal namespace 525 | // and should be set by calling SetAttr with the prefix as part of the name. 
526 | func (xmlNode *XmlNode) SetAttr(name, value string) (val string) { 527 | val = value 528 | if xmlNode.NodeType() != XML_ELEMENT_NODE { 529 | return 530 | } 531 | nameBytes := GetCString([]byte(name)) 532 | namePtr := unsafe.Pointer(&nameBytes[0]) 533 | 534 | valueBytes := GetCString([]byte(value)) 535 | valuePtr := unsafe.Pointer(&valueBytes[0]) 536 | 537 | C.xmlSetProp(xmlNode.Ptr, (*C.xmlChar)(namePtr), (*C.xmlChar)(valuePtr)) 538 | return 539 | } 540 | 541 | // SetNsAttr sets the value of a namespaced attribute. 542 | 543 | // Attributes such as "xml:lang" or "xml:space" are not is a formal namespace 544 | // and should be set by calling SetAttr with the xml prefix as part of the name. 545 | 546 | // The namespace should already be declared and in-scope when SetNsAttr is called. 547 | // This restriction will be lifted in a future version. 548 | func (xmlNode *XmlNode) SetNsAttr(href, name, value string) (val string) { 549 | val = value 550 | if xmlNode.NodeType() != XML_ELEMENT_NODE { 551 | return 552 | } 553 | nameBytes := GetCString([]byte(name)) 554 | namePtr := unsafe.Pointer(&nameBytes[0]) 555 | 556 | valueBytes := GetCString([]byte(value)) 557 | valuePtr := unsafe.Pointer(&valueBytes[0]) 558 | 559 | hrefBytes := GetCString([]byte(href)) 560 | hrefPtr := unsafe.Pointer(&hrefBytes[0]) 561 | 562 | ns := C.xmlSearchNsByHref((*C.xmlDoc)(xmlNode.Document.DocPtr()), xmlNode.Ptr, (*C.xmlChar)(hrefPtr)) 563 | if ns == nil { 564 | return 565 | } 566 | 567 | C.xmlSetNsProp(xmlNode.Ptr, ns, (*C.xmlChar)(namePtr), (*C.xmlChar)(valuePtr)) 568 | return 569 | } 570 | 571 | // Search for nodes that match an XPath. This is the simplest way to look for nodes. 
572 | func (xmlNode *XmlNode) Search(data interface{}) (result []Node, err error) { 573 | switch data := data.(type) { 574 | default: 575 | err = ERR_UNDEFINED_SEARCH_PARAM 576 | case string: 577 | if xpathExpr := xpath.Compile(data); xpathExpr != nil { 578 | defer xpathExpr.Free() 579 | result, err = xmlNode.Search(xpathExpr) 580 | } else { 581 | err = errors.New("cannot compile xpath: " + data) 582 | } 583 | case []byte: 584 | result, err = xmlNode.Search(string(data)) 585 | case *xpath.Expression: 586 | xpathCtx := xmlNode.Document.DocXPathCtx() 587 | nodePtrs, err := xpathCtx.EvaluateAsNodeset(unsafe.Pointer(xmlNode.Ptr), data) 588 | if nodePtrs == nil || err != nil { 589 | return nil, err 590 | } 591 | for _, nodePtr := range nodePtrs { 592 | result = append(result, NewNode(nodePtr, xmlNode.Document)) 593 | } 594 | } 595 | return 596 | } 597 | 598 | // As the Search function, but passing a VariableScope that can be used to reolve variable 599 | // names or registered function references in the XPath being evaluated. 
600 | func (xmlNode *XmlNode) SearchWithVariables(data interface{}, v xpath.VariableScope) (result []Node, err error) { 601 | switch data := data.(type) { 602 | default: 603 | err = ERR_UNDEFINED_SEARCH_PARAM 604 | case string: 605 | if xpathExpr := xpath.Compile(data); xpathExpr != nil { 606 | defer xpathExpr.Free() 607 | result, err = xmlNode.SearchWithVariables(xpathExpr, v) 608 | } else { 609 | err = errors.New("cannot compile xpath: " + data) 610 | } 611 | case []byte: 612 | result, err = xmlNode.SearchWithVariables(string(data), v) 613 | case *xpath.Expression: 614 | xpathCtx := xmlNode.Document.DocXPathCtx() 615 | xpathCtx.SetResolver(v) 616 | nodePtrs, err := xpathCtx.EvaluateAsNodeset(unsafe.Pointer(xmlNode.Ptr), data) 617 | if nodePtrs == nil || err != nil { 618 | return nil, err 619 | } 620 | for _, nodePtr := range nodePtrs { 621 | result = append(result, NewNode(nodePtr, xmlNode.Document)) 622 | } 623 | } 624 | return 625 | } 626 | 627 | // Evaluate an XPath and return a result of the appropriate type. 628 | // If a non-nil VariableScope is provided, any variables or functions present 629 | // in the xpath will be resolved. 630 | 631 | // If the result is a nodeset (or the empty nodeset), a nodeset will be returned. 632 | 633 | // If the result is a number, a float64 will be returned. 634 | 635 | // If the result is a boolean, a bool will be returned. 636 | 637 | // In any other cases, the result will be coerced to a string. 
638 | func (xmlNode *XmlNode) EvalXPath(data interface{}, v xpath.VariableScope) (result interface{}, err error) { 639 | switch data := data.(type) { 640 | case string: 641 | if xpathExpr := xpath.Compile(data); xpathExpr != nil { 642 | defer xpathExpr.Free() 643 | result, err = xmlNode.EvalXPath(xpathExpr, v) 644 | } else { 645 | err = errors.New("cannot compile xpath: " + data) 646 | } 647 | case []byte: 648 | result, err = xmlNode.EvalXPath(string(data), v) 649 | case *xpath.Expression: 650 | xpathCtx := xmlNode.Document.DocXPathCtx() 651 | xpathCtx.SetResolver(v) 652 | err := xpathCtx.Evaluate(unsafe.Pointer(xmlNode.Ptr), data) 653 | if err != nil { 654 | return nil, err 655 | } 656 | rt := xpathCtx.ReturnType() 657 | switch rt { 658 | case xpath.XPATH_NODESET, xpath.XPATH_XSLT_TREE: 659 | nodePtrs, err := xpathCtx.ResultAsNodeset() 660 | if err != nil { 661 | return nil, err 662 | } 663 | var output []Node 664 | for _, nodePtr := range nodePtrs { 665 | output = append(output, NewNode(nodePtr, xmlNode.Document)) 666 | } 667 | result = output 668 | case xpath.XPATH_NUMBER: 669 | result, _ = xpathCtx.ResultAsNumber() 670 | case xpath.XPATH_BOOLEAN: 671 | result, _ = xpathCtx.ResultAsBoolean() 672 | default: 673 | result, _ = xpathCtx.ResultAsString() 674 | } 675 | default: 676 | err = ERR_UNDEFINED_SEARCH_PARAM 677 | } 678 | return 679 | } 680 | 681 | // Evaluate an XPath and coerce the result to a boolean according to the 682 | // XPath rules. In the presence of an error, this function will return false 683 | // even if the expression cannot actually be evaluated. 684 | 685 | // In most cases you are better advised to call EvalXPath; this function is 686 | // intended for packages that implement XML standards and that are fully aware 687 | // of the consequences of suppressing a compilation error. 688 | 689 | // If a non-nil VariableScope is provided, any variables or registered functions present 690 | // in the xpath will be resolved. 
691 | func (xmlNode *XmlNode) EvalXPathAsBoolean(data interface{}, v xpath.VariableScope) (result bool) { 692 | switch data := data.(type) { 693 | case string: 694 | if xpathExpr := xpath.Compile(data); xpathExpr != nil { 695 | defer xpathExpr.Free() 696 | result = xmlNode.EvalXPathAsBoolean(xpathExpr, v) 697 | } else { 698 | //err = errors.New("cannot compile xpath: " + data) 699 | } 700 | case []byte: 701 | result = xmlNode.EvalXPathAsBoolean(string(data), v) 702 | case *xpath.Expression: 703 | xpathCtx := xmlNode.Document.DocXPathCtx() 704 | xpathCtx.SetResolver(v) 705 | err := xpathCtx.Evaluate(unsafe.Pointer(xmlNode.Ptr), data) 706 | if err != nil { 707 | return false 708 | } 709 | result, _ = xpathCtx.ResultAsBoolean() 710 | default: 711 | //err = ERR_UNDEFINED_SEARCH_PARAM 712 | } 713 | return 714 | } 715 | 716 | // The local name of the node. Use Namespace() to get the namespace. 717 | func (xmlNode *XmlNode) Name() (name string) { 718 | if xmlNode.Ptr.name != nil { 719 | p := unsafe.Pointer(xmlNode.Ptr.name) 720 | name = C.GoString((*C.char)(p)) 721 | } 722 | return 723 | } 724 | 725 | // The namespace of the node. This is the empty string if there 726 | // no associated namespace. 727 | func (xmlNode *XmlNode) Namespace() (href string) { 728 | if xmlNode.Ptr.ns != nil { 729 | p := unsafe.Pointer(xmlNode.Ptr.ns.href) 730 | href = C.GoString((*C.char)(p)) 731 | } 732 | return 733 | } 734 | 735 | // Set the local name of the node. The namespace is set via SetNamespace(). 
736 | func (xmlNode *XmlNode) SetName(name string) { 737 | if len(name) > 0 { 738 | nameBytes := GetCString([]byte(name)) 739 | namePtr := unsafe.Pointer(&nameBytes[0]) 740 | C.xmlNodeSetName(xmlNode.Ptr, (*C.xmlChar)(namePtr)) 741 | } 742 | } 743 | 744 | func (xmlNode *XmlNode) Duplicate(level int) Node { 745 | return xmlNode.DuplicateTo(xmlNode.Document, level) 746 | } 747 | 748 | func (xmlNode *XmlNode) DuplicateTo(doc Document, level int) (dup Node) { 749 | if xmlNode.valid { 750 | dupPtr := C.xmlDocCopyNode(xmlNode.Ptr, (*C.xmlDoc)(doc.DocPtr()), C.int(level)) 751 | if dupPtr != nil { 752 | dup = NewNode(unsafe.Pointer(dupPtr), xmlNode.Document) 753 | } 754 | } 755 | return 756 | } 757 | 758 | func (xmlNode *XmlNode) serialize(format SerializationOption, encoding, outputBuffer []byte) ([]byte, int) { 759 | nodePtr := unsafe.Pointer(xmlNode.Ptr) 760 | var encodingPtr unsafe.Pointer 761 | if len(encoding) == 0 { 762 | encoding = xmlNode.Document.OutputEncoding() 763 | } 764 | if len(encoding) > 0 { 765 | encodingPtr = unsafe.Pointer(&(encoding[0])) 766 | } else { 767 | encodingPtr = nil 768 | } 769 | 770 | wbuffer := &WriteBuffer{Node: xmlNode, Buffer: outputBuffer} 771 | wbufferPtr := unsafe.Pointer(wbuffer) 772 | 773 | ret := int(C.xmlSaveNode(wbufferPtr, nodePtr, encodingPtr, C.int(format))) 774 | if ret < 0 { 775 | panic("output error in xml node serialization: " + strconv.Itoa(ret)) 776 | return nil, 0 777 | } 778 | 779 | return wbuffer.Buffer, wbuffer.Offset 780 | } 781 | 782 | // SerializeWithFormat allows you to control the serialization flags passed to libxml. 783 | // In most cases ToXml() and ToHtml() provide sensible defaults and should be preferred. 784 | 785 | // The format parameter should be a set of SerializationOption constants or'd together. 786 | // If encoding is nil, the document's output encoding is used - this defaults to UTF-8. 787 | // If outputBuffer is nil, one will be created for you. 
788 | func (xmlNode *XmlNode) SerializeWithFormat(format SerializationOption, encoding, outputBuffer []byte) ([]byte, int) { 789 | return xmlNode.serialize(format, encoding, outputBuffer) 790 | } 791 | 792 | // ToXml generates an indented XML document with an XML declaration. 793 | // It is not guaranteed to be well formed unless xmlNode is an element node, 794 | // or a document node with only one element child. 795 | 796 | // If you need finer control over the formatting, call SerializeWithFormat. 797 | 798 | // If encoding is nil, the document's output encoding is used - this defaults to UTF-8. 799 | // If outputBuffer is nil, one will be created for you. 800 | func (xmlNode *XmlNode) ToXml(encoding, outputBuffer []byte) ([]byte, int) { 801 | return xmlNode.serialize(XML_SAVE_AS_XML|XML_SAVE_FORMAT, encoding, outputBuffer) 802 | } 803 | 804 | // ToUnformattedXml generates an unformatted XML document without an XML declaration. 805 | // This is useful for conforming to various standards and for unit testing, although 806 | // the output is not guaranteed to be well formed unless xmlNode is an element node. 807 | func (xmlNode *XmlNode) ToUnformattedXml() string { 808 | var b []byte 809 | var size int 810 | b, size = xmlNode.serialize(XML_SAVE_AS_XML|XML_SAVE_NO_DECL, nil, nil) 811 | if b == nil { 812 | return "" 813 | } 814 | return string(b[:size]) 815 | } 816 | 817 | // ToHtml generates an indented XML document that conforms to HTML 4.0 rules; meaning 818 | // that some elements may be unclosed or forced to use end tags even when empty. 819 | 820 | // If you want to output XHTML, call SerializeWithFormat and enable the XML_SAVE_XHTML 821 | // flag as part of the format. 822 | 823 | // If encoding is nil, the document's output encoding is used - this defaults to UTF-8. 824 | // If outputBuffer is nil, one will be created for you. 
825 | func (xmlNode *XmlNode) ToHtml(encoding, outputBuffer []byte) ([]byte, int) { 826 | return xmlNode.serialize(XML_SAVE_AS_HTML|XML_SAVE_FORMAT, encoding, outputBuffer) 827 | } 828 | 829 | func (xmlNode *XmlNode) ToBuffer(outputBuffer []byte) []byte { 830 | var b []byte 831 | var size int 832 | if docType := xmlNode.Document.DocType(); docType == XML_HTML_DOCUMENT_NODE { 833 | b, size = xmlNode.ToHtml(nil, outputBuffer) 834 | } else { 835 | b, size = xmlNode.ToXml(nil, outputBuffer) 836 | } 837 | return b[:size] 838 | } 839 | 840 | func (xmlNode *XmlNode) String() string { 841 | b := xmlNode.ToBuffer(nil) 842 | if b == nil { 843 | return "" 844 | } 845 | return string(b) 846 | } 847 | 848 | func (xmlNode *XmlNode) Content() string { 849 | contentPtr := C.xmlNodeGetContent(xmlNode.Ptr) 850 | charPtr := (*C.char)(unsafe.Pointer(contentPtr)) 851 | defer C.xmlFreeChars(charPtr) 852 | return C.GoString(charPtr) 853 | } 854 | 855 | func (xmlNode *XmlNode) InnerHtml() string { 856 | out := "" 857 | 858 | for child := xmlNode.FirstChild(); child != nil; child = child.NextSibling() { 859 | out += child.String() 860 | } 861 | return out 862 | } 863 | 864 | func (xmlNode *XmlNode) Unlink() { 865 | if int(C.xmlUnlinkNodeWithCheck(xmlNode.Ptr)) != 0 { 866 | xmlNode.Document.AddUnlinkedNode(unsafe.Pointer(xmlNode.Ptr)) 867 | } 868 | } 869 | 870 | func (xmlNode *XmlNode) Remove() { 871 | if xmlNode.valid && unsafe.Pointer(xmlNode.Ptr) != xmlNode.Document.DocPtr() { 872 | xmlNode.Unlink() 873 | xmlNode.valid = false 874 | } 875 | } 876 | 877 | func (xmlNode *XmlNode) addChild(node Node) (err error) { 878 | nodeType := node.NodeType() 879 | if nodeType == XML_DOCUMENT_NODE || nodeType == XML_HTML_DOCUMENT_NODE { 880 | err = ERR_CANNOT_MAKE_DUCMENT_AS_CHILD 881 | return 882 | } 883 | nodePtr := node.NodePtr() 884 | if xmlNode.NodePtr() == nodePtr { 885 | return 886 | } 887 | ret := xmlNode.isAccestor(nodePtr) 888 | if ret < 0 { 889 | return 890 | } else if ret == 0 { 891 | if 
!xmlNode.Document.RemoveUnlinkedNode(nodePtr) { 892 | C.xmlUnlinkNodeWithCheck((*C.xmlNode)(nodePtr)) 893 | } 894 | C.xmlAddChild(xmlNode.Ptr, (*C.xmlNode)(nodePtr)) 895 | } else if ret > 0 { 896 | node.Remove() 897 | } 898 | 899 | return 900 | } 901 | 902 | func (xmlNode *XmlNode) addPreviousSibling(node Node) (err error) { 903 | nodeType := node.NodeType() 904 | if nodeType == XML_DOCUMENT_NODE || nodeType == XML_HTML_DOCUMENT_NODE { 905 | err = ERR_CANNOT_MAKE_DUCMENT_AS_CHILD 906 | return 907 | } 908 | nodePtr := node.NodePtr() 909 | if xmlNode.NodePtr() == nodePtr { 910 | return 911 | } 912 | ret := xmlNode.isAccestor(nodePtr) 913 | if ret < 0 { 914 | return 915 | } else if ret == 0 { 916 | if !xmlNode.Document.RemoveUnlinkedNode(nodePtr) { 917 | C.xmlUnlinkNodeWithCheck((*C.xmlNode)(nodePtr)) 918 | } 919 | C.xmlAddPrevSibling(xmlNode.Ptr, (*C.xmlNode)(nodePtr)) 920 | } else if ret > 0 { 921 | node.Remove() 922 | } 923 | return 924 | } 925 | 926 | func (xmlNode *XmlNode) addNextSibling(node Node) (err error) { 927 | nodeType := node.NodeType() 928 | if nodeType == XML_DOCUMENT_NODE || nodeType == XML_HTML_DOCUMENT_NODE { 929 | err = ERR_CANNOT_MAKE_DUCMENT_AS_CHILD 930 | return 931 | } 932 | nodePtr := node.NodePtr() 933 | if xmlNode.NodePtr() == nodePtr { 934 | return 935 | } 936 | ret := xmlNode.isAccestor(nodePtr) 937 | if ret < 0 { 938 | return 939 | } else if ret == 0 { 940 | if !xmlNode.Document.RemoveUnlinkedNode(nodePtr) { 941 | C.xmlUnlinkNodeWithCheck((*C.xmlNode)(nodePtr)) 942 | } 943 | C.xmlAddNextSibling(xmlNode.Ptr, (*C.xmlNode)(nodePtr)) 944 | } else if ret > 0 { 945 | node.Remove() 946 | } 947 | return 948 | } 949 | 950 | func (xmlNode *XmlNode) Wrap(data string) (err error) { 951 | newNodes, err := xmlNode.coerce(data) 952 | if err == nil && len(newNodes) > 0 { 953 | newParent := newNodes[0] 954 | xmlNode.addNextSibling(newParent) 955 | newParent.AddChild(xmlNode) 956 | } 957 | return 958 | } 959 | 960 | func (xmlNode *XmlNode) 
ParseFragment(input, url []byte, options ParseOption) (fragment *DocumentFragment, err error) { 961 | fragment, err = parsefragment(xmlNode.Document, xmlNode, input, url, options) 962 | return 963 | } 964 | 965 | //export xmlNodeWriteCallback 966 | func xmlNodeWriteCallback(wbufferObj unsafe.Pointer, data unsafe.Pointer, data_len C.int) { 967 | wbuffer := (*WriteBuffer)(wbufferObj) 968 | offset := wbuffer.Offset 969 | 970 | if offset > len(wbuffer.Buffer) { 971 | panic("fatal error in xmlNodeWriteCallback") 972 | } 973 | 974 | buffer := wbuffer.Buffer[:offset] 975 | dataLen := int(data_len) 976 | 977 | if dataLen > 0 { 978 | if len(buffer)+dataLen > cap(buffer) { 979 | newBuffer := grow(buffer, dataLen) 980 | wbuffer.Buffer = newBuffer 981 | } 982 | destBufPtr := unsafe.Pointer(&(wbuffer.Buffer[offset])) 983 | C.memcpy(destBufPtr, data, C.size_t(dataLen)) 984 | wbuffer.Offset += dataLen 985 | } 986 | } 987 | 988 | //export xmlUnlinkNodeCallback 989 | func xmlUnlinkNodeCallback(nodePtr unsafe.Pointer, gonodePtr unsafe.Pointer) { 990 | xmlNode := (*XmlNode)(gonodePtr) 991 | xmlNode.Document.AddUnlinkedNode(nodePtr) 992 | } 993 | 994 | func grow(buffer []byte, n int) (newBuffer []byte) { 995 | newBuffer = makeSlice(2*cap(buffer) + n) 996 | copy(newBuffer, buffer) 997 | return 998 | } 999 | 1000 | func makeSlice(n int) []byte { 1001 | // If the make fails, give a known error. 
1002 | defer func() { 1003 | if recover() != nil { 1004 | panic(ErrTooLarge) 1005 | } 1006 | }() 1007 | return make([]byte, n) 1008 | } 1009 | 1010 | func (xmlNode *XmlNode) isAccestor(nodePtr unsafe.Pointer) int { 1011 | parentPtr := xmlNode.Ptr.parent 1012 | 1013 | if C.xmlNodePtrCheck(unsafe.Pointer(parentPtr)) == C.int(0) { 1014 | return -1 1015 | } 1016 | for ; parentPtr != nil; parentPtr = parentPtr.parent { 1017 | if C.xmlNodePtrCheck(unsafe.Pointer(parentPtr)) == C.int(0) { 1018 | return -1 1019 | } 1020 | p := unsafe.Pointer(parentPtr) 1021 | if p == nodePtr { 1022 | return 1 1023 | } 1024 | } 1025 | return 0 1026 | } 1027 | 1028 | func (xmlNode *XmlNode) RecursivelyRemoveNamespaces() (err error) { 1029 | nodePtr := xmlNode.Ptr 1030 | C.xmlSetNs(nodePtr, nil) 1031 | 1032 | for child := xmlNode.FirstChild(); child != nil; { 1033 | child.RecursivelyRemoveNamespaces() 1034 | child = child.NextSibling() 1035 | } 1036 | 1037 | nodeType := xmlNode.NodeType() 1038 | 1039 | if ((nodeType == XML_ELEMENT_NODE) || 1040 | (nodeType == XML_XINCLUDE_START) || 1041 | (nodeType == XML_XINCLUDE_END)) && 1042 | (nodePtr.nsDef != nil) { 1043 | C.xmlFreeNsList((*C.xmlNs)(nodePtr.nsDef)) 1044 | nodePtr.nsDef = nil 1045 | } 1046 | 1047 | if nodeType == XML_ELEMENT_NODE && nodePtr.properties != nil { 1048 | property := nodePtr.properties 1049 | for property != nil { 1050 | if property.ns != nil { 1051 | property.ns = nil 1052 | } 1053 | property = property.next 1054 | } 1055 | } 1056 | return 1057 | } 1058 | 1059 | func (xmlNode *XmlNode) RemoveDefaultNamespace() { 1060 | nodePtr := xmlNode.Ptr 1061 | C.xmlRemoveDefaultNamespace(nodePtr) 1062 | } 1063 | 1064 | // Returns a list of all the namespace declarations that exist on this node. 1065 | 1066 | // You can add a namespace declaration by calling DeclareNamespace. 1067 | // Calling SetNamespace will automatically add a declaration if required. 1068 | 1069 | // Calling SetNsAttr does *not* automatically create a declaration. 
This will 1070 | // fixed in a future version. 1071 | func (xmlNode *XmlNode) DeclaredNamespaces() (result []NamespaceDeclaration) { 1072 | nodePtr := xmlNode.Ptr 1073 | for ns := nodePtr.nsDef; ns != nil; ns = (*C.xmlNs)(ns.next) { 1074 | prefixPtr := unsafe.Pointer(ns.prefix) 1075 | prefix := C.GoString((*C.char)(prefixPtr)) 1076 | hrefPtr := unsafe.Pointer(ns.href) 1077 | uri := C.GoString((*C.char)(hrefPtr)) 1078 | decl := NamespaceDeclaration{prefix, uri} 1079 | result = append(result, decl) 1080 | } 1081 | return 1082 | } 1083 | 1084 | // Add a namespace declaration to an element. 1085 | 1086 | // This is typically done on the root element or node high up in the tree 1087 | // to avoid duplication. The declaration is not created if the namespace 1088 | // is already declared in this scope with the same prefix. 1089 | func (xmlNode *XmlNode) DeclareNamespace(prefix, href string) { 1090 | //can only declare namespaces on elements 1091 | if xmlNode.NodeType() != XML_ELEMENT_NODE { 1092 | return 1093 | } 1094 | hrefBytes := GetCString([]byte(href)) 1095 | hrefPtr := unsafe.Pointer(&hrefBytes[0]) 1096 | 1097 | //if the namespace is already declared using this prefix, just return 1098 | _ns := C.xmlSearchNsByHref((*C.xmlDoc)(xmlNode.Document.DocPtr()), xmlNode.Ptr, (*C.xmlChar)(hrefPtr)) 1099 | if _ns != nil { 1100 | _prefixPtr := unsafe.Pointer(_ns.prefix) 1101 | _prefix := C.GoString((*C.char)(_prefixPtr)) 1102 | if prefix == _prefix { 1103 | return 1104 | } 1105 | } 1106 | 1107 | prefixBytes := GetCString([]byte(prefix)) 1108 | prefixPtr := unsafe.Pointer(&prefixBytes[0]) 1109 | if prefix == "" { 1110 | prefixPtr = nil 1111 | } 1112 | 1113 | //this adds the namespace declaration to the node 1114 | _ = C.xmlNewNs(xmlNode.Ptr, (*C.xmlChar)(hrefPtr), (*C.xmlChar)(prefixPtr)) 1115 | } 1116 | 1117 | // Set the namespace of an element. 
1118 | func (xmlNode *XmlNode) SetNamespace(prefix, href string) { 1119 | if xmlNode.NodeType() != XML_ELEMENT_NODE { 1120 | return 1121 | } 1122 | 1123 | prefixBytes := GetCString([]byte(prefix)) 1124 | prefixPtr := unsafe.Pointer(&prefixBytes[0]) 1125 | if prefix == "" { 1126 | prefixPtr = nil 1127 | } 1128 | 1129 | hrefBytes := GetCString([]byte(href)) 1130 | hrefPtr := unsafe.Pointer(&hrefBytes[0]) 1131 | 1132 | // use the existing namespace declaration if there is one 1133 | _ns := C.xmlSearchNsByHref((*C.xmlDoc)(xmlNode.Document.DocPtr()), xmlNode.Ptr, (*C.xmlChar)(hrefPtr)) 1134 | if _ns != nil { 1135 | _prefixPtr := unsafe.Pointer(_ns.prefix) 1136 | _prefix := C.GoString((*C.char)(_prefixPtr)) 1137 | if prefix == _prefix { 1138 | C.xmlSetNs(xmlNode.Ptr, _ns) 1139 | return 1140 | } 1141 | } 1142 | 1143 | ns := C.xmlNewNs(xmlNode.Ptr, (*C.xmlChar)(hrefPtr), (*C.xmlChar)(prefixPtr)) 1144 | C.xmlSetNs(xmlNode.Ptr, ns) 1145 | } 1146 | 1147 | // Returns the line number on which the node appears, or a -1 if the 1148 | // line number cannot be determined. 1149 | func (xmlNode *XmlNode) LineNumber() int { 1150 | return int(C.xmlGetLineNo(xmlNode.Ptr)) 1151 | } 1152 | --------------------------------------------------------------------------------