/*[1][./self:: and ]
44 | #//*/*[1][./self::]
45 |
46 | :first-child
47 | /*[position() = 1 and ./self:: and ]
48 | //*/*[position() = 1 and ./self:: and ]
49 |
50 |
51 | div, DEEP
52 | /descendant-or-self::*/*[./self::div]
53 |
54 | div, FLAT
55 | /child::*[./self::div]
56 |
57 | div span
58 | /descendant-or-self::*/*[./self::div] /descendant-or-self::*/*[./self::span]
59 |
60 | div > span
61 | /descendant-or-self::*/*[./self::div] /child::*[./self::span]
62 |
63 | div ~ span
64 | /descendant-or-self::*/*[./self::div] /following-sibling::*[./self::span]
65 |
66 | div + span
67 | /descendant-or-self::*/*[./self::div] /following-sibling::*[./self::span and position()=1]
68 |
69 | div:first-child, DEEP
70 | /descendant-or-self::*/*[./self::div and position()=1]
71 |
72 | div:first-child, FLAT
73 | /child::*[./self::div and position()=1]
74 |
75 | div:first-of-type, DEEP
76 | /descendant-or-self::*/*[./self::div][position()=1]
77 |
78 | div:last-of-type, DEEP
79 | /descendant-or-self::*/*[./self::div][position()=last()]
80 |
81 |
82 | div:nth-child(odd), DEEP
83 | /child::*[./self::div and position() mod 2 = 1]
84 |
85 | div:nth-child(a), DEEP
86 | /child::*[./self::div and position()=a]
87 |
88 |
89 |
90 | :first-child
91 | descendant-or-self::*/*[./self:: and position()=1]
92 |
93 |
94 | :first-child:last-child
95 |
96 | /*[position() = 1 and ./self::]
97 |
98 |
99 | foo + bar + hux --> foo [+ bar] [+ hux]
100 | foo/following-sibling::*[1]/self::bar/following-sibling::*[1]/self::hux
101 |
102 | foo ~ bar ~ hux
103 | foo/following-sibling::bar/following-sibling::hux
104 |
105 | foo bar
106 | foo//bar
107 |
108 | foo > bar
109 | foo/bar
110 |
111 | div.foo:nth-of-type(3)
112 | div[3][@class='foo']
113 |
114 | div:nth-child(3).foo
115 |
116 | *[3][./self::div][@class='foo']
117 |
118 | foo.bar:first-child
119 | *[1][./self::foo[@class='bar']]
120 |
121 | :first-child
122 | *[position()=1][./self::]
123 |
124 | :not(:first-child)
125 | *[not(position()=1)][./self::]
126 |
127 |
128 | div:nth-child(3)
129 | *[3][./self::div]
130 |
131 | // div *[3][./self::
132 |
133 |
134 | :nth-child(an+b)
135 | *[position() >= b and (position() - b) mod a = 0]
136 |
137 |
138 | :not(div)
139 | *[not(./self::div)]
140 |
141 | :first-child
142 | *[position() = 1]
143 | :nth-child(4)
144 | *[position() = 4]
145 |
146 | :not(:first-child)
147 | *[not(position() = 1)]
148 | :not(:nth-child(4))
149 | *[not(position() = 4)]
--------------------------------------------------------------------------------
/xml/fragment.go:
--------------------------------------------------------------------------------
1 | package xml
2 |
3 | //#include "helper.h"
4 | import "C"
5 | import (
6 | "errors"
7 | . "github.com/moovweb/gokogiri/util"
8 | "unsafe"
9 | )
10 |
// DocumentFragment wraps the root node of a subtree that was parsed from a
// fragment of markup inside an existing document.
11 | type DocumentFragment struct {
// Node is the root of the parsed fragment subtree (set by parsefragment).
12 | Node
// InEncoding is copied from the owning document's InputEncoding().
13 | InEncoding []byte
// OutEncoding is copied from the owning document's OutputEncoding() and is
// the encoding ToBuffer passes to ToHtml/ToXml.
14 | OutEncoding []byte
15 | }
16 |
17 | var (
18 | fragmentWrapperStart = []byte("")
19 | fragmentWrapperEnd = []byte("")
20 | )
21 |
22 | var ErrFailParseFragment = errors.New("failed to parse xml fragment")
23 | var ErrEmptyFragment = errors.New("empty xml fragment")
24 |
25 | const initChildrenNumber = 4
26 |
27 | func parsefragment(document Document, node *XmlNode, content, url []byte, options ParseOption) (fragment *DocumentFragment, err error) {
28 | //wrap the content before parsing
29 | content = append(fragmentWrapperStart, content...)
30 | content = append(content, fragmentWrapperEnd...)
31 |
32 | //set up pointers before calling the C function
33 | var contentPtr, urlPtr unsafe.Pointer
34 | contentPtr = unsafe.Pointer(&content[0])
35 | contentLen := len(content)
36 | if len(url) > 0 {
37 | url = AppendCStringTerminator(url)
38 | urlPtr = unsafe.Pointer(&url[0])
39 | }
40 |
41 | var rootElementPtr *C.xmlNode
42 |
43 | if node == nil {
44 | inEncoding := document.InputEncoding()
45 | var encodingPtr unsafe.Pointer
46 | if len(inEncoding) > 0 {
47 | encodingPtr = unsafe.Pointer(&inEncoding[0])
48 | }
49 | rootElementPtr = C.xmlParseFragmentAsDoc(document.DocPtr(), contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0)
50 |
51 | } else {
52 | rootElementPtr = C.xmlParseFragment(node.NodePtr(), contentPtr, C.int(contentLen), urlPtr, C.int(options), nil, 0)
53 | }
54 |
55 | //Note we've parsed the fragment within the given document
56 | //the root is not the root of the document; rather it's the root of the subtree from the fragment
57 | root := NewNode(unsafe.Pointer(rootElementPtr), document)
58 |
59 | //the fragment was in invalid
60 | if root == nil {
61 | err = ErrFailParseFragment
62 | return
63 | }
64 |
65 | fragment = &DocumentFragment{}
66 | fragment.Node = root
67 | fragment.InEncoding = document.InputEncoding()
68 | fragment.OutEncoding = document.OutputEncoding()
69 |
70 | document.BookkeepFragment(fragment)
71 | return
72 | }
73 |
74 | func ParseFragment(content, inEncoding, url []byte, options ParseOption, outEncoding []byte) (fragment *DocumentFragment, err error) {
75 | inEncoding = AppendCStringTerminator(inEncoding)
76 | outEncoding = AppendCStringTerminator(outEncoding)
77 | document := CreateEmptyDocument(inEncoding, outEncoding)
78 | fragment, err = parsefragment(document, nil, content, url, options)
79 | return
80 | }
81 |
82 | func (fragment *DocumentFragment) Remove() {
83 | fragment.Node.Remove()
84 | }
85 |
86 | func (fragment *DocumentFragment) Children() []Node {
87 | nodes := make([]Node, 0, initChildrenNumber)
88 | child := fragment.FirstChild()
89 | for ; child != nil; child = child.NextSibling() {
90 | nodes = append(nodes, child)
91 | }
92 | return nodes
93 | }
94 |
95 | func (fragment *DocumentFragment) ToBuffer(outputBuffer []byte) []byte {
96 | var b []byte
97 | var size int
98 | for _, node := range fragment.Children() {
99 | if docType := node.MyDocument().DocType(); docType == XML_HTML_DOCUMENT_NODE {
100 | b, size = node.ToHtml(fragment.OutEncoding, nil)
101 | } else {
102 | b, size = node.ToXml(fragment.OutEncoding, nil)
103 | }
104 | outputBuffer = append(outputBuffer, b[:size]...)
105 | }
106 | return outputBuffer
107 | }
108 |
109 | func (fragment *DocumentFragment) String() string {
110 | b := fragment.ToBuffer(nil)
111 | if b == nil {
112 | return ""
113 | }
114 | return string(b)
115 | }
116 |
--------------------------------------------------------------------------------
/xml/fragment_test.go:
--------------------------------------------------------------------------------
1 | package xml
2 |
3 | import "testing"
4 |
5 | func TestParseDocumentFragmentBasic(t *testing.T) {
6 | defer CheckXmlMemoryLeaks(t)
7 |
8 | doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
9 | if err != nil {
10 | t.Error("parsing error:", err.Error())
11 | return
12 | }
13 | root := doc.Root()
14 | if root != nil {
15 | println("root:", root.String())
16 | }
17 | docFragment, err := doc.ParseFragment([]byte("hi"), nil, DefaultParseOption)
18 | if err != nil {
19 | t.Error(err.Error())
20 | doc.Free()
21 | return
22 | }
23 | if len(docFragment.Children()) != 1 {
24 | t.Error("the number of children from the fragment does not match")
25 | }
26 | doc.Free()
27 | }
28 |
29 | func TestParseDocumentFragment(t *testing.T) {
30 | defer CheckXmlMemoryLeaks(t)
31 |
32 | doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
33 | if err != nil {
34 | t.Error("parsing error:", err.Error())
35 | return
36 | }
37 | docFragment, err := doc.ParseFragment([]byte("fun"), nil, DefaultParseOption)
38 | if err != nil {
39 | t.Error(err.Error())
40 | doc.Free()
41 | return
42 | }
43 | if docFragment.String() != "fun" {
44 | t.Error("fragment output is wrong\n")
45 | doc.Free()
46 | return
47 | }
48 | if len(docFragment.Children()) != 3 {
49 | t.Error("the number of children from the fragment does not match")
50 | }
51 | doc.Free()
52 | }
53 |
54 | func TestSearchDocumentFragment(t *testing.T) {
55 | defer CheckXmlMemoryLeaks(t)
56 |
57 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
58 | if err != nil {
59 | t.Error("parsing error:", err.Error())
60 | return
61 | }
62 | docFragment, err := doc.ParseFragment([]byte("fun"), nil, DefaultParseOption)
63 | if err != nil {
64 | t.Error(err.Error())
65 | doc.Free()
66 | return
67 | }
68 | nodes, err := docFragment.Search(".//*")
69 | if err != nil {
70 | t.Error("fragment search has error")
71 | doc.Free()
72 | return
73 | }
74 | if len(nodes) != 2 {
75 | t.Error("the number of children from the fragment does not match")
76 | }
77 | nodes, err = docFragment.Search("//*")
78 |
79 | if err != nil {
80 | t.Error("fragment search has error")
81 | doc.Free()
82 | return
83 | }
84 |
85 | if len(nodes) != 3 {
86 | t.Error("the number of children from the fragment's document does not match")
87 | }
88 |
89 | doc.Free()
90 | }
91 |
92 | func TestSearchDocumentFragmentWithEmptyDoc(t *testing.T) {
93 | defer CheckXmlMemoryLeaks(t)
94 |
95 | doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
96 | if err != nil {
97 | t.Error("parsing error:", err.Error())
98 | return
99 | }
100 | docFragment, err := doc.ParseFragment([]byte("fun"), nil, DefaultParseOption)
101 | if err != nil {
102 | t.Error(err.Error())
103 | doc.Free()
104 | return
105 | }
106 | nodes, err := docFragment.Search(".//*")
107 | if err != nil {
108 | t.Error("fragment search has error")
109 | doc.Free()
110 | return
111 | }
112 | if len(nodes) != 2 {
113 | t.Error("the number of children from the fragment does not match")
114 | }
115 | nodes, err = docFragment.Search("//*")
116 |
117 | if err != nil {
118 | t.Error("fragment search has error")
119 | doc.Free()
120 | return
121 | }
122 |
123 | if len(nodes) != 0 {
124 | t.Error("the number of children from the fragment's document does not match")
125 | }
126 |
127 | doc.Free()
128 | }
129 |
--------------------------------------------------------------------------------
/xpath/util.go:
--------------------------------------------------------------------------------
1 | package xpath
2 |
3 | /*
4 | #cgo pkg-config: libxml-2.0
5 |
6 | #include
7 | #include
8 | #include
9 |
10 | int getXPathObjectType(xmlXPathObject* o);
11 |
12 | */
13 | import "C"
14 |
15 | import "unsafe"
16 | import "reflect"
17 | import . "github.com/moovweb/gokogiri/util"
18 |
19 | //export go_resolve_variables
20 | func go_resolve_variables(ctxt unsafe.Pointer, name, ns *C.char) (ret C.xmlXPathObjectPtr) {
21 | variable := C.GoString(name)
22 | namespace := C.GoString(ns)
23 |
24 | context := (*VariableScope)(ctxt)
25 | if context != nil {
26 | val := (*context).ResolveVariable(variable, namespace)
27 | ret = ValueToXPathObject(val)
28 | }
29 | return
30 | }
31 |
32 | // Convert an arbitrary value into a C.xmlXPathObjectPtr
33 | // Unrecognised and nil values are converted to empty node sets.
34 | func ValueToXPathObject(val interface{}) (ret C.xmlXPathObjectPtr) {
35 | if val == nil {
36 | //return the empty node set
37 | ret = C.xmlXPathNewNodeSet(nil)
38 | return
39 | }
40 | switch v := val.(type) {
41 | case unsafe.Pointer:
42 | return (C.xmlXPathObjectPtr)(v)
43 | case []unsafe.Pointer:
44 | ptrs := v
45 | if len(ptrs) > 0 {
46 | //default - return a node set
47 | ret = C.xmlXPathNewNodeSet(nil)
48 | for _, p := range ptrs {
49 | C.xmlXPathNodeSetAdd(ret.nodesetval, (*C.xmlNode)(p))
50 | }
51 | } else {
52 | ret = C.xmlXPathNewNodeSet(nil)
53 | return
54 | }
55 | case float64:
56 | ret = C.xmlXPathNewFloat(C.double(v))
57 | case string:
58 | xpathBytes := GetCString([]byte(v))
59 | xpathPtr := unsafe.Pointer(&xpathBytes[0])
60 | ret = C.xmlXPathNewString((*C.xmlChar)(xpathPtr))
61 | default:
62 | typ := reflect.TypeOf(val)
63 | // if a pointer to a struct is passed, get the type of the dereferenced object
64 | if typ.Kind() == reflect.Ptr {
65 | typ = typ.Elem()
66 | }
67 | //log the unknown type, return an empty node set
68 | //fmt.Println("go-resolve wrong-type", typ.Kind())
69 | ret = C.xmlXPathNewNodeSet(nil)
70 | }
71 | return
72 | }
73 |
74 | //export exec_xpath_function
75 | func exec_xpath_function(ctxt C.xmlXPathParserContextPtr, nargs C.int) {
76 | function := C.GoString((*C.char)(unsafe.Pointer(ctxt.context.function)))
77 | namespace := C.GoString((*C.char)(unsafe.Pointer(ctxt.context.functionURI)))
78 | context := (*VariableScope)(ctxt.context.funcLookupData)
79 |
80 | argcount := int(nargs)
81 | var args []interface{}
82 |
83 | for i := 0; i < argcount; i = i + 1 {
84 | args = append(args, XPathObjectToValue(C.valuePop(ctxt)))
85 | }
86 |
87 | // arguments are popped off the stack in reverse order, so
88 | // we reverse the slice before invoking our callback
89 | if argcount > 1 {
90 | for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
91 | args[i], args[j] = args[j], args[i]
92 | }
93 | }
94 |
95 | // push the result onto the stack
96 | // if for some reason we are unable to resolve the
97 | // function we push an empty nodeset
98 | f := (*context).ResolveFunction(function, namespace)
99 | if f != nil {
100 | retval := f(*context, args)
101 | C.valuePush(ctxt, ValueToXPathObject(retval))
102 | } else {
103 | ret := C.xmlXPathNewNodeSet(nil)
104 | C.valuePush(ctxt, ret)
105 | }
106 |
107 | }
108 |
109 | //export go_can_resolve_function
110 | func go_can_resolve_function(ctxt unsafe.Pointer, name, ns *C.char) (ret C.int) {
111 | function := C.GoString(name)
112 | namespace := C.GoString(ns)
113 | context := (*VariableScope)(ctxt)
114 | if *context == nil {
115 | return C.int(0)
116 | }
117 | if (*context).IsFunctionRegistered(function, namespace) {
118 | return C.int(1)
119 | }
120 | return C.int(0)
121 | }
122 |
--------------------------------------------------------------------------------
/xml/attribute_test.go:
--------------------------------------------------------------------------------
1 | package xml
2 |
3 | import "testing"
4 | import "fmt"
5 |
6 | func TestSetValue(t *testing.T) {
7 | defer CheckXmlMemoryLeaks(t)
8 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
9 | if err != nil {
10 | t.Error("Parsing has error:", err)
11 | return
12 | }
13 | root := doc.Root()
14 | attributes := root.Attributes()
15 | if len(attributes) != 2 || attributes["myname"].String() != "ff" {
16 | fmt.Printf("%v, %q\n", attributes, attributes["myname"].String())
17 | t.Error("root's attributes do not match")
18 | }
19 | child := root.FirstChild()
20 | childAttributes := child.Attributes()
21 | if len(childAttributes) != 1 || childAttributes["class"].String() != "shine" {
22 | t.Error("child's attributes do not match")
23 | }
24 | attributes["myname"].SetValue("new")
25 | expected :=
26 | `
27 |
28 | `
29 | if root.String() != expected {
30 | println("got:\n", root.String())
31 | println("expected:\n", expected)
32 | t.Error("root's new attr do not match")
33 | }
34 | attributes["id"].Remove()
35 | expected =
36 | `
37 |
38 | `
39 |
40 | if root.String() != expected {
41 | println("got:\n", root.String())
42 | println("expected:\n", expected)
43 | t.Error("root's remove attr do not match")
44 | }
45 | doc.Free()
46 | }
47 |
48 | func TestSetAttribute(t *testing.T) {
49 | defer CheckXmlMemoryLeaks(t)
50 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
51 | if err != nil {
52 | t.Error("Parsing has error:", err)
53 | return
54 | }
55 | root := doc.Root()
56 | attributes := root.Attributes()
57 | if len(attributes) != 2 || attributes["myname"].String() != "ff" {
58 | fmt.Printf("%v, %q\n", attributes, attributes["myname"].String())
59 | t.Error("root's attributes do not match")
60 | }
61 |
62 | root.SetAttr("id", "cooler")
63 | root.SetAttr("id2", "hot")
64 | root.SetAttr("id3", "")
65 | expected :=
66 | `
67 |
68 | `
69 | if root.String() != expected {
70 | println("got:\n", root.String())
71 | println("expected:\n", expected)
72 | t.Error("root's new attr do not match")
73 | }
74 | if root.Attr("id3") != "" {
75 | println("got:\n", root.Attr("id3"))
76 | println("expected:\n", "")
77 | t.Error("root's attr should have empty val")
78 | }
79 | if root.Attribute("id3") == nil {
80 | t.Error("root's attr should not be nil")
81 | }
82 | doc.Free()
83 | }
84 |
85 | func TestSetEmptyAttribute(t *testing.T) {
86 | defer CheckXmlMemoryLeaks(t)
87 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
88 | if err != nil {
89 | t.Error("Parsing has error:", err)
90 | return
91 | }
92 | root := doc.Root()
93 | attributes := root.Attributes()
94 | if len(attributes) != 2 || attributes["myname"].String() != "ff" {
95 | fmt.Printf("%v, %q\n", attributes, attributes["myname"].String())
96 | t.Error("root's attributes do not match")
97 | }
98 |
99 | root.SetAttr("", "cool")
100 | expected :=
101 | `
102 |
103 | `
104 | if root.String() != expected {
105 | println("got:\n", root.String())
106 | println("expected:\n", expected)
107 | t.Error("root's new attr do not match")
108 | }
109 |
110 | root.SetAttr("", "")
111 | expected =
112 | `
113 |
114 | `
115 | if root.String() != expected {
116 | println("got:\n", root.String())
117 | println("expected:\n", expected)
118 | t.Error("root's new attr do not match")
119 | }
120 | doc.Free()
121 | }
122 |
--------------------------------------------------------------------------------
/html/document.go:
--------------------------------------------------------------------------------
1 | package html
2 |
3 | /*
4 | #cgo pkg-config: libxml-2.0
5 |
6 | #include
7 | #include
8 | #include "helper.h"
9 | */
10 | import "C"
11 |
12 | import (
13 | "errors"
14 | "github.com/moovweb/gokogiri/help"
15 | . "github.com/moovweb/gokogiri/util"
16 | "github.com/moovweb/gokogiri/xml"
17 | //"runtime"
18 | "unsafe"
19 | )
20 |
21 | //xml parse option
22 | const (
23 | HTML_PARSE_RECOVER xml.ParseOption = 1 << 0 /* Relaxed parsing */
24 | HTML_PARSE_NODEFDTD xml.ParseOption = 1 << 2 /* do not default a doctype if not found */
25 | HTML_PARSE_NOERROR xml.ParseOption = 1 << 5 /* suppress error reports */
26 | HTML_PARSE_NOWARNING xml.ParseOption = 1 << 6 /* suppress warning reports */
27 | HTML_PARSE_PEDANTIC xml.ParseOption = 1 << 7 /* pedantic error reporting */
28 | HTML_PARSE_NOBLANKS xml.ParseOption = 1 << 8 /* remove blank nodes */
29 | HTML_PARSE_NONET xml.ParseOption = 1 << 11 /* Forbid network access */
30 | HTML_PARSE_NOIMPLIED xml.ParseOption = 1 << 13 /* Do not add implied html/body... elements */
31 | HTML_PARSE_COMPACT xml.ParseOption = 1 << 16 /* compact small text nodes */
32 | )
33 |
34 | const EmptyHtmlDoc = ""
35 |
36 | //default parsing option: relax parsing
37 | var DefaultParseOption xml.ParseOption = HTML_PARSE_RECOVER |
38 | HTML_PARSE_NONET |
39 | HTML_PARSE_NOERROR |
40 | HTML_PARSE_NOWARNING
41 |
42 | type HtmlDocument struct {
43 | *xml.XmlDocument
44 | }
45 |
46 | //default encoding in byte slice
47 | var DefaultEncodingBytes = []byte(xml.DefaultEncoding)
48 | var emptyHtmlDocBytes = []byte(EmptyHtmlDoc)
49 |
50 | var ErrSetMetaEncoding = errors.New("Set Meta Encoding failed")
51 | var ERR_FAILED_TO_PARSE_HTML = errors.New("failed to parse html input")
52 | var emptyStringBytes = []byte{0}
53 |
54 | //create a document
55 | func NewDocument(p unsafe.Pointer, contentLen int, inEncoding, outEncoding []byte) (doc *HtmlDocument) {
56 | doc = &HtmlDocument{}
57 | doc.XmlDocument = xml.NewDocument(p, contentLen, inEncoding, outEncoding)
58 | doc.Me = doc
59 | node := doc.Node.(*xml.XmlNode)
60 | node.Document = doc
61 | //runtime.SetFinalizer(doc, (*HtmlDocument).Free)
62 | return
63 | }
64 |
65 | //parse a string to document
66 | func Parse(content, inEncoding, url []byte, options xml.ParseOption, outEncoding []byte) (doc *HtmlDocument, err error) {
67 | inEncoding = AppendCStringTerminator(inEncoding)
68 | outEncoding = AppendCStringTerminator(outEncoding)
69 |
70 | var docPtr *C.xmlDoc
71 | contentLen := len(content)
72 |
73 | if contentLen > 0 {
74 | var contentPtr, urlPtr, encodingPtr unsafe.Pointer
75 |
76 | contentPtr = unsafe.Pointer(&content[0])
77 | if len(url) > 0 {
78 | url = AppendCStringTerminator(url)
79 | urlPtr = unsafe.Pointer(&url[0])
80 | }
81 | if len(inEncoding) > 0 {
82 | encodingPtr = unsafe.Pointer(&inEncoding[0])
83 | }
84 |
85 | docPtr = C.htmlParse(contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0)
86 |
87 | if docPtr == nil {
88 | err = ERR_FAILED_TO_PARSE_HTML
89 | } else {
90 | doc = NewDocument(unsafe.Pointer(docPtr), contentLen, inEncoding, outEncoding)
91 | }
92 | }
93 | if docPtr == nil {
94 | doc = CreateEmptyDocument(inEncoding, outEncoding)
95 | }
96 | return
97 | }
98 |
99 | func CreateEmptyDocument(inEncoding, outEncoding []byte) (doc *HtmlDocument) {
100 | help.LibxmlInitParser()
101 | docPtr := C.htmlNewDoc(nil, nil)
102 | doc = NewDocument(unsafe.Pointer(docPtr), 0, inEncoding, outEncoding)
103 | return
104 | }
105 |
106 | func (document *HtmlDocument) ParseFragment(input, url []byte, options xml.ParseOption) (fragment *xml.DocumentFragment, err error) {
107 | root := document.Root()
108 | if root == nil {
109 | fragment, err = parsefragment(document, nil, input, url, options)
110 | } else {
111 | fragment, err = parsefragment(document, root.XmlNode, input, url, options)
112 | }
113 | return
114 | }
115 |
116 | func (doc *HtmlDocument) MetaEncoding() string {
117 | metaEncodingXmlCharPtr := C.htmlGetMetaEncoding((*C.xmlDoc)(doc.DocPtr()))
118 | return C.GoString((*C.char)(unsafe.Pointer(metaEncodingXmlCharPtr)))
119 | }
120 |
121 | func (doc *HtmlDocument) SetMetaEncoding(encoding string) (err error) {
122 | var encodingPtr unsafe.Pointer = nil
123 | if len(encoding) > 0 {
124 | encodingBytes := AppendCStringTerminator([]byte(encoding))
125 | encodingPtr = unsafe.Pointer(&encodingBytes[0])
126 | }
127 | ret := int(C.htmlSetMetaEncoding((*C.xmlDoc)(doc.DocPtr()), (*C.xmlChar)(encodingPtr)))
128 | if ret == -1 {
129 | err = ErrSetMetaEncoding
130 | }
131 | return
132 | }
133 |
--------------------------------------------------------------------------------
/html/fragment_test.go:
--------------------------------------------------------------------------------
1 | package html
2 |
3 | import "testing"
4 |
5 | func TestParseDocumentFragmentText(t *testing.T) {
6 | doc, err := Parse(nil, []byte("iso-8859-1"), nil, DefaultParseOption, []byte("iso-8859-1"))
7 | if err != nil {
8 | println(err.Error())
9 | }
10 | docFragment, err := doc.ParseFragment([]byte("ok\r\n"), nil, DefaultParseOption)
11 | if err != nil {
12 | t.Error(err.Error())
13 | return
14 | }
15 | if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "ok\r\n" {
16 | println(docFragment.String())
17 | t.Error("the children from the fragment text do not match")
18 | }
19 | doc.Free()
20 | CheckXmlMemoryLeaks(t)
21 | }
22 |
// TestParseDocumentFragment parses a fragment into an empty HTML document and
// expects exactly one child whose serialization matches the expected string.
// NOTE(review): the fragment literal is empty and the expected-string literal
// is split across two lines in this copy; the original markup looks lost —
// confirm against the upstream file before relying on this test.
23 | func TestParseDocumentFragment(t *testing.T) {
24 | doc, err := Parse(nil, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
25 | if err != nil {
26 | println(err.Error())
27 | }
28 | docFragment, err := doc.ParseFragment([]byte(""), nil, DefaultParseOption)
29 | if err != nil {
// Note: this early return neither frees doc nor runs the leak check.
30 | t.Error(err.Error())
31 | return
32 | }
33 | if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "
" {
34 | t.Error("the of children from the fragment do not match")
35 | }
36 |
// Free the document before checking for leaked libxml allocations.
37 | doc.Free()
38 | CheckXmlMemoryLeaks(t)
39 |
40 | }
41 |
42 | func TestParseDocumentFragment2(t *testing.T) {
43 | docStr := `
44 |
45 |
46 |
47 | `
48 | doc, err := Parse([]byte(docStr), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
49 | if err != nil {
50 | println(err.Error())
51 | }
52 | docFragment, err := doc.ParseFragment([]byte(""), nil, DefaultParseOption)
53 | if err != nil {
54 | t.Error(err.Error())
55 | return
56 | }
57 | if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "" {
58 | t.Error("the of children from the fragment do not match")
59 | }
60 |
61 | doc.Free()
62 | CheckXmlMemoryLeaks(t)
63 | }
64 |
// TestSearchDocumentFragment parses a fragment into an HTML document, checks
// its single child, then runs a relative search (expecting 2 matches) and an
// absolute search for div[@class='cool'] (expecting 1 match).
// NOTE(review): the document/fragment literals are empty and the
// expected-string literal is split across two lines in this copy; the
// original markup looks lost — confirm against the upstream file.
65 | func TestSearchDocumentFragment(t *testing.T) {
66 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
67 | if err != nil {
68 | println(err.Error())
69 | }
70 | docFragment, err := doc.ParseFragment([]byte(""), nil, DefaultParseOption)
71 | if err != nil {
// Note: the early returns in this test neither free doc nor run the leak check.
72 | t.Error(err.Error())
73 | return
74 | }
75 | if len(docFragment.Children()) != 1 || docFragment.Children()[0].String() != "
" {
76 | t.Error("the of children from the fragment do not match")
77 | }
78 |
// Relative search: starts from the fragment itself.
79 | nodes, err := docFragment.Search(".//*")
80 | if err != nil {
81 | t.Error("fragment search has error")
82 | return
83 | }
84 | if len(nodes) != 2 {
85 | t.Error("the number of children from the fragment does not match")
86 | }
// Absolute search: the path starts with "//" rather than ".//".
87 | nodes, err = docFragment.Search("//div[@class='cool']")
88 |
89 | if err != nil {
90 | t.Error("fragment search has error")
91 | return
92 | }
93 |
94 | if len(nodes) != 1 {
// Dump what was actually found to help diagnose the mismatch.
95 | println(len(nodes))
96 | for _, node := range nodes {
97 | println(node.String())
98 | }
99 | t.Error("the number of children from the fragment's document does not match")
100 | }
101 |
// Free the document before checking for leaked libxml allocations.
102 | doc.Free()
103 | CheckXmlMemoryLeaks(t)
104 | }
105 |
106 | func TestAddFragmentWithNamespace(t *testing.T) {
107 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
108 | if err != nil {
109 | println(err.Error())
110 | }
111 | defer doc.Free()
112 | docFragment, err := doc.ParseFragment([]byte(""), nil, DefaultParseOption)
113 | if err != nil {
114 | t.Error(err.Error())
115 | return
116 | }
117 | if docFragment.String() != `
` {
118 | t.Errorf("doc fragment does not match\n")
119 | }
120 | doc2, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
121 | if err != nil {
122 | println(err.Error())
123 | return
124 | }
125 | defer doc2.Free()
126 | body := doc2.Root().FirstChild()
127 | body.AddChild(docFragment)
128 | if doc2.String() != `
129 |
130 |
131 |
132 |
133 | ` {
134 | t.Errorf("document does not match after adding a fragment with namespace\n")
135 | }
136 | CheckXmlMemoryLeaks(t)
137 | }
138 |
--------------------------------------------------------------------------------
/xml/search_test.go:
--------------------------------------------------------------------------------
1 | package xml
2 |
3 | import "testing"
4 |
5 | func TestSearch(t *testing.T) {
6 |
7 | testLogic := func(t *testing.T, doc *XmlDocument) {
8 | root := doc.Root()
9 | result, _ := root.Search(".//*[@class]")
10 | if len(result) != 2 {
11 | t.Error("search at root does not match")
12 | }
13 | result, _ = root.Search("//*[@class]")
14 | if len(result) != 3 {
15 | t.Error("search at root does not match")
16 | }
17 | result, _ = doc.Search(".//*[@class]")
18 | if len(result) != 3 {
19 | t.Error("search at doc does not match")
20 | }
21 | result, _ = doc.Search(".//*[@class='shine']")
22 | if len(result) != 2 {
23 | t.Error("search with value at doc does not match")
24 | }
25 | }
26 |
27 | RunTest(t, "node", "search", testLogic)
28 | }
29 |
30 | func BenchmarkSearch(b *testing.B) {
31 |
32 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
33 | root := doc.Root()
34 |
35 | for i := 0; i < b.N; i++ {
36 | root.Search(".//*[@class]")
37 | }
38 | }
39 |
40 | RunBenchmark(b, "node", "search", benchmarkLogic)
41 | }
42 |
43 | func BenchmarkBigDocDeepSearchyTagName(b *testing.B) {
44 |
45 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
46 |
47 | for i := 0; i < b.N; i++ {
48 | doc.Search("//div")
49 | }
50 | }
51 |
52 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
53 | }
54 |
55 | func BenchmarkBigDocPunctuatedDeepSearch(b *testing.B) {
56 |
57 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
58 |
59 | for i := 0; i < b.N; i++ {
60 | doc.Search("//*[@class='filters']//div")
61 | }
62 | }
63 |
64 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
65 | }
66 |
67 | func BenchmarkBigDocDeepSearchByID(b *testing.B) {
68 |
69 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
70 |
71 | for i := 0; i < b.N; i++ {
72 | doc.Search("//*[@id='ppp']")
73 | //nodes, _ := doc.Search("//*[@id='ppp']")
74 | //fmt.Printf("%v\t", len(nodes))
75 | }
76 | }
77 |
78 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
79 | }
80 |
81 | func BenchmarkBigDocDeepSearchByClass(b *testing.B) {
82 |
83 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
84 |
85 | for i := 0; i < b.N; i++ {
86 | doc.Search("//*[@class]")
87 | //nodes, _ := doc.Search("//*[@class]")
88 | //fmt.Printf("%v\t", len(nodes))
89 | }
90 | }
91 |
92 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
93 | }
94 |
95 | func BenchmarkBigDocDeepSearchByClassContains(b *testing.B) {
96 |
97 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
98 |
99 | for i := 0; i < b.N; i++ {
100 | doc.Search("//*[contains(@class, 'header')]")
101 | }
102 | }
103 |
104 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
105 | }
106 |
107 | func BenchmarkBigDocDeepSearchBySemanticClass(b *testing.B) {
108 |
109 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
110 |
111 | for i := 0; i < b.N; i++ {
112 | doc.Search("//*[contains(concat(concat(' ', @class), ' '), concat(concat(' ','header'), ' '))]")
113 | }
114 | }
115 |
116 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
117 | }
118 |
119 | func BenchmarkBigDocDeepSearchByText(b *testing.B) {
120 |
121 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
122 |
123 | for i := 0; i < b.N; i++ {
124 | doc.Search("//*[text()='hey']")
125 | }
126 | }
127 |
128 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
129 | }
130 |
131 | func BenchmarkBigDocDeepSearchByTextContains(b *testing.B) {
132 |
133 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
134 |
135 | for i := 0; i < b.N; i++ {
136 | doc.Search("//*[contains(text(),'hey')]")
137 | }
138 | }
139 |
140 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
141 | }
142 |
143 | func BenchmarkBigDocSearchAncestorAxes(b *testing.B) {
144 |
145 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
146 | elem, _ := doc.Search("//*[@id='ppp']")
147 | for i := 0; i < b.N; i++ {
148 | elem[0].Search("ancestor::html")
149 | }
150 | }
151 |
152 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
153 | }
154 |
155 | func BenchmarkBigDocSearchLongTraverseUpToRoot(b *testing.B) {
156 |
157 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
158 | elem, _ := doc.Search("//*[@id='ppp']")
159 |
160 | for i := 0; i < b.N; i++ {
161 | elem[0].Search("../../../../../../../../..")
162 | }
163 | }
164 |
165 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
166 | }
167 |
168 | func BenchmarkBigDocSearchShortTraverseUpToRoot(b *testing.B) {
169 |
170 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
171 | elem, _ := doc.Search("//*[@id='ppp']")
172 |
173 | for i := 0; i < b.N; i++ {
174 | elem[0].Search("../../../..")
175 | }
176 | }
177 |
178 | RunBenchmark(b, "document", "big_un", benchmarkLogic)
179 | }
180 |
--------------------------------------------------------------------------------
/xml/utils_test.go:
--------------------------------------------------------------------------------
1 | package xml
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "github.com/moovweb/gokogiri/help"
7 | "github.com/moovweb/gokogiri/xpath"
8 | "io/ioutil"
9 | "path/filepath"
10 | "strings"
11 | "testing"
12 | )
13 |
// badOutput prints the actual and expected values to standard output, each
// under its own heading and wrapped in square brackets.
func badOutput(actual string, expected string) {
	sections := []struct {
		format string
		text   string
	}{
		{"Got:\n[%v]\n", actual},
		{"Expected:\n[%v]\n", expected},
	}
	for _, section := range sections {
		fmt.Printf(section.format, section.text)
	}
}
18 |
19 | func RunTest(t *testing.T, suite string, name string, specificLogic func(t *testing.T, doc *XmlDocument), extraAssertions ...func(doc *XmlDocument) (string, string, string)) {
20 | defer CheckXmlMemoryLeaks(t)
21 |
22 | //println("Initiating test:" + suite + ":" + name)
23 |
24 | input, output, error := getTestData(filepath.Join("tests", suite, name))
25 |
26 | if len(error) > 0 {
27 | t.Errorf("Error gathering test data for %v:\n%v\n", name, error)
28 | t.FailNow()
29 | }
30 |
31 | expected := string(output)
32 |
33 | //println("Got raw input/output")
34 |
35 | doc, err := parseInput(input)
36 |
37 | if err != nil {
38 | t.Error(err.Error())
39 | }
40 |
41 | //println("parsed input")
42 |
43 | if specificLogic != nil {
44 | specificLogic(t, doc)
45 | }
46 | if doc.String() != expected {
47 | badOutput(doc.String(), expected)
48 | t.Error("the output of the xml doc does not match")
49 | }
50 | for _, extraAssertion := range extraAssertions {
51 | actual, expected, message := extraAssertion(doc)
52 |
53 | if actual != expected {
54 | badOutput(actual, expected)
55 | t.Error(message)
56 | }
57 | }
58 |
59 | doc.Free()
60 | }
61 |
62 | func RunBenchmark(b *testing.B, suite string, name string, specificLogic func(b *testing.B, doc *XmlDocument)) {
63 | b.StopTimer()
64 |
65 | // defer CheckXmlMemoryLeaks(b)
66 |
67 | input, _, error := getTestData(filepath.Join("tests", suite, name))
68 |
69 | if len(error) > 0 {
70 | panic(fmt.Sprintf("Error gathering test data for %v:\n%v\n", name, error))
71 | }
72 |
73 | doc, err := parseInput(input)
74 |
75 | if err != nil {
76 | panic("Error:" + err.Error())
77 | }
78 |
79 | b.StartTimer()
80 |
81 | if specificLogic != nil {
82 | specificLogic(b, doc)
83 | }
84 |
85 | doc.Free()
86 |
87 | // println("----------- END OF BENCHMARK -----------")
88 | }
89 |
90 | func parseInput(input interface{}) (*XmlDocument, error) {
91 | var realInput []byte
92 |
93 | switch thisInput := input.(type) {
94 | case []byte:
95 | realInput = thisInput
96 | case string:
97 | realInput = []byte(thisInput)
98 | default:
99 | return nil, errors.New("Unrecognized parsing input!")
100 | }
101 |
102 | doc, err := Parse(realInput, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
103 |
104 | if err != nil {
105 | return nil, errors.New(fmt.Sprintf("parsing error:%v\n", err.Error()))
106 | }
107 |
108 | return doc, nil
109 | }
110 |
// getTestData reads input.txt and output.txt from the directory name.
// Both reads are always attempted; each failure appends a tab-indented
// message to the returned error string, which is empty when both succeed.
func getTestData(name string) (input []byte, output []byte, error string) {
	const offset = "\t"

	// read loads one fixture file and records a failure using format.
	read := func(file, format string) []byte {
		data, err := ioutil.ReadFile(filepath.Join(name, file))
		if err != nil {
			error += fmt.Sprintf(format, offset, name, offset+err.Error())
		}
		return data
	}

	input = read("input.txt", "%vCouldn't read test (%v) input:\n%v\n")
	output = read("output.txt", "%vCouldn't read test (%v) output:\n%v\n")
	return input, output, error
}
130 |
// collectTests lists the fixture directories under tests/<suite>. Entries
// whose name starts with "_" or "." and entries that are not directories are
// ignored. Each returned name is the path tests/<suite>/<entry>. When the
// suite directory cannot be read, names is nil and error holds the message.
func collectTests(suite string) (names []string, error string) {
	testPath := filepath.Join("tests", suite)

	entries, readErr := ioutil.ReadDir(testPath)
	if readErr != nil {
		return nil, fmt.Sprintf("Couldn't read tests:\n%v\n", readErr.Error())
	}

	for _, entry := range entries {
		entryName := entry.Name()
		skipped := strings.HasPrefix(entryName, "_") || strings.HasPrefix(entryName, ".")
		if !skipped && entry.IsDir() {
			names = append(names, filepath.Join(testPath, entryName))
		}
	}
	return names, ""
}
151 |
152 | func CheckXmlMemoryLeaks(t *testing.T) {
153 | // LibxmlCleanUpParser() should only be called once during the lifetime of the
154 | // program, but because there's no way to know when the last test of the suite
155 | // runs in go, we can't accurately call it strictly once, so just avoid calling
156 | // it for now because it's known to cause crashes if called multiple times.
157 | //help.LibxmlCleanUpParser()
158 |
159 | if !help.LibxmlCheckMemoryLeak() {
160 | t.Errorf("Memory leaks: %d!!!", help.LibxmlGetMemoryAllocation())
161 | help.LibxmlReportMemoryLeak()
162 | }
163 | }
164 |
165 | // This is a simple test implementation of the VariableScope interface.
166 | // Currently it's os simple it ignores the namespace argument.
167 | type SimpleVariableScope struct {
168 | variables map[string]interface{}
169 | functions map[string]xpath.XPathFunction
170 | }
171 |
172 | func (s *SimpleVariableScope) ResolveVariable(name, ns string) interface{} {
173 | v, _ := s.variables[name]
174 | return v
175 | }
176 |
177 | func (s *SimpleVariableScope) IsFunctionRegistered(name, ns string) bool {
178 | _, ok := s.functions[name]
179 | return ok
180 | }
181 | func (s *SimpleVariableScope) ResolveFunction(name, ns string) xpath.XPathFunction {
182 | f, _ := s.functions[name]
183 | return f
184 | }
185 |
186 | func newSimpleVariableScope() *SimpleVariableScope {
187 | s := &SimpleVariableScope{make(map[string]interface{}), make(map[string]xpath.XPathFunction)}
188 | return s
189 | }
190 |
--------------------------------------------------------------------------------
/xml/helper.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include "helper.h"
3 |
4 | //internal callback functions
// Write callback passed to xmlSaveToIO by xmlSaveNode: forwards every
// non-empty chunk to xmlNodeWriteCallback and returns len unchanged.
int xml_write_callback(void *ctx, char *buffer, int len) {
    if (len <= 0) {
        return len;
    }
    xmlNodeWriteCallback(ctx, buffer, len);
    return len;
}
11 |
// Close callback passed to xmlSaveToIO by xmlSaveNode: there is nothing to
// release, so it ignores ctx and always returns 0.
int close_callback(void * ctx) {
    (void)ctx;
    return 0;
}
15 |
16 | xmlDoc* newEmptyXmlDoc() {
17 | return xmlNewDoc(BAD_CAST XML_DEFAULT_VERSION);
18 | }
19 |
20 | xmlElementType getNodeType(xmlNode *node) { return node->type; }
21 |
22 | void xmlFreeChars(char *buffer) {
23 | if (buffer) {
24 | xmlFree((xmlChar*)buffer);
25 | }
26 | }
27 |
28 | char *xmlDocDumpToString(xmlDoc *doc, void *encoding, int format) {
29 | xmlChar *buff;
30 | int buffersize;
31 | xmlDocDumpFormatMemoryEnc(doc, &buff, &buffersize, (char*)encoding, format);
32 | return (char*)buff;
33 | }
34 |
35 | char *htmlDocDumpToString(htmlDocPtr doc, int format) {
36 | xmlChar *buff;
37 | int buffersize;
38 | htmlDocDumpMemoryFormat(doc, &buff, &buffersize, format);
39 | return (char*)buff;
40 | }
41 |
42 | xmlDoc* xmlParse(void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) {
43 | const char *c_buffer = (char*)buffer;
44 | const char *c_url = (char*)url;
45 | const char *c_encoding = (char*)encoding;
46 | xmlDoc *doc = NULL;
47 |
48 | xmlResetLastError();
49 | doc = xmlReadMemory(c_buffer, buffer_len, c_url, c_encoding, options);
50 |
51 | if(doc == NULL) {
52 | xmlErrorPtr error;
53 | xmlFreeDoc(doc);
54 | error = xmlGetLastError();
55 | if(error != NULL && error_buffer != NULL && error->level >= XML_ERR_ERROR) {
56 | char *c_error_buffer = (char*)error_buffer;
57 | if (error->message != NULL) {
58 | strncpy(c_error_buffer, error->message, error_buffer_len-1);
59 | c_error_buffer[error_buffer_len-1] = '\0';
60 | }
61 | else {
62 | //snprintf(c_error_buffer, error_buffer_len, "xml parsing error:%d", error->code);
63 | }
64 | }
65 | }
66 | return doc;
67 | }
68 |
69 | xmlNode* xmlParseFragment(void *doc, void *buffer, int buffer_len, void *url, int options, void *error_buffer, int error_buffer_len) {
70 | xmlNodePtr root_element = NULL;
71 | xmlParserErrors errCode;
72 | errCode = xmlParseInNodeContext((xmlNodePtr)doc, buffer, buffer_len, options, &root_element);
73 | if (errCode != XML_ERR_OK) {
74 | if (error_buffer != NULL && error_buffer_len > 0) {
75 | //char *c_error_buffer = (char*)error_buffer;
76 | //snprintf(c_error_buffer, error_buffer_len, "xml fragemnt parsing error (xmlParserErrors):%d", errCode);
77 | }
78 | printf("errorcode %d\n", errCode);
79 | return NULL;
80 | }
81 | return root_element;
82 | }
83 |
84 | xmlNode* xmlParseFragmentAsDoc(void *doc, void *buffer, int buffer_len, void *url, void *encoding, int options, void *error_buffer, int error_buffer_len) {
85 | xmlDoc* tmpDoc = NULL;
86 | xmlNode* tmpRoot = NULL;
87 | tmpDoc = xmlReadMemory((char*)buffer, buffer_len, (char*)url, (char*)encoding, options);
88 | if (tmpDoc == NULL) {
89 | return NULL;
90 | }
91 | tmpRoot = xmlDocGetRootElement(tmpDoc);
92 | if (tmpRoot == NULL) {
93 | return NULL;
94 | }
95 | tmpRoot = xmlDocCopyNode(tmpRoot, doc, 1);
96 | xmlFreeDoc(tmpDoc);
97 | return tmpRoot;
98 | }
99 |
100 | void xmlSetContent(void *gonode, void *n, void *content) {
101 | xmlNode *node = (xmlNode*)n;
102 | xmlNode *child = node->children;
103 | xmlNode *next = NULL;
104 | unsigned char *encoded = xmlEncodeSpecialChars(node->doc, content);
105 | if (encoded) {
106 | while (child) {
107 | next = child->next ;
108 | xmlUnlinkNode(child);
109 | //xmlFreeNode(child);
110 | xmlUnlinkNodeCallback(child, gonode);
111 | child = next ;
112 | }
113 | xmlNodeSetContent(node, (xmlChar*)encoded);
114 | xmlFree(encoded);
115 | }
116 | }
117 |
118 | int xmlUnlinkNodeWithCheck(xmlNode *node) {
119 | if (xmlNodePtrCheck(node->parent)) {
120 | xmlUnlinkNode(node);
121 | return 1;
122 | }
123 | return 0;
124 | }
125 |
// Returns 0 when node is the sentinel pointer value (void*)(-1), and 1 for
// every other value, NULL included.
int xmlNodePtrCheck(void *node) {
    return node != (void*)(-1);
}
131 |
132 | int xmlSaveNode(void *wbuffer, void *node, void *encoding, int options) {
133 | xmlSaveCtxtPtr savectx;
134 | const char *c_encoding = (char*)encoding;
135 |
136 | savectx = xmlSaveToIO(
137 | (xmlOutputWriteCallback)xml_write_callback,
138 | (xmlOutputCloseCallback)close_callback,
139 | wbuffer,
140 | encoding,
141 | options
142 | );
143 | xmlSaveTree(savectx, (xmlNode*)node);
144 | return xmlSaveClose(savectx);
145 | }
146 |
147 | void removeNamespace(xmlNs **source, xmlNs *target) {
148 | xmlNs *ns, *prevns = NULL;
149 |
150 | for (ns = *source; ns; ns = ns->next) {
151 | if (ns == target) {
152 | if (!prevns) {
153 | // we are the first element
154 | *source = ns->next;
155 | } else {
156 | prevns->next = ns->next;
157 | }
158 |
159 | break;
160 | }
161 |
162 | prevns = ns;
163 | }
164 | }
165 |
166 | void removeDefaultNamespace(xmlNs *ns, xmlNode *node) {
167 | removeNamespace(&node->nsDef, ns);
168 |
169 | xmlAttr *attr;
170 |
171 | for (attr = node->properties; attr; attr = attr->next) {
172 | if (!attr->ns)
173 | continue;
174 |
175 | removeNamespace(&attr->ns, ns);
176 | }
177 |
178 | if (node->ns == ns)
179 | node->ns = NULL;
180 |
181 | xmlNode *child;
182 |
183 | for (child = xmlFirstElementChild(node); child; child = xmlNextElementSibling(child)) {
184 | removeDefaultNamespace(ns, child);
185 | }
186 | }
187 |
188 | void xmlRemoveDefaultNamespace(xmlNode *node) {
189 | if (node->ns && node->ns->prefix) {
190 | // not a default namespace
191 | return;
192 | }
193 |
194 | removeDefaultNamespace(node->ns, node);
195 | }
196 |
--------------------------------------------------------------------------------
/html/node_test.go:
--------------------------------------------------------------------------------
1 | package html
2 |
3 | import "testing"
4 |
// TestInnerScript parses a document, calls SetInnerHtml on the node reached by
// three FirstChild() hops below the root, and compares that node's String()
// output with an expected literal. The leak check runs when the test returns.
// NOTE(review): every string literal in this copy is empty; the markup appears
// to have been lost in extraction — restore it from the upstream file before
// relying on this test.
5 | func TestInnerScript(t *testing.T) {
6 | defer CheckXmlMemoryLeaks(t)
7 |
8 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
9 |
10 | if err != nil {
11 | t.Error("Parsing has error:", err)
12 | return
13 | }
14 |
// Root -> first child -> first child -> first child is the node under test.
15 | h1 := doc.Root().FirstChild().FirstChild().FirstChild()
16 | h1.SetInnerHtml("")
17 | if h1.String() != "" {
18 | t.Error("script does not match")
19 | }
20 | doc.Free()
21 | }
22 |
// TestInnerScript2 parses a document, sets the inner HTML of the node reached
// by three FirstChild() hops below the root to the script literal, and checks
// that the node's String() output wraps that script.
// NOTE(review): this copy is damaged — the script literal is empty, the inner
// line numbers jump from 25 to 56, and the comparison string on line 66 is
// split across two lines. Restore the function from the upstream file.
23 | func TestInnerScript2(t *testing.T) {
24 | defer CheckXmlMemoryLeaks(t)
25 | script := ``
56 |
57 | doc, err := Parse([]byte(""), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
58 |
59 | if err != nil {
60 | t.Error("Parsing has error:", err)
61 | return
62 | }
63 |
64 | h1 := doc.Root().FirstChild().FirstChild().FirstChild()
65 | h1.SetInnerHtml(script)
66 | if h1.String() != ""+script+"
" {
67 | t.Error("script does not match")
68 | }
69 | doc.Free()
70 | }
71 |
// TestInsertMyselfBefore parses input, requires exactly one match for "//div",
// calls InsertBefore on that node with itself as the argument, and compares
// doc.String() with expected. The document is freed and the leak check run at
// the end.
// NOTE(review): the input and expected literals contain no markup in this copy
// (no div is visible although one is searched for); they appear to have been
// stripped in extraction — restore them from the upstream file.
72 | func TestInsertMyselfBefore(t *testing.T) {
73 | input := `
74 |
75 | Title
76 |
77 |
78 |
79 | Welcome to Tritium Tester
80 |
81 |
82 | `
83 | doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
84 |
85 | if err != nil {
86 | t.Error("Parsing has error:", err)
87 | return
88 | }
89 |
// The Search error is discarded; only the number of matches is checked.
90 | divs, _ := doc.Search("//div")
91 | if len(divs) != 1 {
92 | t.Error("should have 1 div")
93 | return
94 | }
95 |
96 | div := divs[0]
// Insert the node before itself.
97 | div.InsertBefore(div)
98 |
99 | expected := `
100 |
101 |
102 |
103 | Title
104 |
105 |
106 |
107 | Welcome to Tritium Tester
108 |
109 |
110 | `
111 | if expected != doc.String() {
112 | t.Error("doc is not expected:\n", doc.String(), "\n", expected)
113 | }
114 | doc.Free()
115 | CheckXmlMemoryLeaks(t)
116 | }
117 |
// TestInsertMyselfAfter parses input, requires exactly one match for "//div",
// calls InsertAfter on that node with itself as the argument, and compares
// doc.String() with expected. The document is freed and the leak check run at
// the end.
// NOTE(review): the input and expected literals contain no markup in this copy
// (no div is visible although one is searched for); they appear to have been
// stripped in extraction — restore them from the upstream file.
118 | func TestInsertMyselfAfter(t *testing.T) {
119 | input := `
120 |
121 | Title
122 |
123 |
124 |
125 | Welcome to Tritium Tester
126 |
127 |
128 | `
129 | doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
130 |
131 | if err != nil {
132 | t.Error("Parsing has error:", err)
133 | return
134 | }
135 |
// The Search error is discarded; only the number of matches is checked.
136 | divs, _ := doc.Search("//div")
137 | if len(divs) != 1 {
138 | t.Error("should have 1 div")
139 | return
140 | }
141 |
142 | div := divs[0]
// Insert the node after itself.
143 | div.InsertAfter(div)
144 |
145 | expected := `
146 |
147 |
148 |
149 | Title
150 |
151 |
152 |
153 | Welcome to Tritium Tester
154 |
155 |
156 | `
157 | if expected != doc.String() {
158 | t.Error("doc is not expected:\n", doc.String(), "\n", expected)
159 | }
160 | doc.Free()
161 | CheckXmlMemoryLeaks(t)
162 | }
163 |
// TestAddMyselfChild parses input, requires exactly one match for "//div",
// calls AddChild on that node with itself as the argument, and compares
// doc.String() with expected. The document is freed and the leak check run at
// the end.
// NOTE(review): the input and expected literals contain no markup in this copy
// (no div is visible although one is searched for); they appear to have been
// stripped in extraction — restore them from the upstream file.
164 | func TestAddMyselfChild(t *testing.T) {
165 | input := `
166 |
167 | Title
168 |
169 |
170 |
171 | Welcome to Tritium Tester
172 |
173 |
174 | `
175 | doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
176 |
177 | if err != nil {
178 | t.Error("Parsing has error:", err)
179 | return
180 | }
181 |
// The Search error is discarded; only the number of matches is checked.
182 | divs, _ := doc.Search("//div")
183 | if len(divs) != 1 {
184 | t.Error("should have 1 div")
185 | return
186 | }
187 |
188 | div := divs[0]
// Add the node as a child of itself.
189 | div.AddChild(div)
190 |
191 | expected := `
192 |
193 |
194 |
195 | Title
196 |
197 |
198 |
199 | Welcome to Tritium Tester
200 |
201 |
202 | `
203 | if expected != doc.String() {
204 | t.Error("doc is not expected:\n", doc.String(), "\n", expected)
205 | }
206 | doc.Free()
207 | CheckXmlMemoryLeaks(t)
208 | }
209 |
// TestRemoveMeRemoveParent parses input, takes the first child of the single
// "//div" match, removes it, and then checks that searching ".." from the
// removed node still yields exactly one node, which is removed as well.
// NOTE(review): the input literal contains no div markup in this copy; it
// appears to have been stripped in extraction — restore it from upstream.
210 | func TestRemoveMeRemoveParent(t *testing.T) {
211 | input := `
212 |
213 | Title
214 |
215 |
216 |
217 |
218 |
219 | `
220 | doc, err := Parse([]byte(input), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
221 |
222 | if err != nil {
223 | t.Error("Parsing has error:", err)
224 | return
225 | }
226 |
227 | divs, _ := doc.Search("//div")
228 | if len(divs) != 1 {
229 | t.Error("should have 1 div")
230 | return
231 | }
232 |
233 | div := divs[0]
234 | h1 := div.FirstChild()
// This first result is never read; it is overwritten after the removal below.
235 | nodes, _ := h1.Search("..")
236 | h1.Remove()
237 | nodes, _ = h1.Search("..")
238 | if len(nodes) != 1 {
239 | t.Error("removed node should have a parent , i.e. its document")
240 | }
// NOTE(review): nodes[0] is indexed even when the length check above failed,
// so an empty result would panic here.
241 | nodes[0].Remove()
242 | doc.Free()
243 | CheckXmlMemoryLeaks(t)
244 | }
245 |
--------------------------------------------------------------------------------
/xpath/xpath.go:
--------------------------------------------------------------------------------
1 | package xpath
2 |
3 | /*
4 | #cgo pkg-config: libxml-2.0
5 |
6 | #include
7 | #include
8 | #include
9 |
10 | xmlNode* fetchNode(xmlNodeSet *nodeset, int index) {
11 | return nodeset->nodeTab[index];
12 | }
13 |
14 | xmlXPathObjectPtr go_resolve_variables(void* ctxt, char* name, char* ns);
15 | int go_can_resolve_function(void* ctxt, char* name, char* ns);
16 | void exec_xpath_function(xmlXPathParserContextPtr ctxt, int nargs);
17 |
18 | xmlXPathFunction go_resolve_function(void* ctxt, char* name, char* ns) {
19 | if (go_can_resolve_function(ctxt, name, ns))
20 | return exec_xpath_function;
21 |
22 | return 0;
23 | }
24 |
25 | static void set_var_lookup(xmlXPathContext* c, void* data) {
26 | c->varLookupFunc = (void *)go_resolve_variables;
27 | c->varLookupData = data;
28 | }
29 |
30 | static void set_function_lookup(xmlXPathContext* c, void* data) {
31 | c->funcLookupFunc = (void *)go_resolve_function;
32 | c->funcLookupData = data;
33 | }
34 |
35 | int getXPathObjectType(xmlXPathObject* o) {
36 | if(o == 0)
37 | return 0;
38 | return o->type;
39 | }
40 | */
41 | import "C"
42 |
43 | import "unsafe"
44 | import . "github.com/moovweb/gokogiri/util"
45 | import "runtime"
46 | import "errors"
47 |
48 | type XPath struct {
49 | ContextPtr *C.xmlXPathContext
50 | ResultPtr *C.xmlXPathObject
51 | }
52 |
// XPathObjectType identifies the kind of value held by an XPath result object.
type XPathObjectType int

// Result kinds as numbered by getXPathObjectType. Only XPATH_UNDEFINED carries
// the XPathObjectType type; the remaining constants are untyped integers.
const (
	XPATH_UNDEFINED XPathObjectType = 0

	XPATH_NODESET     = 1
	XPATH_BOOLEAN     = 2
	XPATH_NUMBER      = 3
	XPATH_STRING      = 4
	XPATH_POINT       = 5
	XPATH_RANGE       = 6
	XPATH_LOCATIONSET = 7
	XPATH_USERS       = 8
	// XPATH_XSLT_TREE is an XSLT value tree, non modifiable.
	XPATH_XSLT_TREE = 9
)
67 |
68 | type XPathFunction func(context VariableScope, args []interface{}) interface{}
69 |
70 | // Types that provide the VariableScope interface know how to resolve
71 | // XPath variable names into values.
72 |
73 | //This interface exist primarily for the benefit of XSLT processors.
74 | type VariableScope interface {
75 | ResolveVariable(string, string) interface{}
76 | IsFunctionRegistered(string, string) bool
77 | ResolveFunction(string, string) XPathFunction
78 | }
79 |
80 | func NewXPath(docPtr unsafe.Pointer) (xpath *XPath) {
81 | if docPtr == nil {
82 | return
83 | }
84 | xpath = &XPath{ContextPtr: C.xmlXPathNewContext((*C.xmlDoc)(docPtr)), ResultPtr: nil}
85 | runtime.SetFinalizer(xpath, (*XPath).Free)
86 | return
87 | }
88 |
89 | func (xpath *XPath) RegisterNamespace(prefix, href string) bool {
90 | var prefixPtr unsafe.Pointer = nil
91 | if len(prefix) > 0 {
92 | prefixBytes := AppendCStringTerminator([]byte(prefix))
93 | prefixPtr = unsafe.Pointer(&prefixBytes[0])
94 | }
95 |
96 | var hrefPtr unsafe.Pointer = nil
97 | if len(href) > 0 {
98 | hrefBytes := AppendCStringTerminator([]byte(href))
99 | hrefPtr = unsafe.Pointer(&hrefBytes[0])
100 | }
101 |
102 | result := C.xmlXPathRegisterNs(xpath.ContextPtr, (*C.xmlChar)(prefixPtr), (*C.xmlChar)(hrefPtr))
103 | return result == 0
104 | }
105 |
106 | // Evaluate an XPath and attempt to consume the result as a nodeset.
107 | func (xpath *XPath) EvaluateAsNodeset(nodePtr unsafe.Pointer, xpathExpr *Expression) (nodes []unsafe.Pointer, err error) {
108 | if nodePtr == nil {
109 | //evaluating xpath on a nil node returns no result.
110 | return
111 | }
112 |
113 | err = xpath.Evaluate(nodePtr, xpathExpr)
114 | if err != nil {
115 | return
116 | }
117 |
118 | nodes, err = xpath.ResultAsNodeset()
119 | return
120 | }
121 |
122 | // Evaluate an XPath. The returned result is stored in the struct. Call ReturnType to
123 | // discover the type of result, and call one of the ResultAs* functions to return a
124 | // copy of the result as a particular type.
125 | func (xpath *XPath) Evaluate(nodePtr unsafe.Pointer, xpathExpr *Expression) (err error) {
126 | if nodePtr == nil {
127 | //evaluating xpath on a nil node returns no result.
128 | return
129 | }
130 |
131 | oldXPContextDoc := xpath.ContextPtr.doc
132 | oldXPContextNode := xpath.ContextPtr.node
133 | oldXPProximityPosition := xpath.ContextPtr.proximityPosition
134 | oldXPContextSize := xpath.ContextPtr.contextSize
135 | oldXPNsNr := xpath.ContextPtr.nsNr
136 | oldXPNamespaces := xpath.ContextPtr.namespaces
137 |
138 | xpath.ContextPtr.node = (*C.xmlNode)(nodePtr)
139 | if xpath.ResultPtr != nil {
140 | C.xmlXPathFreeObject(xpath.ResultPtr)
141 | }
142 | xpath.ResultPtr = C.xmlXPathCompiledEval(xpathExpr.Ptr, xpath.ContextPtr)
143 |
144 | xpath.ContextPtr.doc = oldXPContextDoc
145 | xpath.ContextPtr.node = oldXPContextNode
146 | xpath.ContextPtr.proximityPosition = oldXPProximityPosition
147 | xpath.ContextPtr.contextSize = oldXPContextSize
148 | xpath.ContextPtr.nsNr = oldXPNsNr
149 | xpath.ContextPtr.namespaces = oldXPNamespaces
150 |
151 | if xpath.ResultPtr == nil {
152 | err = errors.New("err in evaluating xpath: " + xpathExpr.String())
153 | return
154 | }
155 | return
156 | }
157 |
158 | // Determine the actual return type of the XPath evaluation.
159 | func (xpath *XPath) ReturnType() XPathObjectType {
160 | return XPathObjectType(C.getXPathObjectType(xpath.ResultPtr))
161 | }
162 |
163 | // Get the XPath result as a nodeset.
164 | func (xpath *XPath) ResultAsNodeset() (nodes []unsafe.Pointer, err error) {
165 | if xpath.ResultPtr == nil {
166 | return
167 | }
168 |
169 | if xpath.ReturnType() != XPATH_NODESET {
170 | err = errors.New("Cannot convert XPath result to nodeset")
171 | }
172 |
173 | if nodesetPtr := xpath.ResultPtr.nodesetval; nodesetPtr != nil {
174 | if nodesetSize := int(nodesetPtr.nodeNr); nodesetSize > 0 {
175 | nodes = make([]unsafe.Pointer, nodesetSize)
176 | for i := 0; i < nodesetSize; i++ {
177 | nodes[i] = unsafe.Pointer(C.fetchNode(nodesetPtr, C.int(i)))
178 | }
179 | }
180 | }
181 | return
182 | }
183 |
184 | // Coerce the result into a string
185 | func (xpath *XPath) ResultAsString() (val string, err error) {
186 | if xpath.ReturnType() != XPATH_STRING {
187 | xpath.ResultPtr = C.xmlXPathConvertString(xpath.ResultPtr)
188 | }
189 | val = C.GoString((*C.char)(unsafe.Pointer(xpath.ResultPtr.stringval)))
190 | return
191 | }
192 |
193 | // Coerce the result into a number
194 | func (xpath *XPath) ResultAsNumber() (val float64, err error) {
195 | if xpath.ReturnType() != XPATH_NUMBER {
196 | xpath.ResultPtr = C.xmlXPathConvertNumber(xpath.ResultPtr)
197 | }
198 | val = float64(xpath.ResultPtr.floatval)
199 | return
200 | }
201 |
202 | // Coerce the result into a boolean
203 | func (xpath *XPath) ResultAsBoolean() (val bool, err error) {
204 | xpath.ResultPtr = C.xmlXPathConvertBoolean(xpath.ResultPtr)
205 | val = xpath.ResultPtr.boolval != 0
206 | return
207 | }
208 |
209 | // Add a variable resolver.
210 | func (xpath *XPath) SetResolver(v VariableScope) {
211 | C.set_var_lookup(xpath.ContextPtr, unsafe.Pointer(&v))
212 | C.set_function_lookup(xpath.ContextPtr, unsafe.Pointer(&v))
213 | }
214 |
215 | // SetContextPosition sets the internal values needed to
216 | // determine the values of position() and last() for the
217 | // current context node.
218 | func (xpath *XPath) SetContextPosition(position, size int) {
219 | xpath.ContextPtr.proximityPosition = C.int(position)
220 | xpath.ContextPtr.contextSize = C.int(size)
221 | }
222 |
223 | // GetContextPosition retrieves the internal values used to
224 | // determine the values of position() and last() for the
225 | // current context node.
226 |
227 | // This allows values to saved and restored during processing
228 | // of a document.
229 | func (xpath *XPath) GetContextPosition() (position, size int) {
230 | position = int(xpath.ContextPtr.proximityPosition)
231 | size = int(xpath.ContextPtr.contextSize)
232 | return
233 | }
234 |
235 | func (xpath *XPath) Free() {
236 | if xpath.ContextPtr != nil {
237 | C.xmlXPathFreeContext(xpath.ContextPtr)
238 | xpath.ContextPtr = nil
239 | }
240 | if xpath.ResultPtr != nil {
241 | C.xmlXPathFreeObject(xpath.ResultPtr)
242 | xpath.ResultPtr = nil
243 | }
244 | }
245 |
246 | func XPathObjectToValue(obj C.xmlXPathObjectPtr) (result interface{}) {
247 | rt := XPathObjectType(C.getXPathObjectType(obj))
248 | switch rt {
249 | case XPATH_NODESET, XPATH_XSLT_TREE:
250 | if nodesetPtr := obj.nodesetval; nodesetPtr != nil {
251 | if nodesetSize := int(nodesetPtr.nodeNr); nodesetSize > 0 {
252 | nodes := make([]unsafe.Pointer, nodesetSize)
253 | for i := 0; i < nodesetSize; i++ {
254 | nodes[i] = unsafe.Pointer(C.fetchNode(nodesetPtr, C.int(i)))
255 | }
256 | result = nodes
257 | return
258 | }
259 | }
260 | result = nil
261 | case XPATH_NUMBER:
262 | obj = C.xmlXPathConvertNumber(obj)
263 | result = float64(obj.floatval)
264 | case XPATH_BOOLEAN:
265 | obj = C.xmlXPathConvertBoolean(obj)
266 | result = obj.boolval != 0
267 | default:
268 | obj = C.xmlXPathConvertString(obj)
269 | result = C.GoString((*C.char)(unsafe.Pointer(obj.stringval)))
270 | }
271 | return
272 | }
273 |
--------------------------------------------------------------------------------
/xml/document_test.go:
--------------------------------------------------------------------------------
1 | package xml
2 |
3 | import (
4 | "fmt"
5 | "io/ioutil"
6 | "os"
7 | "path/filepath"
8 | "runtime"
9 | "strings"
10 | "testing"
11 | )
12 |
13 | func TestDocuments(t *testing.T) {
14 | if runtime.GOOS == "windows" {
15 | return
16 | }
17 | tests, err := collectTests("document")
18 |
19 | if len(err) > 0 {
20 | t.Errorf(err)
21 | }
22 |
23 | errors := make([]string, 0)
24 |
25 | print("\nTesting: Basic Parsing [")
26 |
27 | for _, test := range tests {
28 | error := RunDocumentParseTest(t, test)
29 |
30 | if error != nil {
31 | errors = append(errors, fmt.Sprintf("Test %v failed:\n%v\n", test, *error))
32 | print("F")
33 | } else {
34 | print(".")
35 | }
36 | }
37 |
38 | println("]")
39 |
40 | if len(errors) > 0 {
41 | errorMessage := "\t" + strings.Join(strings.Split(strings.Join(errors, "\n\n"), "\n"), "\n\t")
42 | t.Errorf("\nSome tests failed! (%d passed / %d total) :\n%v", len(tests)-len(errors), len(tests), errorMessage)
43 | } else {
44 | fmt.Printf("\nAll (%d) tests passed!\n", len(tests))
45 | }
46 | }
47 |
48 | func TestBufferedDocuments(t *testing.T) {
49 | if runtime.GOOS == "windows" {
50 | return
51 | }
52 | tests, err := collectTests("document")
53 |
54 | if len(err) > 0 {
55 | t.Errorf(err)
56 | }
57 |
58 | errors := make([]string, 0)
59 |
60 | print("\nTesting: Buffered Parsing [")
61 |
62 | for _, test := range tests {
63 | error := RunParseDocumentWithBufferTest(t, test)
64 |
65 | if error != nil {
66 | errors = append(errors, fmt.Sprintf("Test %v failed:\n%v\n", test, *error))
67 | print("F")
68 | } else {
69 | print(".")
70 | }
71 | }
72 |
73 | println("]")
74 |
75 | if len(errors) > 0 {
76 | errorMessage := "\t" + strings.Join(strings.Split(strings.Join(errors, "\n\n"), "\n"), "\n\t")
77 | t.Errorf("\nSome tests failed! (%d passed / %d total) :\n%v", len(tests)-len(errors), len(tests), errorMessage)
78 | } else {
79 | fmt.Printf("\nAll (%d) tests passed!\n", len(tests))
80 | }
81 | }
82 |
83 | func RunParseDocumentWithBufferTest(t *testing.T, name string) (error *string) {
84 | var errorMessage string
85 | offset := "\t"
86 |
87 | defer CheckXmlMemoryLeaks(t)
88 |
89 | input, output, dataError := getTestData(name)
90 |
91 | if len(dataError) > 0 {
92 | errorMessage += dataError
93 | }
94 |
95 | buffer := make([]byte, 500000)
96 |
97 | doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
98 |
99 | if err != nil {
100 | errorMessage = fmt.Sprintf("parsing error:%v\n", err)
101 | }
102 |
103 | if string(doc.ToBuffer(buffer)) != string(output) {
104 | formattedOutput := offset + strings.Join(strings.Split("["+doc.String()+"]", "\n"), "\n"+offset)
105 | formattedExpectedOutput := offset + strings.Join(strings.Split("["+string(output)+"]", "\n"), "\n"+offset)
106 | errorMessage = fmt.Sprintf("%v-- Got --\n%v\n%v-- Expected --\n%v\n", offset, formattedOutput, offset, formattedExpectedOutput)
107 | }
108 | doc.Free()
109 |
110 | if len(errorMessage) > 0 {
111 | return &errorMessage
112 | }
113 | return nil
114 |
115 | }
116 |
117 | func RunDocumentParseTest(t *testing.T, name string) (error *string) {
118 |
119 | var errorMessage string
120 | offset := "\t"
121 |
122 | defer CheckXmlMemoryLeaks(t)
123 |
124 | input, output, dataError := getTestData(name)
125 |
126 | if len(dataError) > 0 {
127 | errorMessage += dataError
128 | }
129 |
130 | doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
131 |
132 | if err != nil {
133 | errorMessage = fmt.Sprintf("parsing error:%v\n", err)
134 | }
135 |
136 | if doc.String() != string(output) {
137 | formattedOutput := offset + strings.Join(strings.Split("["+doc.String()+"]", "\n"), "\n"+offset)
138 | formattedExpectedOutput := offset + strings.Join(strings.Split("["+string(output)+"]", "\n"), "\n"+offset)
139 | errorMessage = fmt.Sprintf("%v-- Got --\n%v\n%v-- Expected --\n%v\n", offset, formattedOutput, offset, formattedExpectedOutput)
140 | testOutput := filepath.Join(name, "test_output.txt")
141 | ioutil.WriteFile(testOutput, []byte(doc.String()), os.FileMode(0666))
142 | errorMessage += fmt.Sprintf("%v Output test output to: %v\n", offset, testOutput)
143 | }
144 | doc.Free()
145 |
146 | if len(errorMessage) > 0 {
147 | return &errorMessage
148 | }
149 | return nil
150 |
151 | }
152 |
153 | func BenchmarkDocOutput(b *testing.B) {
154 | b.StopTimer()
155 |
156 | tests, err := collectTests("document")
157 |
158 | if len(err) > 0 {
159 | fmt.Printf(err)
160 | return
161 | }
162 |
163 | docs := make([]*XmlDocument, 0)
164 |
165 | for _, testName := range tests {
166 |
167 | input, _, dataError := getTestData(testName)
168 |
169 | if len(dataError) > 0 {
170 | fmt.Printf(dataError)
171 | return
172 | }
173 | doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
174 |
175 | if err != nil {
176 | fmt.Printf("parsing error:%v\n", err)
177 | return
178 | }
179 | docs = append(docs, doc)
180 | }
181 |
182 | b.StartTimer()
183 |
184 | for i := 0; i < b.N; i++ {
185 | for index, _ := range tests {
186 | _ = docs[index].String()
187 | }
188 | }
189 |
190 | }
191 |
192 | func BenchmarkDocOutputToBuffer(b *testing.B) {
193 | b.StopTimer()
194 |
195 | tests, err := collectTests("document")
196 |
197 | if len(err) > 0 {
198 | fmt.Printf(err)
199 | return
200 | }
201 |
202 | docs := make([]*XmlDocument, 0)
203 |
204 | for _, testName := range tests {
205 |
206 | input, _, dataError := getTestData(testName)
207 |
208 | if len(dataError) > 0 {
209 | fmt.Printf(dataError)
210 | return
211 | }
212 | doc, err := Parse(input, DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
213 |
214 | if err != nil {
215 | fmt.Printf("parsing error:%v\n", err)
216 | return
217 | }
218 | docs = append(docs, doc)
219 | }
220 |
221 | buffer := make([]byte, 500*1024)
222 |
223 | b.StartTimer()
224 |
225 | for i := 0; i < b.N; i++ {
226 |
227 | for index, _ := range tests {
228 |
229 | _ = docs[index].ToBuffer(buffer)
230 |
231 | }
232 | }
233 |
234 | }
235 |
// TestRemoveNamespaces parses xml, calls RecursivelyRemoveNamespaces on its
// root, and checks that the formatted result equals the formatted parse of
// xml_no_namespace.
// NOTE(review): both literals are empty in this copy; the XML appears to have
// been lost in extraction — restore it from the upstream file.
236 | func TestRemoveNamespaces(t *testing.T) {
237 | xml := ""
238 | xml_no_namespace := ""
239 |
// Parse errors are discarded for both documents.
240 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
241 | doc.Root().RecursivelyRemoveNamespaces()
242 | doc2, _ := Parse([]byte(xml_no_namespace), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
243 |
244 | output := fmt.Sprintf("%v", doc)
245 | output_no_namespace := fmt.Sprintf("%v", doc2)
246 | if output != output_no_namespace {
247 | t.Errorf("Xml namespaces not removed!")
248 | }
249 | }
250 |
// TestRemoveDefaultNamespace parses xml, calls RemoveDefaultNamespace on its
// root, and checks that the formatted result equals the formatted parse of
// xml_no_namespace.
// NOTE(review): both literals contain only the text "xyz" in this copy; the
// surrounding markup appears to have been lost in extraction — restore it
// from the upstream file.
251 | func TestRemoveDefaultNamespace(t *testing.T) {
252 | xml := `
253 |
254 |
255 |
256 | xyz
257 |
258 |
259 |
260 | `
261 |
262 | xml_no_namespace := `
263 |
264 |
265 |
266 | xyz
267 |
268 |
269 |
270 | `
// Parse errors are discarded for both documents.
271 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
272 | doc.Root().RemoveDefaultNamespace()
273 | doc2, _ := Parse([]byte(xml_no_namespace), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
274 |
275 | output := fmt.Sprintf("%v", doc)
276 | output_no_namespace := fmt.Sprintf("%v", doc2)
277 | if output != output_no_namespace {
278 | t.Errorf("Default namespace not removed!")
279 | }
280 | }
281 |
// TestNodeById parses xml, looks up the node with ID "W11" through NodeById,
// and checks that its content is "Success".
// NOTE(review): the xml literal shows no markup in this copy (only
// "]>" and the text "FailedSuccess" remain); it appears to have been stripped
// in extraction — restore it from the upstream file.
282 | func TestNodeById(t *testing.T) {
283 | xml := "\n\n]>\nFailedSuccess"
284 |
// The parse error is discarded.
285 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
286 | p := doc.NodeById("W11")
287 |
288 | if p == nil {
289 | t.Errorf("Did not find node by ID!")
290 | return
291 | }
292 |
293 | output := fmt.Sprintf("%v", p.Content())
294 | if output != "Success" {
295 | t.Errorf("Incorrect node selected by ID!")
296 | }
297 | }
298 |
// TestUnparsedEntityURI parses xml and checks that UnparsedEntityURI("test")
// returns "test.jpg".
// NOTE(review): the xml literal shows no declarations in this copy (only
// newlines and "]>" remain); it appears to have been stripped in extraction —
// restore it from the upstream file.
299 | func TestUnparsedEntityURI(t *testing.T) {
300 | xml := "\n\n\n\n]>\n"
// The parse error is discarded.
301 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
302 | expected := "test.jpg"
303 | actual := doc.UnparsedEntityURI("test")
304 |
// An empty result is reported separately from a wrong one.
305 | if actual == "" {
306 | t.Errorf("Did not find unparsed entity 'test'")
307 | return
308 | }
309 |
310 | if actual != expected {
311 | t.Errorf("Expected '%s', but got '%s' calling doc.UnparsedEntityURI", expected, actual)
312 | }
313 | }
314 |
--------------------------------------------------------------------------------
/xml/node_test.go:
--------------------------------------------------------------------------------
1 | package xml
2 |
3 | import "testing"
4 | import "fmt"
5 |
6 | func TestAddChild(t *testing.T) {
7 |
8 | docAssertion := func(doc *XmlDocument) (string, string, string) {
9 | expectedDocAfterAdd :=
10 | `
11 |
12 |
13 |
14 | `
15 | doc.Root().AddChild("")
16 |
17 | return doc.String(), expectedDocAfterAdd, "output of the xml doc after AddChild does not match"
18 | }
19 |
20 | nodeAssertion := func(doc *XmlDocument) (string, string, string) {
21 | expectedNodeAfterAdd :=
22 | `
23 |
24 | `
25 |
26 | return doc.Root().String(), expectedNodeAfterAdd, "the output of the xml root after AddChild does not match"
27 | }
28 |
29 | RunTest(t, "node", "add_child", nil, docAssertion, nodeAssertion)
30 |
31 | }
32 |
33 | func TestAddAncestorAsChild(t *testing.T) {
34 | docAssertion := func(doc *XmlDocument) (string, string, string) {
35 | expectedDocAfterAdd :=
36 | `
37 |
38 | `
39 |
40 | foo := doc.Root()
41 | bar := foo.FirstChild()
42 | holiday := bar.FirstChild()
43 | fun := holiday.FirstChild()
44 | fun.AddChild(bar)
45 |
46 | return doc.String(), expectedDocAfterAdd, "output of the xml doc after AddChild does not match"
47 | }
48 |
49 | nodeAssertion := func(doc *XmlDocument) (string, string, string) {
50 | expectedNodeAfterAdd :=
51 | ``
52 |
53 | return doc.Root().String(), expectedNodeAfterAdd, "the output of the xml root after AddChild does not match"
54 | }
55 |
56 | RunTest(t, "node", "add_ancestor", nil, docAssertion, nodeAssertion)
57 |
58 | }
59 |
60 | func addChildBenchLogic(b *testing.B, doc *XmlDocument) {
61 | root := doc.Root()
62 |
63 | for i := 0; i < b.N; i++ {
64 | root.AddChild("")
65 | }
66 | }
67 |
68 | func BenchmarkAddChild(b *testing.B) {
69 | RunBenchmark(b, "document", "big_un", addChildBenchLogic) // Run against big doc
70 | }
71 |
72 | func BenchmarkAddChildBigDoc(b *testing.B) {
73 | RunBenchmark(b, "node", "add_child", addChildBenchLogic)
74 | }
75 |
76 | func TestAddPreviousSibling(t *testing.T) {
77 |
78 | testLogic := func(t *testing.T, doc *XmlDocument) {
79 | err := doc.Root().AddPreviousSibling("")
80 |
81 | if err != nil {
82 | t.Errorf("Error adding previous sibling:\n%v\n", err.Error())
83 | }
84 | }
85 |
86 | RunTest(t, "node", "add_previous_sibling", testLogic)
87 | }
88 |
89 | func TestAddPreviousSibling2(t *testing.T) {
90 |
91 | testLogic := func(t *testing.T, doc *XmlDocument) {
92 | err := doc.Root().FirstChild().AddPreviousSibling("COOL")
93 |
94 | if err != nil {
95 | t.Errorf("Error adding previous sibling:\n%v\n", err.Error())
96 | }
97 | }
98 |
99 | RunTest(t, "node", "add_previous_sibling2", testLogic)
100 | }
101 |
102 | func TestAddNextSibling(t *testing.T) {
103 |
104 | testLogic := func(t *testing.T, doc *XmlDocument) {
105 | doc.Root().AddNextSibling("")
106 | }
107 |
108 | RunTest(t, "node", "add_next_sibling", testLogic)
109 | }
110 |
111 | func TestSetContent(t *testing.T) {
112 |
113 | testLogic := func(t *testing.T, doc *XmlDocument) {
114 | root := doc.Root()
115 | root.SetContent("")
116 | }
117 |
118 | RunTest(t, "node", "set_content", testLogic)
119 | }
120 |
121 | func BenchmarkSetContent(b *testing.B) {
122 |
123 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
124 | root := doc.Root()
125 | for i := 0; i < b.N; i++ {
126 | root.SetContent("")
127 | }
128 | }
129 |
130 | RunBenchmark(b, "node", "set_content", benchmarkLogic)
131 | }
132 |
133 | func TestSetChildren(t *testing.T) {
134 | testLogic := func(t *testing.T, doc *XmlDocument) {
135 | root := doc.Root()
136 | root.SetChildren("")
137 | }
138 |
139 | RunTest(t, "node", "set_children", testLogic)
140 | }
141 |
142 | func BenchmarkSetChildren(b *testing.B) {
143 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
144 | root := doc.Root()
145 | for i := 0; i < b.N; i++ {
146 | root.SetChildren("")
147 | }
148 | }
149 |
150 | RunBenchmark(b, "node", "set_children", benchmarkLogic)
151 | }
152 |
153 | func TestReplace(t *testing.T) {
154 |
155 | testLogic := func(t *testing.T, doc *XmlDocument) {
156 | root := doc.Root()
157 | root.Replace("")
158 | }
159 |
160 | rootAssertion := func(doc *XmlDocument) (string, string, string) {
161 | root := doc.Root()
162 | return root.String(), "", "the output of the xml root does not match"
163 | }
164 |
165 | RunTest(t, "node", "replace", testLogic, rootAssertion)
166 | }
167 |
168 | func BenchmarkReplace(b *testing.B) {
169 |
170 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
171 | root := doc.Root()
172 | for i := 0; i < b.N; i++ {
173 | root.Replace("")
174 | root = doc.Root() //once the node has been replaced, we need to get a new node
175 | }
176 | }
177 |
178 | RunBenchmark(b, "node", "replace", benchmarkLogic)
179 | }
180 |
181 | func TestAttributes(t *testing.T) {
182 |
183 | testLogic := func(t *testing.T, doc *XmlDocument) {
184 |
185 | root := doc.Root()
186 | attributes := root.Attributes()
187 |
188 | if len(attributes) != 2 || attributes["myname"].String() != "ff" {
189 | fmt.Printf("%v, %q\n", attributes, attributes["myname"].String())
190 | t.Error("root's attributes do not match")
191 | }
192 |
193 | child := root.FirstChild()
194 | childAttributes := child.Attributes()
195 |
196 | if len(childAttributes) != 1 || childAttributes["class"].String() != "shine" {
197 | t.Error("child's attributes do not match")
198 | }
199 | }
200 |
201 | RunTest(t, "node", "attributes", testLogic)
202 |
203 | }
204 |
205 | func BenchmarkAttributes(b *testing.B) {
206 | benchmarkLogic := func(b *testing.B, doc *XmlDocument) {
207 |
208 | root := doc.Root()
209 |
210 | for i := 0; i < b.N; i++ {
211 | root.SetAttr("garfield", "spaghetti")
212 | }
213 | }
214 |
215 | RunBenchmark(b, "node", "attributes", benchmarkLogic)
216 | }
217 |
218 | func TestInner(t *testing.T) {
219 |
220 | testLogic := func(t *testing.T, doc *XmlDocument) {
221 | root := doc.Root()
222 | root.SetInnerHtml("")
223 | }
224 |
225 | RunTest(t, "node", "inner", testLogic)
226 | }
227 | func TestInnerWithAttributes(t *testing.T) {
228 |
229 | testLogic := func(t *testing.T, doc *XmlDocument) {
230 | root := doc.Root()
231 | root.SetInnerHtml("")
232 | }
233 |
234 | RunTest(t, "node", "inner_with_attributes", testLogic)
235 | }
236 |
237 | func TestSetNamespace(t *testing.T) {
238 | testLogic := func(t *testing.T, doc *XmlDocument) {
239 | root := doc.Root()
240 | root.SetNamespace("foo", "bar")
241 | }
242 |
243 | RunTest(t, "node", "set_namespace", testLogic)
244 | }
245 |
246 | func TestSetDefaultNamespace(t *testing.T) {
247 | testLogic := func(t *testing.T, doc *XmlDocument) {
248 | root := doc.Root()
249 | root.SetNamespace("", "bar")
250 | }
251 |
252 | RunTest(t, "node", "set_default_namespace", testLogic)
253 | }
254 |
255 | func TestDeclareNamespace(t *testing.T) {
256 | testLogic := func(t *testing.T, doc *XmlDocument) {
257 | root := doc.Root()
258 | root.DeclareNamespace("foo", "bar")
259 | child := root.FirstChild()
260 | child.SetNamespace("foo", "bar")
261 | }
262 |
263 | RunTest(t, "node", "declare_namespace", testLogic)
264 | }
265 |
266 | func TestNamespaceAttribute(t *testing.T) {
267 | testLogic := func(t *testing.T, doc *XmlDocument) {
268 | root := doc.Root()
269 | root.DeclareNamespace("foo", "bar")
270 | root.SetNsAttr("bar", "hello", "world")
271 | }
272 |
273 | RunTest(t, "node", "set_ns_attr", testLogic)
274 | }
275 |
276 | func TestUnformattedXml(t *testing.T) {
277 | xml := "\n\n\tTest\n"
278 | expected := "\n\tTest\n"
279 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
280 | root := doc.Root()
281 | out := root.ToUnformattedXml()
282 | if out != expected {
283 | t.Errorf("TestUnformattedXml Expected: %v\nActual: %v", expected, out)
284 | }
285 |
286 | }
287 |
288 | func TestSerializewithFomat(t *testing.T) {
289 | xml := "\n\n\tTest\n"
290 | expected := "\n\tTest\n"
291 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
292 | root := doc.Root()
293 | b, size := root.SerializeWithFormat(XML_SAVE_AS_XML|XML_SAVE_NO_DECL, nil, nil)
294 | if b == nil {
295 | t.Errorf("SerializeWithFormat Expected: %v\nActual: (nil)", expected)
296 | return
297 | }
298 | out := string(b[:size])
299 | if out != expected {
300 | t.Errorf("SerializeWithFormat Expected: %v\nActual: %v", expected, out)
301 | }
302 |
303 | }
304 |
305 | func TestEvalVariableExpr(t *testing.T) {
306 | xml := ""
307 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
308 | s := newSimpleVariableScope()
309 | root := doc.Root()
310 | s.variables["spec"] = "XSLT 1.0"
311 | s.variables["number"] = 7
312 | v, err := root.EvalXPath("$spec", s)
313 | if err != nil {
314 | t.Errorf("%v", err)
315 | }
316 | out := v.(string)
317 | if out != "XSLT 1.0" {
318 | t.Errorf("TestEvalVariableExpr Expected: %v\nActual: %v", "XSLT 1.0", out)
319 | }
320 | }
321 |
322 | func TestEvalStringExpr(t *testing.T) {
323 | xml := ""
324 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
325 | root := doc.Root()
326 | v, err := root.EvalXPath("\"Hello\"", nil)
327 | if err != nil {
328 | t.Errorf("%v", err)
329 | }
330 | out := v.(string)
331 | if out != "Hello" {
332 | t.Errorf("TestEvalStringExpr Expected: %v\nActual: %v", "Hello", out)
333 | }
334 | }
335 |
336 | func TestEvalNumericExpr(t *testing.T) {
337 | xml := ""
338 | doc, _ := Parse([]byte(xml), DefaultEncodingBytes, nil, DefaultParseOption, DefaultEncodingBytes)
339 | root := doc.Root()
340 | v, err := root.EvalXPath("7", nil)
341 | if err != nil {
342 | t.Errorf("%v", err)
343 | }
344 | out := v.(float64)
345 | if out != 7 {
346 | t.Errorf("TestEvalNumericExpr Expected: %v\nActual: %v", 7, out)
347 | }
348 | }
349 |
350 | func TestDisableOutputEscaping(t *testing.T) {
351 | doc := CreateEmptyDocument(DefaultEncodingBytes, DefaultEncodingBytes)
352 | n := doc.CreateTextNode("
")
353 |
354 | //normal usage escapes the output
355 | escaped := "<br/>"
356 | if n.String() != escaped {
357 | t.Errorf("TestDisableOutputEscaping (escaping enabled) Expected: %v\nActual: %v", escaped, n.String())
358 | }
359 |
360 | //now we disable the output escaping
361 | unescaped := "
"
362 | n.DisableOutputEscaping()
363 | if n.String() != unescaped {
364 | t.Errorf("TestDisableOutputEscaping (escaping disabled) Expected: %v\nActual: %v", unescaped, n.String())
365 | }
366 | }
367 |
--------------------------------------------------------------------------------
/css/css.go:
--------------------------------------------------------------------------------
1 | package css
2 |
3 | // package main
4 |
5 | import (
6 | "fmt"
7 | "github.com/moovweb/rubex"
8 | "strings"
9 | )
10 |
// Lexeme identifies one kind of token recognised while parsing a CSS
// selector. Its values index the pattern table and the slice of compiled
// matchers.
type Lexeme int

// The lexemes are numbered consecutively from zero with iota so they can be
// used directly as array indices.
const (
	SPACES Lexeme = iota
	COMMA
	UNIVERSAL
	TYPE
	ELEMENT
	CLASS
	ID
	LBRACKET
	RBRACKET
	ATTR_NAME
	ATTR_VALUE
	EQUALS
	CONTAINS_CLASS
	DASH_PREFIXED
	STARTS_WITH
	ENDS_WITH
	CONTAINS
	MATCH_OP
	PSEUDO_CLASS
	FIRST_CHILD
	FIRST_OF_TYPE
	NTH_CHILD
	NTH_OF_TYPE
	ONLY_CHILD
	ONLY_OF_TYPE
	LAST_CHILD
	LAST_OF_TYPE
	NOT
	LPAREN
	RPAREN
	COEFFICIENT
	SIGNED
	UNSIGNED
	ODD
	EVEN
	N
	OPERATOR
	PLUS
	MINUS
	BINOMIAL
	ADJACENT_TO
	PRECEDES
	PARENT_OF
	ANCESTOR_OF
	// NUM_LEXEMES is not a lexeme: being last in the iota sequence, it equals
	// the number of lexemes declared above and is used to size tables.
	NUM_LEXEMES
)
61 |
62 | var pattern [NUM_LEXEMES]string
63 |
64 | func init() {
65 | pattern[SPACES] = `\s+`
66 | pattern[COMMA] = `\s*,`
67 | pattern[UNIVERSAL] = `\*`
68 | pattern[TYPE] = `[_a-zA-Z]\w*`
69 | pattern[ELEMENT] = `(\*|[_a-zA-Z]\w*)`
70 | pattern[CLASS] = `\.[-\w]+`
71 | pattern[ID] = `\#[-\w]+`
72 | pattern[LBRACKET] = `\[`
73 | pattern[RBRACKET] = `\]`
74 | pattern[ATTR_NAME] = `[-_:a-zA-Z][-\w:.]*`
75 | pattern[ATTR_VALUE] = `("(\\.|[^"\\])*"|'(\\.|[^'\\])*')`
76 | pattern[EQUALS] = `=`
77 | pattern[CONTAINS_CLASS] = `~=`
78 | pattern[DASH_PREFIXED] = `\|=`
79 | pattern[STARTS_WITH] = `\^=`
80 | pattern[ENDS_WITH] = `\$=`
81 | pattern[CONTAINS] = `\*=`
82 | pattern[MATCH_OP] = "(" + strings.Join([]string{pattern[EQUALS], pattern[CONTAINS_CLASS], pattern[DASH_PREFIXED], pattern[STARTS_WITH], pattern[ENDS_WITH], pattern[CONTAINS]}, "|") + ")"
83 | pattern[PSEUDO_CLASS] = `:[-a-z]+`
84 | pattern[FIRST_CHILD] = `:first-child`
85 | pattern[FIRST_OF_TYPE] = `:first-of-type`
86 | pattern[NTH_CHILD] = `:nth-child`
87 | pattern[NTH_OF_TYPE] = `:nth-of-type`
88 | pattern[ONLY_CHILD] = `:only-child`
89 | pattern[ONLY_OF_TYPE] = `:only-of-type`
90 | pattern[LAST_CHILD] = `:last-child`
91 | pattern[LAST_OF_TYPE] = `:last-of-type`
92 | pattern[NOT] = `:not`
93 | pattern[LPAREN] = `\s*\(`
94 | pattern[RPAREN] = `\s*\)`
95 | pattern[COEFFICIENT] = `[-+]?(\d+)?`
96 | pattern[SIGNED] = `[-+]?\d+`
97 | pattern[UNSIGNED] = `\d+`
98 | pattern[ODD] = `odd`
99 | pattern[EVEN] = `even`
100 | pattern[N] = `[nN]`
101 | pattern[OPERATOR] = `[-+]`
102 | pattern[PLUS] = `\+`
103 | pattern[MINUS] = `-`
104 | pattern[BINOMIAL] = strings.Join([]string{pattern[COEFFICIENT], pattern[N], `\s*`, pattern[OPERATOR], `\s*`, pattern[UNSIGNED]}, "")
105 | pattern[ADJACENT_TO] = `\s*\+`
106 | pattern[PRECEDES] = `\s*~`
107 | pattern[PARENT_OF] = `\s*>`
108 | pattern[ANCESTOR_OF] = `\s+`
109 | }
110 |
// Scope selects how a converted selector is anchored. With LOCAL the
// generated XPath is prefixed with "."; with GLOBAL no prefix is added.
type Scope int

const (
	GLOBAL Scope = iota
	LOCAL
)
117 |
118 | func Convert(css string, scope Scope) string {
119 | matchers := allocate()
120 | defer deallocate(matchers)
121 | xpath, _ := selectors(matchers, []byte(css), scope)
122 | return xpath
123 | }
124 |
125 | func allocate() []*rubex.Regexp {
126 | // some overlap in here, but it'll make the parsing functions clearer
127 | matchers := make([]*rubex.Regexp, 0, NUM_LEXEMES)
128 | for _, p := range pattern {
129 | matchers = append(matchers, rubex.MustCompile(`\A`+p))
130 | }
131 | return matchers
132 | }
133 |
134 | func deallocate(matchers []*rubex.Regexp) {
135 | for _, m := range matchers {
136 | m.Free()
137 | }
138 | }
139 |
140 | func selectors(matchers []*rubex.Regexp, input []byte, scope Scope) (string, []byte) {
141 | x, input := selector(matchers, input, scope)
142 | xs := []string{x}
143 | for peek(matchers, COMMA, input) {
144 | _, input = token(matchers, COMMA, input)
145 | x, input = selector(matchers, input, scope)
146 | xs = append(xs, x)
147 | }
148 | return strings.Join(xs, " | "), input
149 | }
150 |
151 | func selector(matchers []*rubex.Regexp, input []byte, scope Scope) (string, []byte) {
152 | var combinator Lexeme
153 | var xs []string
154 | if scope == LOCAL {
155 | xs = []string{"."}
156 | }
157 | if matched, remainder := token(matchers, PARENT_OF, input); matched != nil {
158 | combinator, input = PARENT_OF, remainder
159 | } else {
160 | combinator = ANCESTOR_OF
161 | }
162 | x, input := sequence(matchers, input, combinator)
163 | xs = append(xs, x)
164 | for {
165 | if matched, remainder := token(matchers, ADJACENT_TO, input); matched != nil {
166 | combinator, input = ADJACENT_TO, remainder
167 | } else if matched, remainder := token(matchers, PRECEDES, input); matched != nil {
168 | combinator, input = PRECEDES, remainder
169 | } else if matched, remainder := token(matchers, PARENT_OF, input); matched != nil {
170 | combinator, input = PARENT_OF, remainder
171 | } else if matched, remainder := token(matchers, ANCESTOR_OF, input); matched != nil {
172 | combinator, input = ANCESTOR_OF, remainder
173 | } else {
174 | break
175 | }
176 | x, input = sequence(matchers, input, combinator)
177 | xs = append(xs, x)
178 | }
179 | return strings.Join(xs, ""), input
180 | }
181 |
182 | func sequence(matchers []*rubex.Regexp, input []byte, combinator Lexeme) (string, []byte) {
183 | _, input = token(matchers, SPACES, input)
184 | x, ps := "", []string{}
185 |
186 | switch combinator {
187 | case ANCESTOR_OF:
188 | x = "/descendant-or-self::*/*"
189 | case PARENT_OF:
190 | x = "/child::*"
191 | case PRECEDES:
192 | x = "/following-sibling::*"
193 | case ADJACENT_TO:
194 | x = "/following-sibling::*"
195 | ps = append(ps, "position()=1")
196 | }
197 |
198 | if e, remainder := token(matchers, ELEMENT, input); e != nil {
199 | input = remainder
200 | if len(ps) > 0 {
201 | ps = append(ps, " and ")
202 | }
203 | ps = append(ps, "self::"+string(e))
204 | if !(peek(matchers, ID, input) || peek(matchers, CLASS, input) || peek(matchers, PSEUDO_CLASS, input) || peek(matchers, LBRACKET, input)) {
205 | pstr := strings.Join(ps, "")
206 | if pstr != "" {
207 | pstr = fmt.Sprintf("[%s]", pstr)
208 | }
209 | return x + pstr, input
210 | }
211 | }
212 | q, input, connective := qualifier(matchers, input)
213 | if q == "" {
214 | panic("Invalid CSS selector")
215 | }
216 | if len(ps) > 0 {
217 | ps = append(ps, connective)
218 | }
219 | ps = append(ps, q)
220 | for q, r, c := qualifier(matchers, input); q != ""; q, r, c = qualifier(matchers, input) {
221 | ps, input = append(ps, c, q), r
222 | }
223 | pstr := strings.Join(ps, "")
224 | if combinator != NOT {
225 | pstr = fmt.Sprintf("[%s]", pstr)
226 | }
227 | return x + pstr, input
228 | }
229 |
230 | func qualifier(matchers []*rubex.Regexp, input []byte) (string, []byte, string) {
231 | p, connective := "", ""
232 | if t, remainder := token(matchers, CLASS, input); t != nil {
233 | p = fmt.Sprintf(`contains(concat(" ", @class, " "), " %s ")`, string(t[1:]))
234 | input = remainder
235 | connective = " and "
236 | } else if t, remainder := token(matchers, ID, input); t != nil {
237 | p, input, connective = fmt.Sprintf(`@id="%s"`, string(t[1:])), remainder, " and "
238 | } else if peek(matchers, PSEUDO_CLASS, input) {
239 | p, input, connective = pseudoClass(matchers, input)
240 | } else if peek(matchers, LBRACKET, input) {
241 | p, input = attribute(matchers, input)
242 | connective = " and "
243 | }
244 | return p, input, connective
245 | }
246 |
247 | func pseudoClass(matchers []*rubex.Regexp, input []byte) (string, []byte, string) {
248 | class, input := token(matchers, PSEUDO_CLASS, input)
249 | var p, connective string
250 | switch string(class) {
251 | case ":first-child":
252 | p, connective = "position()=1", " and "
253 | case ":first-of-type":
254 | p, connective = "position()=1", "]["
255 | case ":last-child":
256 | p, connective = "position()=last()", " and "
257 | case ":last-of-type":
258 | p, connective = "position()=last()", "]["
259 | case ":only-child":
260 | p, connective = "position() = 1 and position() = last()", " and "
261 | case ":only-of-type":
262 | p, connective = "position() = 1 and position() = last()", "]["
263 | case ":nth-child":
264 | p, input = nth(matchers, input)
265 | connective = " and "
266 | case ":nth-of-type":
267 | p, input = nth(matchers, input)
268 | connective = "]["
269 | case ":not":
270 | p, input = negate(matchers, input)
271 | connective = " and "
272 | default:
273 | panic(`Cannot convert CSS pseudo-class "` + string(class) + `" to XPath.`)
274 | }
275 | return p, input, connective
276 | }
277 |
278 | func nth(matchers []*rubex.Regexp, input []byte) (string, []byte) {
279 | lparen, input := token(matchers, LPAREN, input)
280 | if lparen == nil {
281 | panic(":nth-child and :nth-of-type require an parenthesized argument")
282 | }
283 | _, input = token(matchers, SPACES, input)
284 | var expr string
285 | if e, rem := token(matchers, EVEN, input); e != nil {
286 | expr, input = "position() mod 2 = 0", rem
287 | } else if e, rem := token(matchers, ODD, input); e != nil {
288 | expr, input = "position() mod 2 = 1", rem
289 | } else if e, _ := token(matchers, BINOMIAL, input); e != nil {
290 | var coefficient, operator, constant []byte
291 | coefficient, input = token(matchers, COEFFICIENT, input)
292 | switch string(coefficient) {
293 | case "", "+":
294 | coefficient = []byte("1")
295 | case "-":
296 | coefficient = []byte("-1")
297 | }
298 | _, input = token(matchers, N, input)
299 | _, input = token(matchers, SPACES, input)
300 | operator, input = token(matchers, OPERATOR, input)
301 | _, input = token(matchers, SPACES, input)
302 | constant, input = token(matchers, UNSIGNED, input)
303 | expr = fmt.Sprintf("(position() %s %s) mod %s = 0", invert(string(operator)), string(constant), string(coefficient))
304 | } else if e, rem := token(matchers, SIGNED, input); e != nil {
305 | expr, input = "position() = "+string(e), rem
306 | } else {
307 | panic("Invalid argument to :nth-child or :nth-of-type.")
308 | }
309 | fmt.Println(string(input))
310 | _, input = token(matchers, SPACES, input)
311 | rparen, input := token(matchers, RPAREN, input)
312 | if rparen == nil {
313 | panic("Unterminated argument to :nth-child or :nth-of-type.")
314 | }
315 | return expr, input
316 | }
317 |
// invert returns the opposite additive operator: "-" when op, ignoring
// surrounding whitespace, is "+", and "+" for every other input.
func invert(op string) string {
	if strings.TrimSpace(op) == "+" {
		return "-"
	}
	return "+"
}
327 |
328 | func negate(matchers []*rubex.Regexp, input []byte) (string, []byte) {
329 | _, input = token(matchers, SPACES, input)
330 | lparen, input := token(matchers, LPAREN, input)
331 | if lparen == nil {
332 | panic(":not requires a parenthesized argument.")
333 | }
334 | _, input = token(matchers, SPACES, input)
335 | p, input := sequence(matchers, input, NOT)
336 | _, input = token(matchers, SPACES, input)
337 | rparen, input := token(matchers, RPAREN, input)
338 | if rparen == nil {
339 | panic("Unterminated argument to :not.")
340 | }
341 | return fmt.Sprintf("not(%s)", p), input
342 | }
343 |
344 | func attribute(matchers []*rubex.Regexp, input []byte) (string, []byte) {
345 | _, input = token(matchers, LBRACKET, input)
346 | _, input = token(matchers, SPACES, input)
347 | name, input := token(matchers, ATTR_NAME, input)
348 | if name == nil {
349 | panic("Attribute selector requires an attribute name.")
350 | }
351 | _, input = token(matchers, SPACES, input)
352 | if rbracket, remainder := token(matchers, RBRACKET, input); rbracket != nil {
353 | return "@" + string(name), remainder
354 | }
355 | op, input := token(matchers, MATCH_OP, input)
356 | if op == nil {
357 | panic("Missing operator in attribute selector.")
358 | }
359 | _, input = token(matchers, SPACES, input)
360 | val, input := token(matchers, ATTR_VALUE, input)
361 | if val == nil {
362 | panic("Missing value in attribute selector.")
363 | }
364 | _, input = token(matchers, SPACES, input)
365 | rbracket, input := token(matchers, RBRACKET, input)
366 | if rbracket == nil {
367 | panic("Unterminated attribute selector.")
368 | }
369 | var expr string
370 | n, v := string(name), string(val)
371 | switch string(op) {
372 | case "=":
373 | expr = fmt.Sprintf("@%s=%s", n, v)
374 | case "~=":
375 | expr = fmt.Sprintf(`contains(concat(" ", @%s, " "), concat(" ", %s, " "))`, n, v)
376 | case "|=":
377 | expr = fmt.Sprintf(`(@%s=%s or starts-with(@%s, concat(%s, "-")))`, n, v, n, v)
378 | case "^=":
379 | expr = fmt.Sprintf("starts-with(@%s, %s)", n, v)
380 | case "$=":
381 | // oy, libxml doesn't support ends-with
382 | // generate something like: div[substring(@class, string-length(@class) - string-length('foo') + 1) = 'foo']
383 | expr = fmt.Sprintf("substring(@%s, string-length(@%s) - string-length(%s) + 1) = %s", n, n, v, v)
384 | case "*=":
385 | expr = fmt.Sprintf("contains(@%s, %s)", n, v)
386 | }
387 | return expr, input
388 | }
389 |
390 | func token(matchers []*rubex.Regexp, lexeme Lexeme, input []byte) ([]byte, []byte) {
391 | matched := matchers[lexeme].Find(input)
392 | length := len(matched)
393 | if length == 0 {
394 | matched = nil
395 | }
396 | return matched, input[length:]
397 | }
398 |
399 | func peek(matchers []*rubex.Regexp, lexeme Lexeme, input []byte) bool {
400 | matched, _ := token(matchers, lexeme, input)
401 | return matched != nil
402 | }
403 |
--------------------------------------------------------------------------------
/xml/document.go:
--------------------------------------------------------------------------------
1 | package xml
2 |
3 | /*
4 | #cgo pkg-config: libxml-2.0
5 |
6 | #include "helper.h"
7 | */
8 | import "C"
9 |
10 | import (
11 | "errors"
12 | "github.com/moovweb/gokogiri/help"
13 | . "github.com/moovweb/gokogiri/util"
14 | "github.com/moovweb/gokogiri/xpath"
15 | "os"
16 | "unsafe"
17 | )
18 |
// Document is the interface through which nodes and helpers refer to the
// document that owns them. XmlDocument stores a value of this type in its Me
// field.
type Document interface {
	/* Nokogiri APIs */
	CreateElementNode(string) *ElementNode
	CreateCDataNode(string) *CDataNode
	CreateTextNode(string) *TextNode
	CreateCommentNode(string) *CommentNode
	CreatePINode(string, string) *ProcessingInstructionNode
	ParseFragment([]byte, []byte, ParseOption) (*DocumentFragment, error)

	// DocPtr returns the underlying libxml2 document pointer.
	DocPtr() unsafe.Pointer
	// DocType returns the node type recorded for the document.
	DocType() NodeType
	// DocRef returns the document as a Document value.
	DocRef() Document
	// InputEncoding and OutputEncoding return the stored encoding names.
	InputEncoding() []byte
	OutputEncoding() []byte
	DocXPathCtx() *xpath.XPath
	AddUnlinkedNode(unsafe.Pointer)
	RemoveUnlinkedNode(unsafe.Pointer) bool
	Free()
	String() string
	Root() *ElementNode
	NodeById(string) *ElementNode
	BookkeepFragment(*DocumentFragment)

	RecursivelyRemoveNamespaces() error
	UnparsedEntityURI(string) string
	Uri() string
}
46 |
// ParseOption values allow you to tune the behaviour of the parsing engine.
// Each option is a single bit, so options are combined with bitwise OR.
type ParseOption int

const (
	XML_PARSE_RECOVER    ParseOption = 1 << iota // recover on errors
	XML_PARSE_NOENT                              // substitute entities
	XML_PARSE_DTDLOAD                            // load the external subset
	XML_PARSE_DTDATTR                            // default DTD attributes
	XML_PARSE_DTDVALID                           // validate with the DTD
	XML_PARSE_NOERROR                            // suppress error reports
	XML_PARSE_NOWARNING                          // suppress warning reports
	XML_PARSE_PEDANTIC                           // pedantic error reporting
	XML_PARSE_NOBLANKS                           // remove blank nodes
	XML_PARSE_SAX1                               // use the SAX1 interface internally
	XML_PARSE_XINCLUDE                           // implement XInclude substitution
	XML_PARSE_NONET                              // forbid network access
	XML_PARSE_NODICT                             // do not reuse the context dictionary
	XML_PARSE_NSCLEAN                            // remove redundant namespace declarations
	XML_PARSE_NOCDATA                            // merge CDATA as text nodes
	XML_PARSE_NOXINCNODE                         // do not generate XINCLUDE START/END nodes
	XML_PARSE_COMPACT                            // compact small text nodes; makes tree read-only
	XML_PARSE_OLD10                              // parse using XML-1.0 before update 5
	XML_PARSE_NOBASEFIX                          // do not fix up XINCLUDE xml:base URIs
	XML_PARSE_HUGE                               // relax any hardcoded limit from the parser
	XML_PARSE_OLDSAX                             // parse using SAX2 interface before 2.7.0
	XML_PARSE_IGNORE_ENC                         // ignore internal document encoding hint
	XML_PARSE_BIG_LINES                          // store big line numbers in text PSVI field
)
75 |
// DefaultParseOption provides liberal parsing highly tolerant of invalid documents. Errors and warnings
// are suppressed and the DTD is not processed. Network access is forbidden.
const DefaultParseOption ParseOption = XML_PARSE_RECOVER |
	XML_PARSE_NONET |
	XML_PARSE_NOERROR |
	XML_PARSE_NOWARNING

// StrictParseOption provides standard-compliant parsing. The DTD is processed, entity
// substitutions are made, and errors and warnings are reported back.
const StrictParseOption ParseOption = XML_PARSE_NOENT |
	XML_PARSE_DTDLOAD |
	XML_PARSE_DTDATTR |
	XML_PARSE_NOCDATA

// DefaultEncoding is UTF-8, which is also the default for both libxml2 and Go strings.
const DefaultEncoding = "utf-8"

// ERR_FAILED_TO_PARSE_XML is returned by Parse and ReadFile when the
// underlying parser does not produce a document.
var ERR_FAILED_TO_PARSE_XML = errors.New("failed to parse xml input")
94 |
/*
XmlDocument is the primary interface for working with XML documents.
*/
type XmlDocument struct {
	Ptr *C.xmlDoc // underlying libxml2 document
	Me  Document  // the document itself as a Document value (set by NewDocument)
	Node          // node view over the same pointer as Ptr
	InEncoding    []byte // input encoding name as passed to NewDocument, after AppendCStringTerminator
	OutEncoding   []byte // output encoding name, stored the same way
	UnlinkedNodes map[*C.xmlNode]bool
	XPathCtx      *xpath.XPath // XPath context created for this document
	Type          NodeType     // node type reported for the document node
	InputLen      int          // length of the parsed content; 0 when not parsed from a byte slice

	fragments []*DocumentFragment //save the pointers to free them when the doc is freed
}
111 |
// DefaultEncodingBytes allows us to conveniently pass the DefaultEncoding to various functions that
// expect the encoding as a byte slice.
var DefaultEncodingBytes = []byte(DefaultEncoding)

// initialFragments is the initial capacity of a document's fragment list.
const initialFragments = 2
117 |
118 | //NewDocument wraps the pointer to the C struct.
119 | //
120 | // TODO: this should probably not be exported.
121 | func NewDocument(p unsafe.Pointer, contentLen int, inEncoding, outEncoding []byte) (doc *XmlDocument) {
122 | inEncoding = AppendCStringTerminator(inEncoding)
123 | outEncoding = AppendCStringTerminator(outEncoding)
124 |
125 | xmlNode := &XmlNode{Ptr: (*C.xmlNode)(p)}
126 | docPtr := (*C.xmlDoc)(p)
127 | doc = &XmlDocument{Ptr: docPtr, Node: xmlNode, InEncoding: inEncoding, OutEncoding: outEncoding, InputLen: contentLen}
128 | doc.UnlinkedNodes = make(map[*C.xmlNode]bool)
129 | doc.XPathCtx = xpath.NewXPath(p)
130 | doc.Type = xmlNode.NodeType()
131 | doc.fragments = make([]*DocumentFragment, 0, initialFragments)
132 | doc.Me = doc
133 | xmlNode.Document = doc
134 | //runtime.SetFinalizer(doc, (*XmlDocument).Free)
135 | return
136 | }
137 |
138 | // Parse creates an XmlDocument from some pre-existing content where the input encoding is known. Byte arrays created from
139 | // a Go string are utf-8 encoded (you can pass DefaultEncodingBytes in this scenario).
140 | //
141 | // If you want to build up a document programatically, calling CreateEmptyDocument and building it up using the xml.Node
142 | // interface is a better approach than building a string and calling Parse.
143 | //
144 | // If you have an XML file, then ReadFile will automatically determine the encoding according to the XML specification.
145 | func Parse(content, inEncoding, url []byte, options ParseOption, outEncoding []byte) (doc *XmlDocument, err error) {
146 | inEncoding = AppendCStringTerminator(inEncoding)
147 | outEncoding = AppendCStringTerminator(outEncoding)
148 |
149 | var docPtr *C.xmlDoc
150 | contentLen := len(content)
151 |
152 | if contentLen > 0 {
153 | var contentPtr, urlPtr, encodingPtr unsafe.Pointer
154 | contentPtr = unsafe.Pointer(&content[0])
155 |
156 | if len(url) > 0 {
157 | url = AppendCStringTerminator(url)
158 | urlPtr = unsafe.Pointer(&url[0])
159 | }
160 | if len(inEncoding) > 0 {
161 | encodingPtr = unsafe.Pointer(&inEncoding[0])
162 | }
163 |
164 | docPtr = C.xmlParse(contentPtr, C.int(contentLen), urlPtr, encodingPtr, C.int(options), nil, 0)
165 |
166 | if docPtr == nil {
167 | err = ERR_FAILED_TO_PARSE_XML
168 | } else {
169 | doc = NewDocument(unsafe.Pointer(docPtr), contentLen, inEncoding, outEncoding)
170 | }
171 |
172 | } else {
173 | doc = CreateEmptyDocument(inEncoding, outEncoding)
174 | }
175 | return
176 | }
177 |
178 | // ReadFile loads an XmlDocument from a filename. The encoding declared in the document will be
179 | // used as the input encoding. If no encoding is declared, the library will use the alogrithm
180 | // in the XML standard to determine if the document is encoded with UTF-8 or UTF-16.
181 | func ReadFile(filename string, options ParseOption) (doc *XmlDocument, err error) {
182 | // verify the file exists and can be read before we invoke C API
183 | _, err = os.Stat(filename)
184 | if err != nil {
185 | return
186 | }
187 |
188 | dataBytes := GetCString([]byte(filename))
189 | dataPtr := unsafe.Pointer(&dataBytes[0])
190 | var docPtr *C.xmlDoc
191 | docPtr = C.xmlReadFile((*C.char)(dataPtr), nil, C.int(options))
192 | if docPtr == nil {
193 | err = ERR_FAILED_TO_PARSE_XML
194 | } else {
195 | var encoding []byte
196 | // capture the detected input encoding
197 | p := docPtr.encoding
198 | if p != nil {
199 | encoding = []byte(C.GoString((*C.char)(unsafe.Pointer(p))))
200 | }
201 | doc = NewDocument(unsafe.Pointer(docPtr), 0, encoding, DefaultEncodingBytes)
202 | }
203 | return
204 | }
205 |
206 | // Create an empty XML document and return an XmlDocument. The root element, along with
207 | // any top-level comments or processing instructions, can be added by calling
208 | // AddChild() on the document itself.
209 | func CreateEmptyDocument(inEncoding, outEncoding []byte) (doc *XmlDocument) {
210 | help.LibxmlInitParser()
211 | docPtr := C.newEmptyXmlDoc()
212 | doc = NewDocument(unsafe.Pointer(docPtr), 0, inEncoding, outEncoding)
213 | return
214 | }
215 |
216 | // DocPtr provides access to the libxml2 structure underlying the document.
217 | func (document *XmlDocument) DocPtr() (ptr unsafe.Pointer) {
218 | ptr = unsafe.Pointer(document.Ptr)
219 | return
220 | }
221 |
222 | // DocType returns one of the node type constants, usually XML_DOCUMENT_NODE. This
223 | // may be of use if you are working with the C API.
224 | func (document *XmlDocument) DocType() (t NodeType) {
225 | t = document.Type
226 | return
227 | }
228 |
229 | // DocRef returns the embedded Document interface.
230 | func (document *XmlDocument) DocRef() (d Document) {
231 | d = document.Me
232 | return
233 | }
234 |
235 | // InputEncoding is the original encoding of the document.
236 | func (document *XmlDocument) InputEncoding() (encoding []byte) {
237 | encoding = document.InEncoding
238 | return
239 | }
240 |
241 | // OutputEncoding is the encoding that will be used when the document is written out.
242 | // This can be overridden by explicitly specifying an encoding as an argument to any of the
243 | // output functions.
244 | func (document *XmlDocument) OutputEncoding() (encoding []byte) {
245 | encoding = document.OutEncoding
246 | return
247 | }
248 |
249 | // Returns an XPath context that can be used to compile and evaluate XPath
250 | // expressions.
251 | //
252 | // In most cases, you should call the Search or EvalXPath functions instead of
253 | // handling the context directly.
254 | func (document *XmlDocument) DocXPathCtx() (ctx *xpath.XPath) {
255 | ctx = document.XPathCtx
256 | return
257 | }
258 |
259 | func (document *XmlDocument) AddUnlinkedNode(nodePtr unsafe.Pointer) {
260 | p := (*C.xmlNode)(nodePtr)
261 | document.UnlinkedNodes[p] = true
262 | }
263 |
264 | func (document *XmlDocument) RemoveUnlinkedNode(nodePtr unsafe.Pointer) bool {
265 | p := (*C.xmlNode)(nodePtr)
266 | if document.UnlinkedNodes[p] {
267 | delete(document.UnlinkedNodes, p)
268 | return true
269 | }
270 | return false
271 | }
272 |
273 | func (document *XmlDocument) BookkeepFragment(fragment *DocumentFragment) {
274 | document.fragments = append(document.fragments, fragment)
275 | }
276 |
277 | // Root returns the root node of the document. Newly created documents do not
278 | // have a root node until an element node is added a child of the document.
279 | //
280 | // Documents that have multiple root nodes are invalid adn the behaviour is
281 | // not well defined.
282 | func (document *XmlDocument) Root() (element *ElementNode) {
283 | nodePtr := C.xmlDocGetRootElement(document.Ptr)
284 | if nodePtr != nil {
285 | element = NewNode(unsafe.Pointer(nodePtr), document).(*ElementNode)
286 | }
287 | return
288 | }
289 |
290 | // Get an element node by the value of its ID attribute. By convention this attribute
291 | // is named id, but the actual name of the attribute is set by the document's DTD or schema.
292 | //
293 | // The value for an ID attribute is guaranteed to be unique within a valid document.
294 | func (document *XmlDocument) NodeById(id string) (element *ElementNode) {
295 | dataBytes := GetCString([]byte(id))
296 | dataPtr := unsafe.Pointer(&dataBytes[0])
297 | nodePtr := C.xmlGetID(document.Ptr, (*C.xmlChar)(dataPtr))
298 | if nodePtr != nil {
299 | idattr := NewNode(unsafe.Pointer(nodePtr), document).(*AttributeNode)
300 | element = idattr.Parent().(*ElementNode)
301 | }
302 | return
303 | }
304 |
305 | /*
306 | CreateElementNode creates an element node with the specified tag name. It can be
307 | added as a child of any other element, or as a child of the document itself.
308 |
309 | Use SetNamespace if the element node needs to be in a namespace.
310 |
311 | Note that valid documents have only one child element, referred to as the root node.
312 | */
313 | func (document *XmlDocument) CreateElementNode(tag string) (element *ElementNode) {
314 | tagBytes := GetCString([]byte(tag))
315 | tagPtr := unsafe.Pointer(&tagBytes[0])
316 | newNodePtr := C.xmlNewNode(nil, (*C.xmlChar)(tagPtr))
317 | newNode := NewNode(unsafe.Pointer(newNodePtr), document)
318 | element = newNode.(*ElementNode)
319 | return
320 | }
321 |
322 | //CreateTextNode creates a text node. It can be added as a child of an element.
323 | //
324 | // The data argument is XML-escaped and used as the content of the node.
325 | func (document *XmlDocument) CreateTextNode(data string) (text *TextNode) {
326 | dataBytes := GetCString([]byte(data))
327 | dataPtr := unsafe.Pointer(&dataBytes[0])
328 | nodePtr := C.xmlNewText((*C.xmlChar)(dataPtr))
329 | if nodePtr != nil {
330 | nodePtr.doc = (*_Ctype_struct__xmlDoc)(document.DocPtr())
331 | text = NewNode(unsafe.Pointer(nodePtr), document).(*TextNode)
332 | }
333 | return
334 | }
335 |
336 | //CreateCDataNode creates a CDATA node. CDATA nodes can
337 | // only be children of an element.
338 | //
339 | // The data argument will become the content of the newly created node.
340 | func (document *XmlDocument) CreateCDataNode(data string) (cdata *CDataNode) {
341 | dataLen := len(data)
342 | dataBytes := GetCString([]byte(data))
343 | dataPtr := unsafe.Pointer(&dataBytes[0])
344 | nodePtr := C.xmlNewCDataBlock(document.Ptr, (*C.xmlChar)(dataPtr), C.int(dataLen))
345 | if nodePtr != nil {
346 | cdata = NewNode(unsafe.Pointer(nodePtr), document).(*CDataNode)
347 | }
348 | return
349 | }
350 |
351 | //CreateCommentNode creates a comment node. Comment nodes can
352 | // be children of an element or of the document itself.
353 | //
354 | // The data argument will become the content of the comment.
355 | func (document *XmlDocument) CreateCommentNode(data string) (comment *CommentNode) {
356 | dataBytes := GetCString([]byte(data))
357 | dataPtr := unsafe.Pointer(&dataBytes[0])
358 | nodePtr := C.xmlNewComment((*C.xmlChar)(dataPtr))
359 | if nodePtr != nil {
360 | comment = NewNode(unsafe.Pointer(nodePtr), document).(*CommentNode)
361 | }
362 | return
363 | }
364 |
365 | //CreatePINode creates a processing instruction node with the specified name and data.
366 | // Processing instruction nodes can be children of an element or of the document itself.
367 | //
368 | // While it's common to use an attribute-like syntax for processing instructions, the data
369 | // is actually an arbitrary string that you will need to generate or parse yourself.
370 | func (document *XmlDocument) CreatePINode(name, data string) (pi *ProcessingInstructionNode) {
371 | nameBytes := GetCString([]byte(name))
372 | namePtr := unsafe.Pointer(&nameBytes[0])
373 | dataBytes := GetCString([]byte(data))
374 | dataPtr := unsafe.Pointer(&dataBytes[0])
375 | nodePtr := C.xmlNewDocPI(document.Ptr, (*C.xmlChar)(namePtr), (*C.xmlChar)(dataPtr))
376 | if nodePtr != nil {
377 | pi = NewNode(unsafe.Pointer(nodePtr), document).(*ProcessingInstructionNode)
378 | }
379 | return
380 | }
381 |
382 | func (document *XmlDocument) ParseFragment(input, url []byte, options ParseOption) (fragment *DocumentFragment, err error) {
383 | root := document.Root()
384 | if root == nil {
385 | fragment, err = parsefragment(document, nil, input, url, options)
386 | } else {
387 | fragment, err = parsefragment(document, root.XmlNode, input, url, options)
388 | }
389 | return
390 | }
391 |
392 | // Return the value of an NDATA entity declared in the DTD. If there is no such entity or
393 | // the value cannot be encoded as a valid URI, an empty string is returned.
394 | //
395 | // Note that this library assumes you already know the name of entity and does not
396 | // expose any way of getting the list of entities.
397 | func (document *XmlDocument) UnparsedEntityURI(name string) (val string) {
398 | if name == "" {
399 | return
400 | }
401 |
402 | nameBytes := GetCString([]byte(name))
403 | namePtr := unsafe.Pointer(&nameBytes[0])
404 | entity := C.xmlGetDocEntity(document.Ptr, (*C.xmlChar)(namePtr))
405 | if entity == nil {
406 | return
407 | }
408 |
409 | // unlike entity.content (which returns the raw, unprocessed string value of the entity),
410 | // it looks like entity.URI includes any escaping needed to treat the value as a URI.
411 | valPtr := unsafe.Pointer(entity.URI)
412 | if valPtr == nil {
413 | return
414 | }
415 |
416 | val = C.GoString((*C.char)(valPtr))
417 | return
418 | }
419 |
420 | // Free the C structures associated with this document.
421 | func (document *XmlDocument) Free() {
422 | //must free the xpath context before freeing the fragments or unlinked nodes
423 | //otherwise, it causes memory leaks and crashes when dealing with very large documents (a few MB)
424 | if document.XPathCtx != nil {
425 | document.XPathCtx.Free()
426 | document.XPathCtx = nil
427 | }
428 | //must clear the fragments first
429 | //because the nodes are put in the unlinked list
430 | if document.fragments != nil {
431 | for _, fragment := range document.fragments {
432 | fragment.Remove()
433 | }
434 | }
435 | document.fragments = nil
436 | var p *C.xmlNode
437 | if document.UnlinkedNodes != nil {
438 | for p, _ = range document.UnlinkedNodes {
439 | C.xmlFreeNode(p)
440 | }
441 | }
442 | document.UnlinkedNodes = nil
443 | if document.Ptr != nil {
444 | C.xmlFreeDoc(document.Ptr)
445 | document.Ptr = nil
446 | }
447 | }
448 |
449 | /* Uri returns the URI of the document - typically this is the filename if ReadFile was used to parse
450 | the document.
451 | */
452 | func (document *XmlDocument) Uri() (val string) {
453 | val = C.GoString((*C.char)(unsafe.Pointer(document.Ptr.URL)))
454 | return
455 | }
456 |
--------------------------------------------------------------------------------
/xml/node.go:
--------------------------------------------------------------------------------
1 | package xml
2 |
3 | //#include "helper.h"
4 | //#include
5 | import "C"
6 |
7 | import (
8 | "errors"
9 | . "github.com/moovweb/gokogiri/util"
10 | "github.com/moovweb/gokogiri/xpath"
11 | "strconv"
12 | "unsafe"
13 | )
14 |
// Sentinel errors reported by the node manipulation and search functions.
var (
	// ERR_UNDEFINED_COERCE_PARAM is set by coerce when given an unsupported argument type.
	ERR_UNDEFINED_COERCE_PARAM = errors.New("unexpected parameter type in coerce")
	// ERR_UNDEFINED_SET_CONTENT_PARAM is returned by SetContent for arguments that are neither string nor []byte.
	ERR_UNDEFINED_SET_CONTENT_PARAM = errors.New("unexpected parameter type in SetContent")
	// ERR_UNDEFINED_SEARCH_PARAM is returned by the search and XPath evaluation functions for unsupported argument types.
	ERR_UNDEFINED_SEARCH_PARAM = errors.New("unexpected parameter type in Search")
	// ERR_CANNOT_MAKE_DUCMENT_AS_CHILD signals an attempt to add a document node as a child.
	// NOTE(review): "DUCMENT" is a typo for "DOCUMENT", but the name is exported and cannot be changed here.
	ERR_CANNOT_MAKE_DUCMENT_AS_CHILD = errors.New("cannot add a document node as a child")
	// ERR_CANNOT_COPY_TEXT_NODE_WHEN_ADD_CHILD signals that a text node could not be copied while being added.
	ERR_CANNOT_COPY_TEXT_NODE_WHEN_ADD_CHILD = errors.New("cannot copy a text node when adding it")
)
22 |
// NodeType is an enumeration that indicates the type of XmlNode.
type NodeType int

// Node type constants, numbered consecutively starting at 1. NewNode converts
// the value returned by the C helper getNodeType directly into a NodeType, so
// the order of these constants must not change.
// NOTE(review): presumably mirrors libxml2's xmlElementType enumeration - confirm.
const (
	XML_ELEMENT_NODE NodeType = iota + 1
	XML_ATTRIBUTE_NODE
	XML_TEXT_NODE
	XML_CDATA_SECTION_NODE
	XML_ENTITY_REF_NODE
	XML_ENTITY_NODE
	XML_PI_NODE
	XML_COMMENT_NODE
	XML_DOCUMENT_NODE
	XML_DOCUMENT_TYPE_NODE
	XML_DOCUMENT_FRAG_NODE
	XML_NOTATION_NODE
	XML_HTML_DOCUMENT_NODE
	XML_DTD_NODE
	XML_ELEMENT_DECL
	XML_ATTRIBUTE_DECL
	XML_ENTITY_DECL
	XML_NAMESPACE_DECL
	XML_XINCLUDE_START
	XML_XINCLUDE_END
	XML_DOCB_DOCUMENT_NODE
)
49 |
// SerializationOption is a set of flags used to control how a node is written out.
// Each constant is a distinct bit, so options can be combined with bitwise OR.
type SerializationOption int

const (
	XML_SAVE_FORMAT   SerializationOption = 1 << iota // format save output
	XML_SAVE_NO_DECL                                  // drop the xml declaration
	XML_SAVE_NO_EMPTY                                 // no empty tags
	XML_SAVE_NO_XHTML                                 // disable XHTML1 specific rules
	XML_SAVE_XHTML                                    // force XHTML1 specific rules
	XML_SAVE_AS_XML                                   // force XML serialization on HTML doc
	XML_SAVE_AS_HTML                                  // force HTML serialization on XML doc
	XML_SAVE_WSNONSIG                                 // format with non-significant whitespace
)
63 |
// NamespaceDeclaration represents a namespace declaration, providing both the prefix and the URI of the namespace.
// It is returned by the DeclaredNamespaces function.
type NamespaceDeclaration struct {
	Prefix string // prefix of the declared namespace
	Uri    string // URI of the declared namespace
}
70 |
// Node is the interface implemented by XmlNode and, through it, by the typed
// node wrappers that NewNode creates (elements, attributes, text, and so on).
type Node interface {
	// Access to the underlying C node and the owning document.
	NodePtr() unsafe.Pointer
	ResetNodePtr()
	MyDocument() Document

	// IsValid reports whether the node is still valid.
	IsValid() bool

	ParseFragment([]byte, []byte, ParseOption) (*DocumentFragment, error)
	LineNumber() int

	// Node type and sibling navigation.
	NodeType() NodeType
	NextSibling() Node
	PreviousSibling() Node

	// Parent, child and attribute navigation.
	Parent() Node
	FirstChild() Node
	LastChild() Node
	CountChildren() int
	Attributes() map[string]*AttributeNode

	// Coerce converts a supported input value into a list of nodes.
	Coerce(interface{}) ([]Node, error)

	// Tree mutation: adding, inserting, replacing and wrapping nodes.
	AddChild(interface{}) error
	AddPreviousSibling(interface{}) error
	AddNextSibling(interface{}) error
	InsertBefore(interface{}) error
	InsertAfter(interface{}) error
	InsertBegin(interface{}) error
	InsertEnd(interface{}) error
	SetInnerHtml(interface{}) error
	SetChildren(interface{}) error
	Replace(interface{}) error
	Wrap(string) error

	SetContent(interface{}) error

	// Node name.
	Name() string
	SetName(string)

	// Attribute access.
	Attr(string) string
	SetAttr(string, string) string
	SetNsAttr(string, string, string) string
	Attribute(string) *AttributeNode

	// Path returns an XPath expression that selects this node.
	Path() string

	// Copying nodes, within this document or into another one.
	Duplicate(int) Node
	DuplicateTo(Document, int) Node

	// XPath search and evaluation.
	Search(interface{}) ([]Node, error)
	SearchWithVariables(interface{}, xpath.VariableScope) ([]Node, error)
	EvalXPath(interface{}, xpath.VariableScope) (interface{}, error)
	EvalXPathAsBoolean(interface{}, xpath.VariableScope) bool

	// Detaching nodes from the tree.
	Unlink()
	Remove()
	ResetChildren()

	// Serialization and content access.
	SerializeWithFormat(SerializationOption, []byte, []byte) ([]byte, int)
	ToXml([]byte, []byte) ([]byte, int)
	ToUnformattedXml() string
	ToHtml([]byte, []byte) ([]byte, int)
	ToBuffer([]byte) []byte
	String() string
	Content() string
	InnerHtml() string

	// Namespace handling.
	RecursivelyRemoveNamespaces() error
	Namespace() string
	SetNamespace(string, string)
	DeclareNamespace(string, string)
	RemoveDefaultNamespace()
	DeclaredNamespaces() []NamespaceDeclaration
}
146 |
// ErrTooLarge indicates that an output buffer grew too large (out of memory).
var ErrTooLarge = errors.New("Output buffer too large")
149 |
// initialOutputBufferSize is the size of the buffer pre-allocated for
// serializing the document.
// NOTE(review): the trailing comment says 100K but the value is 10; the unit
// and intended size cannot be determined from this file - confirm.
const initialOutputBufferSize = 10 //100K
152 |
/*
XmlNode implements the Node interface, and as such is the heart of the API.
*/
type XmlNode struct {
	// Ptr is the underlying libxml2 node; ResetNodePtr sets it to nil.
	Ptr *C.xmlNode
	// Document is the embedded document this node was created for.
	Document
	// valid is the value reported by IsValid; NewNode initialises it to true.
	valid bool
}
161 |
// WriteBuffer pairs a node with a byte buffer and an offset into it.
// NOTE(review): presumably used as the output sink while serializing Node;
// its users are not visible in this part of the file - confirm.
type WriteBuffer struct {
	Node   *XmlNode
	Buffer []byte
	Offset int
}
167 |
168 | // NewNode takes a C pointer from the libxml2 library and returns a Node instance of
169 | // the appropriate type.
170 | func NewNode(nodePtr unsafe.Pointer, document Document) (node Node) {
171 | if nodePtr == nil {
172 | return nil
173 | }
174 | xmlNode := &XmlNode{
175 | Ptr: (*C.xmlNode)(nodePtr),
176 | Document: document,
177 | valid: true,
178 | }
179 | nodeType := NodeType(C.getNodeType((*C.xmlNode)(nodePtr)))
180 |
181 | switch nodeType {
182 | default:
183 | node = xmlNode
184 | case XML_ATTRIBUTE_NODE:
185 | node = &AttributeNode{XmlNode: xmlNode}
186 | case XML_ELEMENT_NODE:
187 | node = &ElementNode{XmlNode: xmlNode}
188 | case XML_CDATA_SECTION_NODE:
189 | node = &CDataNode{XmlNode: xmlNode}
190 | case XML_COMMENT_NODE:
191 | node = &CommentNode{XmlNode: xmlNode}
192 | case XML_PI_NODE:
193 | node = &ProcessingInstructionNode{XmlNode: xmlNode}
194 | case XML_TEXT_NODE:
195 | node = &TextNode{XmlNode: xmlNode}
196 | }
197 | return
198 | }
199 |
200 | func (xmlNode *XmlNode) coerce(data interface{}) (nodes []Node, err error) {
201 | switch t := data.(type) {
202 | default:
203 | err = ERR_UNDEFINED_COERCE_PARAM
204 | case []Node:
205 | nodes = t
206 | case *DocumentFragment:
207 | nodes = t.Children()
208 | case string:
209 | f, err := xmlNode.MyDocument().ParseFragment([]byte(t), nil, DefaultParseOption)
210 | if err == nil {
211 | nodes = f.Children()
212 | }
213 | case []byte:
214 | f, err := xmlNode.MyDocument().ParseFragment(t, nil, DefaultParseOption)
215 | if err == nil {
216 | nodes = f.Children()
217 | }
218 | }
219 | return
220 | }
221 |
222 | func (xmlNode *XmlNode) Coerce(data interface{}) (nodes []Node, err error) {
223 | return xmlNode.coerce(data)
224 | }
225 |
226 | // Add a node as a child of the current node.
227 | // Passing in a nodeset will add all the nodes as children of the current node.
228 | func (xmlNode *XmlNode) AddChild(data interface{}) (err error) {
229 | switch t := data.(type) {
230 | default:
231 | if nodes, err := xmlNode.coerce(data); err == nil {
232 | for _, node := range nodes {
233 | if err = xmlNode.addChild(node); err != nil {
234 | break
235 | }
236 | }
237 | }
238 | case *DocumentFragment:
239 | if nodes, err := xmlNode.coerce(data); err == nil {
240 | for _, node := range nodes {
241 | if err = xmlNode.addChild(node); err != nil {
242 | break
243 | }
244 | }
245 | }
246 | case Node:
247 | err = xmlNode.addChild(t)
248 | }
249 | return
250 | }
251 |
252 | // Insert a node immediately before this node in the document.
253 | // Passing in a nodeset will add all the nodes, in order.
254 | func (xmlNode *XmlNode) AddPreviousSibling(data interface{}) (err error) {
255 | switch t := data.(type) {
256 | default:
257 | if nodes, err := xmlNode.coerce(data); err == nil {
258 | for _, node := range nodes {
259 | if err = xmlNode.addPreviousSibling(node); err != nil {
260 | break
261 | }
262 | }
263 | }
264 | case *DocumentFragment:
265 | if nodes, err := xmlNode.coerce(data); err == nil {
266 | for _, node := range nodes {
267 | if err = xmlNode.addPreviousSibling(node); err != nil {
268 | break
269 | }
270 | }
271 | }
272 | case Node:
273 | err = xmlNode.addPreviousSibling(t)
274 | }
275 | return
276 | }
277 |
278 | // Insert a node immediately after this node in the document.
279 | // Passing in a nodeset will add all the nodes, in order.
280 | func (xmlNode *XmlNode) AddNextSibling(data interface{}) (err error) {
281 | switch t := data.(type) {
282 | default:
283 | if nodes, err := xmlNode.coerce(data); err == nil {
284 | for i := len(nodes) - 1; i >= 0; i-- {
285 | node := nodes[i]
286 | if err = xmlNode.addNextSibling(node); err != nil {
287 | break
288 | }
289 | }
290 | }
291 | case *DocumentFragment:
292 | if nodes, err := xmlNode.coerce(data); err == nil {
293 | for i := len(nodes) - 1; i >= 0; i-- {
294 | node := nodes[i]
295 | if err = xmlNode.addNextSibling(node); err != nil {
296 | break
297 | }
298 | }
299 | }
300 | case Node:
301 | err = xmlNode.addNextSibling(t)
302 | }
303 | return
304 | }
305 |
306 | func (xmlNode *XmlNode) ResetNodePtr() {
307 | xmlNode.Ptr = nil
308 | return
309 | }
310 |
// IsValid returns true if the node is valid. Nodes become
// invalid when Remove() is called.
func (xmlNode *XmlNode) IsValid() bool {
	return xmlNode.valid
}
316 |
317 | // Return the document containing this node. Removed or unlinked
318 | // nodes still have a document associated with them.
319 | func (xmlNode *XmlNode) MyDocument() (document Document) {
320 | document = xmlNode.Document.DocRef()
321 | return
322 | }
323 |
324 | // NodePtr returns a pointer to the underlying C struct.
325 | func (xmlNode *XmlNode) NodePtr() (p unsafe.Pointer) {
326 | p = unsafe.Pointer(xmlNode.Ptr)
327 | return
328 | }
329 |
330 | func (xmlNode *XmlNode) NodeType() (nodeType NodeType) {
331 | nodeType = NodeType(C.getNodeType(xmlNode.Ptr))
332 | return
333 | }
334 |
335 | // Path returns an XPath expression that can be used to
336 | // select this node in the document.
337 | func (xmlNode *XmlNode) Path() (path string) {
338 | pathPtr := C.xmlGetNodePath(xmlNode.Ptr)
339 | if pathPtr != nil {
340 | p := (*C.char)(unsafe.Pointer(pathPtr))
341 | defer C.xmlFreeChars(p)
342 | path = C.GoString(p)
343 | }
344 | return
345 | }
346 |
347 | // NextSibling returns the next sibling (if any) of the current node.
348 | // It is often used when iterating over the children of a node.
349 | func (xmlNode *XmlNode) NextSibling() Node {
350 | siblingPtr := (*C.xmlNode)(xmlNode.Ptr.next)
351 | return NewNode(unsafe.Pointer(siblingPtr), xmlNode.Document)
352 | }
353 |
354 | // PreviousSibling returns the previous sibling (if any) of the current node.
355 | // It is often used when iterating over the children of a node in reverse.
356 | func (xmlNode *XmlNode) PreviousSibling() Node {
357 | siblingPtr := (*C.xmlNode)(xmlNode.Ptr.prev)
358 | return NewNode(unsafe.Pointer(siblingPtr), xmlNode.Document)
359 | }
360 |
361 | // CountChildren returns the number of child nodes.
362 | func (xmlNode *XmlNode) CountChildren() int {
363 | return int(C.xmlLsCountNode(xmlNode.Ptr))
364 | }
365 |
366 | func (xmlNode *XmlNode) FirstChild() Node {
367 | return NewNode(unsafe.Pointer(xmlNode.Ptr.children), xmlNode.Document)
368 | }
369 |
370 | func (xmlNode *XmlNode) LastChild() Node {
371 | return NewNode(unsafe.Pointer(xmlNode.Ptr.last), xmlNode.Document)
372 | }
373 |
374 | /*
375 | Parent returns the parent of the current node (or nil if there isn't one).
376 | This will always be an element or document node, as those are the only node types
377 | that can have children.
378 | */
379 | func (xmlNode *XmlNode) Parent() Node {
380 | if C.xmlNodePtrCheck(unsafe.Pointer(xmlNode.Ptr.parent)) == C.int(0) {
381 | return nil
382 | }
383 | return NewNode(unsafe.Pointer(xmlNode.Ptr.parent), xmlNode.Document)
384 | }
385 |
386 | func (xmlNode *XmlNode) ResetChildren() {
387 | var p unsafe.Pointer
388 | for childPtr := xmlNode.Ptr.children; childPtr != nil; {
389 | nextPtr := childPtr.next
390 | p = unsafe.Pointer(childPtr)
391 | C.xmlUnlinkNodeWithCheck((*C.xmlNode)(p))
392 | xmlNode.Document.AddUnlinkedNode(p)
393 | childPtr = nextPtr
394 | }
395 | }
396 |
397 | func (xmlNode *XmlNode) SetContent(content interface{}) (err error) {
398 | switch data := content.(type) {
399 | default:
400 | err = ERR_UNDEFINED_SET_CONTENT_PARAM
401 | case string:
402 | err = xmlNode.SetContent([]byte(data))
403 | case []byte:
404 | contentBytes := GetCString(data)
405 | contentPtr := unsafe.Pointer(&contentBytes[0])
406 | C.xmlSetContent(unsafe.Pointer(xmlNode), unsafe.Pointer(xmlNode.Ptr), contentPtr)
407 | }
408 | return
409 | }
410 |
411 | func (xmlNode *XmlNode) InsertBefore(data interface{}) (err error) {
412 | err = xmlNode.AddPreviousSibling(data)
413 | return
414 | }
415 |
416 | func (xmlNode *XmlNode) InsertAfter(data interface{}) (err error) {
417 | err = xmlNode.AddNextSibling(data)
418 | return
419 | }
420 |
421 | func (xmlNode *XmlNode) InsertBegin(data interface{}) (err error) {
422 | if parent := xmlNode.Parent(); parent != nil {
423 | if last := parent.LastChild(); last != nil {
424 | err = last.AddPreviousSibling(data)
425 | }
426 | }
427 | return
428 | }
429 |
430 | func (xmlNode *XmlNode) InsertEnd(data interface{}) (err error) {
431 | if parent := xmlNode.Parent(); parent != nil {
432 | if first := parent.FirstChild(); first != nil {
433 | err = first.AddPreviousSibling(data)
434 | }
435 | }
436 | return
437 | }
438 |
439 | func (xmlNode *XmlNode) SetChildren(data interface{}) (err error) {
440 | nodes, err := xmlNode.coerce(data)
441 | if err != nil {
442 | return
443 | }
444 | xmlNode.ResetChildren()
445 | err = xmlNode.AddChild(nodes)
446 | return nil
447 | }
448 |
449 | func (xmlNode *XmlNode) SetInnerHtml(data interface{}) (err error) {
450 | err = xmlNode.SetChildren(data)
451 | return
452 | }
453 |
454 | func (xmlNode *XmlNode) Replace(data interface{}) (err error) {
455 | err = xmlNode.AddPreviousSibling(data)
456 | if err != nil {
457 | return
458 | }
459 | xmlNode.Remove()
460 | return
461 | }
462 |
463 | func (xmlNode *XmlNode) Attributes() (attributes map[string]*AttributeNode) {
464 | attributes = make(map[string]*AttributeNode)
465 | for prop := xmlNode.Ptr.properties; prop != nil; prop = prop.next {
466 | if prop.name != nil {
467 | namePtr := unsafe.Pointer(prop.name)
468 | name := C.GoString((*C.char)(namePtr))
469 | attrPtr := unsafe.Pointer(prop)
470 | attributeNode := NewNode(attrPtr, xmlNode.Document)
471 | if attr, ok := attributeNode.(*AttributeNode); ok {
472 | attributes[name] = attr
473 | }
474 | }
475 | }
476 | return
477 | }
478 |
479 | // Return the attribute node, or nil if the attribute does not exist.
480 | func (xmlNode *XmlNode) Attribute(name string) (attribute *AttributeNode) {
481 | if xmlNode.NodeType() != XML_ELEMENT_NODE {
482 | return
483 | }
484 | nameBytes := GetCString([]byte(name))
485 | namePtr := unsafe.Pointer(&nameBytes[0])
486 | attrPtr := C.xmlHasNsProp(xmlNode.Ptr, (*C.xmlChar)(namePtr), nil)
487 | if attrPtr == nil {
488 | return
489 | } else {
490 | node := NewNode(unsafe.Pointer(attrPtr), xmlNode.Document)
491 | if node, ok := node.(*AttributeNode); ok {
492 | attribute = node
493 | }
494 | }
495 | return
496 | }
497 |
498 | // Attr returns the value of an attribute.
499 |
500 | // If you need to check for the existence of an attribute,
501 | // use Attribute.
502 | func (xmlNode *XmlNode) Attr(name string) (val string) {
503 | if xmlNode.NodeType() != XML_ELEMENT_NODE {
504 | return
505 | }
506 | nameBytes := GetCString([]byte(name))
507 | namePtr := unsafe.Pointer(&nameBytes[0])
508 | valPtr := C.xmlGetProp(xmlNode.Ptr, (*C.xmlChar)(namePtr))
509 | if valPtr == nil {
510 | return
511 | }
512 | p := unsafe.Pointer(valPtr)
513 | defer C.xmlFreeChars((*C.char)(p))
514 | val = C.GoString((*C.char)(p))
515 | return
516 | }
517 |
518 | // SetAttr sets the value of an attribute. If the attribute is in a namespace,
519 | // use SetNsAttr instead.
520 |
521 | // While this call accepts QNames for the name parameter, it does not check
522 | // their validity.
523 |
524 | // Attributes such as "xml:lang" or "xml:space" are not is a formal namespace
525 | // and should be set by calling SetAttr with the prefix as part of the name.
526 | func (xmlNode *XmlNode) SetAttr(name, value string) (val string) {
527 | val = value
528 | if xmlNode.NodeType() != XML_ELEMENT_NODE {
529 | return
530 | }
531 | nameBytes := GetCString([]byte(name))
532 | namePtr := unsafe.Pointer(&nameBytes[0])
533 |
534 | valueBytes := GetCString([]byte(value))
535 | valuePtr := unsafe.Pointer(&valueBytes[0])
536 |
537 | C.xmlSetProp(xmlNode.Ptr, (*C.xmlChar)(namePtr), (*C.xmlChar)(valuePtr))
538 | return
539 | }
540 |
541 | // SetNsAttr sets the value of a namespaced attribute.
542 |
543 | // Attributes such as "xml:lang" or "xml:space" are not is a formal namespace
544 | // and should be set by calling SetAttr with the xml prefix as part of the name.
545 |
546 | // The namespace should already be declared and in-scope when SetNsAttr is called.
547 | // This restriction will be lifted in a future version.
548 | func (xmlNode *XmlNode) SetNsAttr(href, name, value string) (val string) {
549 | val = value
550 | if xmlNode.NodeType() != XML_ELEMENT_NODE {
551 | return
552 | }
553 | nameBytes := GetCString([]byte(name))
554 | namePtr := unsafe.Pointer(&nameBytes[0])
555 |
556 | valueBytes := GetCString([]byte(value))
557 | valuePtr := unsafe.Pointer(&valueBytes[0])
558 |
559 | hrefBytes := GetCString([]byte(href))
560 | hrefPtr := unsafe.Pointer(&hrefBytes[0])
561 |
562 | ns := C.xmlSearchNsByHref((*C.xmlDoc)(xmlNode.Document.DocPtr()), xmlNode.Ptr, (*C.xmlChar)(hrefPtr))
563 | if ns == nil {
564 | return
565 | }
566 |
567 | C.xmlSetNsProp(xmlNode.Ptr, ns, (*C.xmlChar)(namePtr), (*C.xmlChar)(valuePtr))
568 | return
569 | }
570 |
571 | // Search for nodes that match an XPath. This is the simplest way to look for nodes.
572 | func (xmlNode *XmlNode) Search(data interface{}) (result []Node, err error) {
573 | switch data := data.(type) {
574 | default:
575 | err = ERR_UNDEFINED_SEARCH_PARAM
576 | case string:
577 | if xpathExpr := xpath.Compile(data); xpathExpr != nil {
578 | defer xpathExpr.Free()
579 | result, err = xmlNode.Search(xpathExpr)
580 | } else {
581 | err = errors.New("cannot compile xpath: " + data)
582 | }
583 | case []byte:
584 | result, err = xmlNode.Search(string(data))
585 | case *xpath.Expression:
586 | xpathCtx := xmlNode.Document.DocXPathCtx()
587 | nodePtrs, err := xpathCtx.EvaluateAsNodeset(unsafe.Pointer(xmlNode.Ptr), data)
588 | if nodePtrs == nil || err != nil {
589 | return nil, err
590 | }
591 | for _, nodePtr := range nodePtrs {
592 | result = append(result, NewNode(nodePtr, xmlNode.Document))
593 | }
594 | }
595 | return
596 | }
597 |
598 | // As the Search function, but passing a VariableScope that can be used to reolve variable
599 | // names or registered function references in the XPath being evaluated.
600 | func (xmlNode *XmlNode) SearchWithVariables(data interface{}, v xpath.VariableScope) (result []Node, err error) {
601 | switch data := data.(type) {
602 | default:
603 | err = ERR_UNDEFINED_SEARCH_PARAM
604 | case string:
605 | if xpathExpr := xpath.Compile(data); xpathExpr != nil {
606 | defer xpathExpr.Free()
607 | result, err = xmlNode.SearchWithVariables(xpathExpr, v)
608 | } else {
609 | err = errors.New("cannot compile xpath: " + data)
610 | }
611 | case []byte:
612 | result, err = xmlNode.SearchWithVariables(string(data), v)
613 | case *xpath.Expression:
614 | xpathCtx := xmlNode.Document.DocXPathCtx()
615 | xpathCtx.SetResolver(v)
616 | nodePtrs, err := xpathCtx.EvaluateAsNodeset(unsafe.Pointer(xmlNode.Ptr), data)
617 | if nodePtrs == nil || err != nil {
618 | return nil, err
619 | }
620 | for _, nodePtr := range nodePtrs {
621 | result = append(result, NewNode(nodePtr, xmlNode.Document))
622 | }
623 | }
624 | return
625 | }
626 |
627 | // Evaluate an XPath and return a result of the appropriate type.
628 | // If a non-nil VariableScope is provided, any variables or functions present
629 | // in the xpath will be resolved.
630 |
631 | // If the result is a nodeset (or the empty nodeset), a nodeset will be returned.
632 |
633 | // If the result is a number, a float64 will be returned.
634 |
635 | // If the result is a boolean, a bool will be returned.
636 |
637 | // In any other cases, the result will be coerced to a string.
638 | func (xmlNode *XmlNode) EvalXPath(data interface{}, v xpath.VariableScope) (result interface{}, err error) {
639 | switch data := data.(type) {
640 | case string:
641 | if xpathExpr := xpath.Compile(data); xpathExpr != nil {
642 | defer xpathExpr.Free()
643 | result, err = xmlNode.EvalXPath(xpathExpr, v)
644 | } else {
645 | err = errors.New("cannot compile xpath: " + data)
646 | }
647 | case []byte:
648 | result, err = xmlNode.EvalXPath(string(data), v)
649 | case *xpath.Expression:
650 | xpathCtx := xmlNode.Document.DocXPathCtx()
651 | xpathCtx.SetResolver(v)
652 | err := xpathCtx.Evaluate(unsafe.Pointer(xmlNode.Ptr), data)
653 | if err != nil {
654 | return nil, err
655 | }
656 | rt := xpathCtx.ReturnType()
657 | switch rt {
658 | case xpath.XPATH_NODESET, xpath.XPATH_XSLT_TREE:
659 | nodePtrs, err := xpathCtx.ResultAsNodeset()
660 | if err != nil {
661 | return nil, err
662 | }
663 | var output []Node
664 | for _, nodePtr := range nodePtrs {
665 | output = append(output, NewNode(nodePtr, xmlNode.Document))
666 | }
667 | result = output
668 | case xpath.XPATH_NUMBER:
669 | result, _ = xpathCtx.ResultAsNumber()
670 | case xpath.XPATH_BOOLEAN:
671 | result, _ = xpathCtx.ResultAsBoolean()
672 | default:
673 | result, _ = xpathCtx.ResultAsString()
674 | }
675 | default:
676 | err = ERR_UNDEFINED_SEARCH_PARAM
677 | }
678 | return
679 | }
680 |
681 | // Evaluate an XPath and coerce the result to a boolean according to the
682 | // XPath rules. In the presence of an error, this function will return false
683 | // even if the expression cannot actually be evaluated.
684 |
685 | // In most cases you are better advised to call EvalXPath; this function is
686 | // intended for packages that implement XML standards and that are fully aware
687 | // of the consequences of suppressing a compilation error.
688 |
689 | // If a non-nil VariableScope is provided, any variables or registered functions present
690 | // in the xpath will be resolved.
691 | func (xmlNode *XmlNode) EvalXPathAsBoolean(data interface{}, v xpath.VariableScope) (result bool) {
692 | switch data := data.(type) {
693 | case string:
694 | if xpathExpr := xpath.Compile(data); xpathExpr != nil {
695 | defer xpathExpr.Free()
696 | result = xmlNode.EvalXPathAsBoolean(xpathExpr, v)
697 | } else {
698 | //err = errors.New("cannot compile xpath: " + data)
699 | }
700 | case []byte:
701 | result = xmlNode.EvalXPathAsBoolean(string(data), v)
702 | case *xpath.Expression:
703 | xpathCtx := xmlNode.Document.DocXPathCtx()
704 | xpathCtx.SetResolver(v)
705 | err := xpathCtx.Evaluate(unsafe.Pointer(xmlNode.Ptr), data)
706 | if err != nil {
707 | return false
708 | }
709 | result, _ = xpathCtx.ResultAsBoolean()
710 | default:
711 | //err = ERR_UNDEFINED_SEARCH_PARAM
712 | }
713 | return
714 | }
715 |
716 | // The local name of the node. Use Namespace() to get the namespace.
717 | func (xmlNode *XmlNode) Name() (name string) {
718 | if xmlNode.Ptr.name != nil {
719 | p := unsafe.Pointer(xmlNode.Ptr.name)
720 | name = C.GoString((*C.char)(p))
721 | }
722 | return
723 | }
724 |
725 | // The namespace of the node. This is the empty string if there
726 | // no associated namespace.
727 | func (xmlNode *XmlNode) Namespace() (href string) {
728 | if xmlNode.Ptr.ns != nil {
729 | p := unsafe.Pointer(xmlNode.Ptr.ns.href)
730 | href = C.GoString((*C.char)(p))
731 | }
732 | return
733 | }
734 |
735 | // Set the local name of the node. The namespace is set via SetNamespace().
736 | func (xmlNode *XmlNode) SetName(name string) {
737 | if len(name) > 0 {
738 | nameBytes := GetCString([]byte(name))
739 | namePtr := unsafe.Pointer(&nameBytes[0])
740 | C.xmlNodeSetName(xmlNode.Ptr, (*C.xmlChar)(namePtr))
741 | }
742 | }
743 |
744 | func (xmlNode *XmlNode) Duplicate(level int) Node {
745 | return xmlNode.DuplicateTo(xmlNode.Document, level)
746 | }
747 |
748 | func (xmlNode *XmlNode) DuplicateTo(doc Document, level int) (dup Node) {
749 | if xmlNode.valid {
750 | dupPtr := C.xmlDocCopyNode(xmlNode.Ptr, (*C.xmlDoc)(doc.DocPtr()), C.int(level))
751 | if dupPtr != nil {
752 | dup = NewNode(unsafe.Pointer(dupPtr), xmlNode.Document)
753 | }
754 | }
755 | return
756 | }
757 |
758 | func (xmlNode *XmlNode) serialize(format SerializationOption, encoding, outputBuffer []byte) ([]byte, int) {
759 | nodePtr := unsafe.Pointer(xmlNode.Ptr)
760 | var encodingPtr unsafe.Pointer
761 | if len(encoding) == 0 {
762 | encoding = xmlNode.Document.OutputEncoding()
763 | }
764 | if len(encoding) > 0 {
765 | encodingPtr = unsafe.Pointer(&(encoding[0]))
766 | } else {
767 | encodingPtr = nil
768 | }
769 |
770 | wbuffer := &WriteBuffer{Node: xmlNode, Buffer: outputBuffer}
771 | wbufferPtr := unsafe.Pointer(wbuffer)
772 |
773 | ret := int(C.xmlSaveNode(wbufferPtr, nodePtr, encodingPtr, C.int(format)))
774 | if ret < 0 {
775 | panic("output error in xml node serialization: " + strconv.Itoa(ret))
776 | return nil, 0
777 | }
778 |
779 | return wbuffer.Buffer, wbuffer.Offset
780 | }
781 |
782 | // SerializeWithFormat allows you to control the serialization flags passed to libxml.
783 | // In most cases ToXml() and ToHtml() provide sensible defaults and should be preferred.
784 |
785 | // The format parameter should be a set of SerializationOption constants or'd together.
786 | // If encoding is nil, the document's output encoding is used - this defaults to UTF-8.
787 | // If outputBuffer is nil, one will be created for you.
788 | func (xmlNode *XmlNode) SerializeWithFormat(format SerializationOption, encoding, outputBuffer []byte) ([]byte, int) {
789 | return xmlNode.serialize(format, encoding, outputBuffer)
790 | }
791 |
792 | // ToXml generates an indented XML document with an XML declaration.
793 | // It is not guaranteed to be well formed unless xmlNode is an element node,
794 | // or a document node with only one element child.
795 |
796 | // If you need finer control over the formatting, call SerializeWithFormat.
797 |
798 | // If encoding is nil, the document's output encoding is used - this defaults to UTF-8.
799 | // If outputBuffer is nil, one will be created for you.
800 | func (xmlNode *XmlNode) ToXml(encoding, outputBuffer []byte) ([]byte, int) {
801 | return xmlNode.serialize(XML_SAVE_AS_XML|XML_SAVE_FORMAT, encoding, outputBuffer)
802 | }
803 |
804 | // ToUnformattedXml generates an unformatted XML document without an XML declaration.
805 | // This is useful for conforming to various standards and for unit testing, although
806 | // the output is not guaranteed to be well formed unless xmlNode is an element node.
807 | func (xmlNode *XmlNode) ToUnformattedXml() string {
808 | var b []byte
809 | var size int
810 | b, size = xmlNode.serialize(XML_SAVE_AS_XML|XML_SAVE_NO_DECL, nil, nil)
811 | if b == nil {
812 | return ""
813 | }
814 | return string(b[:size])
815 | }
816 |
817 | // ToHtml generates an indented XML document that conforms to HTML 4.0 rules; meaning
818 | // that some elements may be unclosed or forced to use end tags even when empty.
819 |
820 | // If you want to output XHTML, call SerializeWithFormat and enable the XML_SAVE_XHTML
821 | // flag as part of the format.
822 |
823 | // If encoding is nil, the document's output encoding is used - this defaults to UTF-8.
824 | // If outputBuffer is nil, one will be created for you.
825 | func (xmlNode *XmlNode) ToHtml(encoding, outputBuffer []byte) ([]byte, int) {
826 | return xmlNode.serialize(XML_SAVE_AS_HTML|XML_SAVE_FORMAT, encoding, outputBuffer)
827 | }
828 |
829 | func (xmlNode *XmlNode) ToBuffer(outputBuffer []byte) []byte {
830 | var b []byte
831 | var size int
832 | if docType := xmlNode.Document.DocType(); docType == XML_HTML_DOCUMENT_NODE {
833 | b, size = xmlNode.ToHtml(nil, outputBuffer)
834 | } else {
835 | b, size = xmlNode.ToXml(nil, outputBuffer)
836 | }
837 | return b[:size]
838 | }
839 |
840 | func (xmlNode *XmlNode) String() string {
841 | b := xmlNode.ToBuffer(nil)
842 | if b == nil {
843 | return ""
844 | }
845 | return string(b)
846 | }
847 |
848 | func (xmlNode *XmlNode) Content() string {
849 | contentPtr := C.xmlNodeGetContent(xmlNode.Ptr)
850 | charPtr := (*C.char)(unsafe.Pointer(contentPtr))
851 | defer C.xmlFreeChars(charPtr)
852 | return C.GoString(charPtr)
853 | }
854 |
855 | func (xmlNode *XmlNode) InnerHtml() string {
856 | out := ""
857 |
858 | for child := xmlNode.FirstChild(); child != nil; child = child.NextSibling() {
859 | out += child.String()
860 | }
861 | return out
862 | }
863 |
864 | func (xmlNode *XmlNode) Unlink() {
865 | if int(C.xmlUnlinkNodeWithCheck(xmlNode.Ptr)) != 0 {
866 | xmlNode.Document.AddUnlinkedNode(unsafe.Pointer(xmlNode.Ptr))
867 | }
868 | }
869 |
870 | func (xmlNode *XmlNode) Remove() {
871 | if xmlNode.valid && unsafe.Pointer(xmlNode.Ptr) != xmlNode.Document.DocPtr() {
872 | xmlNode.Unlink()
873 | xmlNode.valid = false
874 | }
875 | }
876 |
877 | func (xmlNode *XmlNode) addChild(node Node) (err error) {
878 | nodeType := node.NodeType()
879 | if nodeType == XML_DOCUMENT_NODE || nodeType == XML_HTML_DOCUMENT_NODE {
880 | err = ERR_CANNOT_MAKE_DUCMENT_AS_CHILD
881 | return
882 | }
883 | nodePtr := node.NodePtr()
884 | if xmlNode.NodePtr() == nodePtr {
885 | return
886 | }
887 | ret := xmlNode.isAccestor(nodePtr)
888 | if ret < 0 {
889 | return
890 | } else if ret == 0 {
891 | if !xmlNode.Document.RemoveUnlinkedNode(nodePtr) {
892 | C.xmlUnlinkNodeWithCheck((*C.xmlNode)(nodePtr))
893 | }
894 | C.xmlAddChild(xmlNode.Ptr, (*C.xmlNode)(nodePtr))
895 | } else if ret > 0 {
896 | node.Remove()
897 | }
898 |
899 | return
900 | }
901 |
902 | func (xmlNode *XmlNode) addPreviousSibling(node Node) (err error) {
903 | nodeType := node.NodeType()
904 | if nodeType == XML_DOCUMENT_NODE || nodeType == XML_HTML_DOCUMENT_NODE {
905 | err = ERR_CANNOT_MAKE_DUCMENT_AS_CHILD
906 | return
907 | }
908 | nodePtr := node.NodePtr()
909 | if xmlNode.NodePtr() == nodePtr {
910 | return
911 | }
912 | ret := xmlNode.isAccestor(nodePtr)
913 | if ret < 0 {
914 | return
915 | } else if ret == 0 {
916 | if !xmlNode.Document.RemoveUnlinkedNode(nodePtr) {
917 | C.xmlUnlinkNodeWithCheck((*C.xmlNode)(nodePtr))
918 | }
919 | C.xmlAddPrevSibling(xmlNode.Ptr, (*C.xmlNode)(nodePtr))
920 | } else if ret > 0 {
921 | node.Remove()
922 | }
923 | return
924 | }
925 |
926 | func (xmlNode *XmlNode) addNextSibling(node Node) (err error) {
927 | nodeType := node.NodeType()
928 | if nodeType == XML_DOCUMENT_NODE || nodeType == XML_HTML_DOCUMENT_NODE {
929 | err = ERR_CANNOT_MAKE_DUCMENT_AS_CHILD
930 | return
931 | }
932 | nodePtr := node.NodePtr()
933 | if xmlNode.NodePtr() == nodePtr {
934 | return
935 | }
936 | ret := xmlNode.isAccestor(nodePtr)
937 | if ret < 0 {
938 | return
939 | } else if ret == 0 {
940 | if !xmlNode.Document.RemoveUnlinkedNode(nodePtr) {
941 | C.xmlUnlinkNodeWithCheck((*C.xmlNode)(nodePtr))
942 | }
943 | C.xmlAddNextSibling(xmlNode.Ptr, (*C.xmlNode)(nodePtr))
944 | } else if ret > 0 {
945 | node.Remove()
946 | }
947 | return
948 | }
949 |
950 | func (xmlNode *XmlNode) Wrap(data string) (err error) {
951 | newNodes, err := xmlNode.coerce(data)
952 | if err == nil && len(newNodes) > 0 {
953 | newParent := newNodes[0]
954 | xmlNode.addNextSibling(newParent)
955 | newParent.AddChild(xmlNode)
956 | }
957 | return
958 | }
959 |
960 | func (xmlNode *XmlNode) ParseFragment(input, url []byte, options ParseOption) (fragment *DocumentFragment, err error) {
961 | fragment, err = parsefragment(xmlNode.Document, xmlNode, input, url, options)
962 | return
963 | }
964 |
965 | //export xmlNodeWriteCallback
966 | func xmlNodeWriteCallback(wbufferObj unsafe.Pointer, data unsafe.Pointer, data_len C.int) {
967 | wbuffer := (*WriteBuffer)(wbufferObj)
968 | offset := wbuffer.Offset
969 |
970 | if offset > len(wbuffer.Buffer) {
971 | panic("fatal error in xmlNodeWriteCallback")
972 | }
973 |
974 | buffer := wbuffer.Buffer[:offset]
975 | dataLen := int(data_len)
976 |
977 | if dataLen > 0 {
978 | if len(buffer)+dataLen > cap(buffer) {
979 | newBuffer := grow(buffer, dataLen)
980 | wbuffer.Buffer = newBuffer
981 | }
982 | destBufPtr := unsafe.Pointer(&(wbuffer.Buffer[offset]))
983 | C.memcpy(destBufPtr, data, C.size_t(dataLen))
984 | wbuffer.Offset += dataLen
985 | }
986 | }
987 |
988 | //export xmlUnlinkNodeCallback
989 | func xmlUnlinkNodeCallback(nodePtr unsafe.Pointer, gonodePtr unsafe.Pointer) {
990 | xmlNode := (*XmlNode)(gonodePtr)
991 | xmlNode.Document.AddUnlinkedNode(nodePtr)
992 | }
993 |
994 | func grow(buffer []byte, n int) (newBuffer []byte) {
995 | newBuffer = makeSlice(2*cap(buffer) + n)
996 | copy(newBuffer, buffer)
997 | return
998 | }
999 |
1000 | func makeSlice(n int) []byte {
1001 | // If the make fails, give a known error.
1002 | defer func() {
1003 | if recover() != nil {
1004 | panic(ErrTooLarge)
1005 | }
1006 | }()
1007 | return make([]byte, n)
1008 | }
1009 |
1010 | func (xmlNode *XmlNode) isAccestor(nodePtr unsafe.Pointer) int {
1011 | parentPtr := xmlNode.Ptr.parent
1012 |
1013 | if C.xmlNodePtrCheck(unsafe.Pointer(parentPtr)) == C.int(0) {
1014 | return -1
1015 | }
1016 | for ; parentPtr != nil; parentPtr = parentPtr.parent {
1017 | if C.xmlNodePtrCheck(unsafe.Pointer(parentPtr)) == C.int(0) {
1018 | return -1
1019 | }
1020 | p := unsafe.Pointer(parentPtr)
1021 | if p == nodePtr {
1022 | return 1
1023 | }
1024 | }
1025 | return 0
1026 | }
1027 |
1028 | func (xmlNode *XmlNode) RecursivelyRemoveNamespaces() (err error) {
1029 | nodePtr := xmlNode.Ptr
1030 | C.xmlSetNs(nodePtr, nil)
1031 |
1032 | for child := xmlNode.FirstChild(); child != nil; {
1033 | child.RecursivelyRemoveNamespaces()
1034 | child = child.NextSibling()
1035 | }
1036 |
1037 | nodeType := xmlNode.NodeType()
1038 |
1039 | if ((nodeType == XML_ELEMENT_NODE) ||
1040 | (nodeType == XML_XINCLUDE_START) ||
1041 | (nodeType == XML_XINCLUDE_END)) &&
1042 | (nodePtr.nsDef != nil) {
1043 | C.xmlFreeNsList((*C.xmlNs)(nodePtr.nsDef))
1044 | nodePtr.nsDef = nil
1045 | }
1046 |
1047 | if nodeType == XML_ELEMENT_NODE && nodePtr.properties != nil {
1048 | property := nodePtr.properties
1049 | for property != nil {
1050 | if property.ns != nil {
1051 | property.ns = nil
1052 | }
1053 | property = property.next
1054 | }
1055 | }
1056 | return
1057 | }
1058 |
1059 | func (xmlNode *XmlNode) RemoveDefaultNamespace() {
1060 | nodePtr := xmlNode.Ptr
1061 | C.xmlRemoveDefaultNamespace(nodePtr)
1062 | }
1063 |
1064 | // Returns a list of all the namespace declarations that exist on this node.
1065 |
1066 | // You can add a namespace declaration by calling DeclareNamespace.
1067 | // Calling SetNamespace will automatically add a declaration if required.
1068 |
1069 | // Calling SetNsAttr does *not* automatically create a declaration. This will
1070 | // fixed in a future version.
1071 | func (xmlNode *XmlNode) DeclaredNamespaces() (result []NamespaceDeclaration) {
1072 | nodePtr := xmlNode.Ptr
1073 | for ns := nodePtr.nsDef; ns != nil; ns = (*C.xmlNs)(ns.next) {
1074 | prefixPtr := unsafe.Pointer(ns.prefix)
1075 | prefix := C.GoString((*C.char)(prefixPtr))
1076 | hrefPtr := unsafe.Pointer(ns.href)
1077 | uri := C.GoString((*C.char)(hrefPtr))
1078 | decl := NamespaceDeclaration{prefix, uri}
1079 | result = append(result, decl)
1080 | }
1081 | return
1082 | }
1083 |
1084 | // Add a namespace declaration to an element.
1085 |
1086 | // This is typically done on the root element or node high up in the tree
1087 | // to avoid duplication. The declaration is not created if the namespace
1088 | // is already declared in this scope with the same prefix.
1089 | func (xmlNode *XmlNode) DeclareNamespace(prefix, href string) {
1090 | //can only declare namespaces on elements
1091 | if xmlNode.NodeType() != XML_ELEMENT_NODE {
1092 | return
1093 | }
1094 | hrefBytes := GetCString([]byte(href))
1095 | hrefPtr := unsafe.Pointer(&hrefBytes[0])
1096 |
1097 | //if the namespace is already declared using this prefix, just return
1098 | _ns := C.xmlSearchNsByHref((*C.xmlDoc)(xmlNode.Document.DocPtr()), xmlNode.Ptr, (*C.xmlChar)(hrefPtr))
1099 | if _ns != nil {
1100 | _prefixPtr := unsafe.Pointer(_ns.prefix)
1101 | _prefix := C.GoString((*C.char)(_prefixPtr))
1102 | if prefix == _prefix {
1103 | return
1104 | }
1105 | }
1106 |
1107 | prefixBytes := GetCString([]byte(prefix))
1108 | prefixPtr := unsafe.Pointer(&prefixBytes[0])
1109 | if prefix == "" {
1110 | prefixPtr = nil
1111 | }
1112 |
1113 | //this adds the namespace declaration to the node
1114 | _ = C.xmlNewNs(xmlNode.Ptr, (*C.xmlChar)(hrefPtr), (*C.xmlChar)(prefixPtr))
1115 | }
1116 |
1117 | // Set the namespace of an element.
1118 | func (xmlNode *XmlNode) SetNamespace(prefix, href string) {
1119 | if xmlNode.NodeType() != XML_ELEMENT_NODE {
1120 | return
1121 | }
1122 |
1123 | prefixBytes := GetCString([]byte(prefix))
1124 | prefixPtr := unsafe.Pointer(&prefixBytes[0])
1125 | if prefix == "" {
1126 | prefixPtr = nil
1127 | }
1128 |
1129 | hrefBytes := GetCString([]byte(href))
1130 | hrefPtr := unsafe.Pointer(&hrefBytes[0])
1131 |
1132 | // use the existing namespace declaration if there is one
1133 | _ns := C.xmlSearchNsByHref((*C.xmlDoc)(xmlNode.Document.DocPtr()), xmlNode.Ptr, (*C.xmlChar)(hrefPtr))
1134 | if _ns != nil {
1135 | _prefixPtr := unsafe.Pointer(_ns.prefix)
1136 | _prefix := C.GoString((*C.char)(_prefixPtr))
1137 | if prefix == _prefix {
1138 | C.xmlSetNs(xmlNode.Ptr, _ns)
1139 | return
1140 | }
1141 | }
1142 |
1143 | ns := C.xmlNewNs(xmlNode.Ptr, (*C.xmlChar)(hrefPtr), (*C.xmlChar)(prefixPtr))
1144 | C.xmlSetNs(xmlNode.Ptr, ns)
1145 | }
1146 |
1147 | // Returns the line number on which the node appears, or a -1 if the
1148 | // line number cannot be determined.
1149 | func (xmlNode *XmlNode) LineNumber() int {
1150 | return int(C.xmlGetLineNo(xmlNode.Ptr))
1151 | }
1152 |
--------------------------------------------------------------------------------