├── .github
├── CONTRIBUTING.md
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ ├── config.yml
│ ├── feature_request.md
│ └── others.md
├── auto-assign-pr.yml
├── dependabot.yml
└── workflows
│ ├── assign-issue.yml
│ ├── assign-pr.yml
│ ├── ci.yml
│ ├── codeql.yml
│ ├── lint.yml
│ └── stale.yml
├── .gitignore
├── .golangci.yml
├── LICENSE
├── README.md
├── clib
├── clib.go
├── interface.go
├── link_dynamic.go
└── link_static.go
├── dom
├── c14n_test.go
├── document_test.go
├── dom.go
├── interface.go
├── node.go
├── node_attr.go
├── node_document.go
├── node_element.go
├── node_namespace.go
├── node_test.go
├── node_text.go
├── node_wrap.go
└── serialize.go
├── go.mod
├── go.sum
├── html.go
├── html_test.go
├── internal
├── cmd
│ └── genwrapnode
│ │ └── genwrapnode.go
├── debug
│ ├── debug_off.go
│ └── debug_on.go
└── option
│ ├── interface.go
│ └── option.go
├── libxml2.go
├── libxml2_bench_test.go
├── libxml2_example_test.go
├── parser.go
├── parser
├── interface.go
└── parser.go
├── parser_test.go
├── test
├── euc-jp.xml
├── feed.atom
├── go_libxml2_local.xml
├── go_libxml2_remote.xml
├── link
│ └── test.go
├── schema
│ ├── lib
│ │ └── types
│ │ │ ├── cksum.xsd
│ │ │ ├── net.xsd
│ │ │ ├── std.xsd
│ │ │ └── unix.xsd
│ └── projects
│ │ ├── go_libxml2_local.xsd
│ │ └── go_libxml2_remote.xsd
├── sjis.xml
├── utf-8.xml
└── xmldsig-core-schema.xsd
├── types
├── interface.go
└── types.go
├── xml_test.go
├── xpath
├── interface.go
├── iterator.go
├── util.go
├── xpath.go
└── xpath_test.go
├── xsd
├── interface.go
├── option.go
└── xsd.go
└── xsd_test.go
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # CONTRIBUTING
2 |
3 | ❤❤❤🎉 Thank you for considering to contribute to this project! 🎉❤❤❤
4 |
5 | The following is a set of guidelines that we ask you to follow when you contribute to this project.
6 |
7 | # Index
8 |
9 | * [tl;dr](#tldr)
10 | * [Please Be Nice](#please-be-nice)
11 | * [Please Use Correct Medium (GitHub Issues / Discussions)](#please-use-correct-medium-github-issues--discussions)
12 | * [Please Include (Pseudo)code for Any Technical Issues](#please-include-pseudocode-for-any-technical-issues)
13 | * [Reviewer/Reviewee Guidelines](#reviewerreviewee-guidelines)
14 | * [Brown M&M Clause](#brown-mm-clause)
15 | * [Pull Requests](#pull-requests)
16 | * [Branches](#branches)
17 | * [Generated Files](#generated-files)
18 | * [Test Cases](#test-cases)
19 |
20 | # tl;dr
21 |
22 | * 📕 Please read this Guideline in its entirety once, if only to check the headings.
23 | * 🙋 Please be nice, and please be aware that we are not providing this software as a hobby.
24 | * 💬 Open-ended questions and inquiries go to [Discussions](https://github.com/lestrrat-go/libxml2/discussions).
25 | * 🖥️ Actionable, specific technical questions go to [Issues](https://github.com/lestrrat-go/libxml2/issues).
26 | * 📝 Please always include (pseudo)code for any technical questions/issues.
27 | * 🔒 Issues, PR, and other posts may be closed or not addressed if you do not follow these guidelines
28 |
29 | # Please Be Nice
30 |
31 | [Main source; if the wording differs, the main source supersedes this copy](https://github.com/lestrrat-go/contributions/blob/main/Contributions.md)
32 |
33 | Please be nice when you contact us.
34 |
35 | We are very glad that you find this project useful, and we intend to provide software that helps you.
36 |
37 | You do not have to thank us, but please bear in mind that this is an open source project that is provided **as-is**.
38 | This means that we are **NOT** obligated to support you, work for you, do your homework/research for you,
39 | or otherwise heed your needs.
40 |
41 | We do not owe you one bit of code, or a fix, even if it's a critical one.
42 |
43 | We write software because we're curious, we fix bugs because we have integrity.
44 |
45 | But we do not owe you anything. Please do not order us to work for you.
46 | We are not your support staff, and we are not here to do your research.
47 | We are willing to help, but only as long as you are being nice to us.
48 |
49 | # Please Use Correct Medium (GitHub Issues / Discussions)
50 |
51 | [Main source; this is a specialized version copied from the main source](https://github.com/lestrrat-go/contributions/blob/main/Contributions.md)
52 |
53 | This project uses [GitHub Issues](https://github.com/lestrrat-go/libxml2/issues) to deal with technical issues
54 | including bug reports, new API proposals, and other issues that are directly actionable.
55 |
56 | Inquiries, questions about the usage, maintenance policies, and other open-ended
57 | questions/discussions should be posted to [GitHub Discussions](https://github.com/lestrrat-go/libxml2/discussions).
58 |
59 | # Please Include (Pseudo)code for Any Technical Issues
60 |
61 | [Main source; if the wording differs, the main source supersedes this copy](https://github.com/lestrrat-go/contributions/blob/main/Contributions.md)
62 |
63 | Your report should contain a clear, concise description of the issue that you are facing.
64 | However, at the same time please always include (pseudo)code in your report.
65 |
66 | English may not be your forte, but we all should speak the common language of code.
67 | Rather than trying to write an entire essay or beat around the bush, which will
68 | more than likely cost both you and the maintainers extra roundtrips to communicate,
69 | please use code to describe _exactly_ what you are trying to achieve.
70 |
71 | Good reports should contain (in order of preference):
72 |
73 | 1. Complete Go-style test code.
74 | 1. Code snippet that clearly shows the intent of your code.
75 | 1. Pseudocode that shows how you would want the API to work.
76 |
77 | As we are dealing with code, ultimately there is
78 | no better way to convey what you are trying to do than to provide
79 | your code.
80 |
81 | Please help us help you by providing us with reproducible code.
82 |
83 | # Reviewer/Reviewee Guidelines
84 |
85 | If you are curious about what gets reviewed and why some decisions
86 | are made the way they are, please read [this document](https://github.com/lestrrat-go/contributions/blob/main/Reviews.md) to get some insight into the thinking process.
87 |
88 | # Brown M&M Clause
89 |
90 | If you came here from an issue/PR template, please make sure to delete
91 | the section on "Contribution Guidelines" from the template.
92 |
93 | Failure to do so may result in the maintainers assuming that you have
94 | not fully read the guidelines.
95 |
96 | [(Reference)](https://www.insider.com/van-halen-brown-m-ms-contract-2016-9)
97 |
98 | # Pull Requests
99 |
100 | ## Branches
101 |
102 | ### `vXXX` branches
103 |
104 | Stable releases, such as `v1`, `v2`, etc. Please do not work against these branches.
105 | Use the `develop/vXXX` branches instead.
106 |
107 | ### `develop/vXXX` branches
108 |
109 | Development occurs on these branches. If you wish to make changes against
110 | `v2`, work on the `develop/v2` branch.
111 |
112 | When you make a PR, fork this branch, make your changes and create a PR against
113 | these development branches.
114 |
115 | ```mermaid
116 | sequenceDiagram
117 | autonumber
118 | participant v1/v2/..
119 | participant develop/v1/v2/..
120 | participant feature_branch
121 | develop/v1/v2/..->>feature_branch: Fork development branch to your feature branch
122 | Note over feature_branch: Work on your feature
123 | feature_branch->>develop/v1/v2/..: File a PR against the development branch
124 | develop/v1/v2/..->>v1/v2/..: Merge changes
125 | ```
126 |
127 | ## Generated Files
128 |
129 | All files with file names ending in `_gen.go` are generated by a tool. These files
130 | should not be modified directly. Instead, find out the tool that is generating the
131 | file by inspecting the file. Usually the tool that generated the file is listed
132 | in the comment section at the top of the file.
133 |
134 | Usually these files are generated based on a rule file (such as a YAML file).
135 | When you craft a pull request, you should include both changes to the rule file(s)
136 | and the generated file(s). The CI will run `go generate` and make sure that
137 | there are no extra `diff`s that have not been committed.
138 |
139 | ## Test Cases
140 |
141 | In general, any code change must be accompanied by a test case.
142 |
143 | It is obviously very important to test the functionality. But adding test cases
144 | also gives you the opportunity to check for yourself how the new code should/can
145 | be used in practice. Test cases also act as a great way to communicate any
146 | assumptions or requirements that your code needs in order to function properly.
147 |
148 |
149 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github:
4 | - lestrrat
5 | patreon: # Replace with a single Patreon username
6 | open_collective: # Replace with a single Open Collective username
7 | ko_fi: # Replace with a single Ko-fi username
8 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
9 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
10 | liberapay: # Replace with a single Liberapay username
11 | issuehunt: # Replace with a single IssueHunt username
12 | otechie: # Replace with a single Otechie username
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Contribution Guidelines**
11 |
12 | Before filing an issue, please read the contents of [CONTRIBUTING.md](https://github.com/lestrrat-go/libxml2/blob/v2/.github/CONTRIBUTING.md), and follow its instructions.
13 |
14 | **Describe the bug**
15 |
16 | A clear and concise description of what the bug is.
17 |
18 | Please attach the output of `go version`
19 |
20 | **To Reproduce / Expected behavior**
21 | Please attach a standalone Go test code that shows the problem, and what you expected to happen.
22 |
23 | If you are asking for an API change or something similar that prevents you from providing working code, please do your best to come up with near-valid code.
24 |
25 | **Additional context**
26 | Add any other context or screenshots about the problem here. Please delete this section if unnecessary.
27 |
28 | **Sponsors**
29 | Are you sponsoring the authors? If so, let us know. Otherwise, please delete this section.
30 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Contribution Guidelines**
11 |
12 | Before filing an issue, please read the contents of [CONTRIBUTING.md](https://github.com/lestrrat-go/libxml2/blob/v2/.github/CONTRIBUTING.md), and follow its instructions.
13 |
14 | **Abstract**
15 | Please describe concisely what you want to accomplish, including prerequisite information. Please remember that if _you_ cannot articulate the problem, we cannot guess what you are thinking.
16 |
17 | **Describe the proposed solution/change**
18 | Please attach a standalone Go test code that shows the problem, and what you expected to happen.
19 |
20 | If it's a behavior change, please include a failing (or would-be failing) test case. If it's a structural or an API change, we understand that you cannot create complete, compiling code, but please do your best to produce near-valid code that shows exactly what you want.
21 |
22 | **Analysis**
23 | Please describe alternative solutions that you have considered, and pros/cons between them.
24 |
25 | **Additional context**
26 | Add any other context or screenshots about the feature request here. Please delete this section if unnecessary.
27 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/others.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: 'Other Issues'
3 | about: 'Other types of issues'
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Contribution Guidelines**
11 |
12 | Before filing an issue, please read the contents of [CONTRIBUTING.md](https://github.com/lestrrat-go/libxml2/blob/v2/.github/CONTRIBUTING.md), and follow its instructions.
13 |
--------------------------------------------------------------------------------
/.github/auto-assign-pr.yml:
--------------------------------------------------------------------------------
1 | addReviewers: true
2 | addAssignees: false
3 | reviewers:
4 | - lestrrat
5 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "gomod" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "daily"
12 | target-branch: "master"
13 | labels:
14 | - "go"
15 | - "dependencies"
16 | - "dependabot"
17 | - package-ecosystem: "gomod" # See documentation for possible values
18 | directory: "/" # Location of package manifests
19 | schedule:
20 | interval: "daily"
21 | target-branch: "develop/v1"
22 | labels:
23 | - "go"
24 | - "dependencies"
25 | - "dependabot"
26 | - package-ecosystem: "github-actions"
27 | directory: "/"
28 | schedule:
29 | interval: "daily"
30 | target-branch: "develop/v2"
31 | - package-ecosystem: "github-actions"
32 | directory: "/"
33 | schedule:
34 | interval: "daily"
35 | target-branch: "develop/v1"
36 |
37 |
--------------------------------------------------------------------------------
/.github/workflows/assign-issue.yml:
--------------------------------------------------------------------------------
1 | name: Assign Issue
2 | on:
3 | issues:
4 | types: [opened]
5 |
6 | jobs:
7 | auto-assign:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: 'Auto-assign issue'
11 | uses: pozil/auto-assign-issue@v1
12 | with:
13 | assignees: lestrrat
14 |
--------------------------------------------------------------------------------
/.github/workflows/assign-pr.yml:
--------------------------------------------------------------------------------
1 | name: 'Auto Assign'
2 | on:
3 | pull_request:
4 | types: [opened, ready_for_review]
5 |
6 | jobs:
7 | add-reviews:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - uses: kentaro-m/auto-assign-action@v1.2.5
11 | with:
12 | configuration-path: .github/auto-assign-pr.yml
13 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on:
3 | push:
4 | branches:
5 | - master
6 | pull_request:
7 | branches:
8 | - master
9 |
10 | jobs:
11 | ubuntu:
12 | runs-on: ubuntu-latest
13 | strategy:
14 | matrix:
15 | go: [ '1.21' ]
16 | link:
17 | - type: dynamic
18 | goflags: ""
19 | - type: static
20 | # On Ubuntu, libxml2 is compiled with GCC and is linked to libicu, which introduces a
21 | # stealth dependency on libstdc++ at link-time
22 | goflags: "-ldflags '-extldflags -lstdc++' -tags 'osusergo netgo static_build'"
23 | name: "Test [ Go ${{ matrix.go }}, ${{ matrix.link.type }} linking ]"
24 | steps:
25 | - name: Checkout repository
26 | uses: actions/checkout@v4
27 | - name: Cache Go modules
28 | uses: actions/cache@v3
29 | with:
30 | path: |
31 | ~/go/pkg/mod
32 | ~/.cache/go-build
33 | ~/.cache/bazel
34 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
35 | restore-keys: |
36 | ${{ runner.os }}-go-
37 | - name: Install Go stable version
38 | if: matrix.go != 'tip'
39 | uses: actions/setup-go@v4
40 | with:
41 | go-version: ${{ matrix.go }}
42 | check-latest: true
43 | - name: Run Go tests
44 | run: go test -race ${{ matrix.link.goflags }} ./...
45 | - name: Test linking capability
46 | run: |
47 | go build -o linktest ${{ matrix.link.goflags }} ./test/link
48 | file linktest | grep '${{ matrix.link.type }}ally linked'
49 | archlinux:
50 | runs-on: ubuntu-latest
51 | strategy:
52 | matrix:
53 | go: [ '1.21' ]
54 | container:
55 | image: archlinux:latest
56 | name: "Test [ Arch Linux + Go ${{ matrix.go }} ]"
57 | steps:
58 | - uses: actions/checkout@v3
59 | - name: Set Up Arch Linux
60 | run: |
61 | pacman -Syy --noconfirm
62 | pacman -Syu --noconfirm
63 | pacman -S --noconfirm base-devel
64 | pacman -S --noconfirm libxml2=2.12.7
65 | - name: Set up Go
66 | uses: actions/setup-go@v4
67 | with:
68 | go-version: ${{ matrix.go }}
69 | - name: Test
70 | run: go test ./...
71 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 |
14 | on:
15 | push:
16 | branches: [ "master" ]
17 | pull_request:
18 | # The branches below must be a subset of the branches above
19 | branches: [ "master" ]
20 | schedule:
21 | - cron: '40 13 * * 5'
22 |
23 | jobs:
24 | analyze:
25 | name: Analyze
26 | runs-on: ubuntu-latest
27 | permissions:
28 | actions: read
29 | contents: read
30 | security-events: write
31 |
32 | strategy:
33 | fail-fast: false
34 | matrix:
35 | language: [ 'go' ]
36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
37 | # Use only 'java' to analyze code written in Java, Kotlin or both
38 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
39 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
40 |
41 | steps:
42 | - name: Checkout repository
43 | uses: actions/checkout@v4
44 |
45 | # Initializes the CodeQL tools for scanning.
46 | - name: Initialize CodeQL
47 | uses: github/codeql-action/init@v2
48 | with:
49 | languages: ${{ matrix.language }}
50 | # If you wish to specify custom queries, you can do so here or in a config file.
51 | # By default, queries listed here will override any specified in a config file.
52 | # Prefix the list here with "+" to use these queries and those in the config file.
53 |
54 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
55 | # queries: security-extended,security-and-quality
56 |
57 |
58 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
59 | # If this step fails, then you should remove it and run the build manually (see below)
60 | - name: Autobuild
61 | uses: github/codeql-action/autobuild@v2
62 |
63 | # ℹ️ Command-line programs to run using the OS shell.
64 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
65 |
66 | # If the Autobuild fails above, remove it and uncomment the following three lines.
67 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
68 |
69 | # - run: |
70 | # echo "Run, Build Application using script"
71 | # ./location_of_script_within_repo/buildscript.sh
72 |
73 | - name: Perform CodeQL Analysis
74 | uses: github/codeql-action/analyze@v2
75 | with:
76 | category: "/language:${{matrix.language}}"
77 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: lint
2 | on:
3 | push: {}
4 | pull_request:
5 | branches:
6 | - master
7 | jobs:
8 | golangci:
9 | name: lint
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v4
13 | - uses: actions/setup-go@v4
14 | with:
15 | go-version: '1.20'
16 | check-latest: true
17 | - uses: golangci/golangci-lint-action@v3
18 | with:
19 | version: v1.54.2
20 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: 'Close stale issues and PRs'
2 | on:
3 | schedule:
4 | - cron: '30 1 * * *'
5 |
6 | jobs:
7 | stale:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - uses: actions/stale@v8
11 | with:
12 | stale-issue-message: 'This issue is stale because it has been open 14 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
13 | stale-pr-message: 'This PR is stale because it has been open 14 days with no activity. Remove stale label or comment or this will be closed in 14 days.'
14 | close-issue-message: 'This issue was closed because it has been stalled for 7 days with no activity. This does not mean your issue is rejected, but rather it is done to hide it from the view of the maintainers for the time being. Feel free to reopen if you have new comments'
15 | close-pr-message: 'This PR was closed because it has been stalled for 14 days with no activity. This does not mean your PR is rejected, but rather it is done to hide it from the view of the maintainers for the time being. Feel free to reopen if you have new comments or changes that you would like to include. '
16 | days-before-issue-stale: 14
17 | days-before-pr-stale: 14
18 | days-before-issue-close: 7
19 | days-before-pr-close: 7
20 | exempt-issue-labels: long-term
21 | exempt-pr-labels: long-term
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
2 | *.o
3 | *.a
4 | *.so
5 |
6 | # Folders
7 | _obj
8 | _test
9 |
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 |
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 |
20 | _testmain.go
21 |
22 | *.exe
23 | *.test
24 | *.prof
25 |
26 | .idea
27 |
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | run:
2 |
3 | linters-settings:
4 | govet:
5 | enable-all: true
6 | disable:
7 | - shadow
8 | - fieldalignment
9 |
10 | linters:
11 | enable-all: true
12 | disable:
13 | - cyclop
14 | - deadcode # deprecated
15 | - depguard
16 | - dupl
17 | - exhaustive
18 | - exhaustivestruct
19 | - errorlint
20 | - funlen
21 | - gci
22 | - gochecknoglobals
23 | - gochecknoinits
24 | - gocognit
25 | - gocritic
26 | - gocyclo
27 | - godot
28 | - godox
29 | - goerr113
30 | - gofumpt
31 | - golint #deprecated
32 | - gomnd
33 | - gosec
34 | - govet
35 | - interfacer # deprecated
36 | - ifshort
37 | - ireturn # No, I _LIKE_ returning interfaces
38 | - lll
39 | - maintidx # Do this in code review
40 | - maligned # deprecated
41 | - makezero
42 | - nakedret
43 | - nestif
44 | - nlreturn
45 | - nonamedreturns # visit this back later
46 | - nosnakecase
47 | - paralleltest
48 | - scopelint # deprecated
49 | - structcheck # deprecated
50 | - tagliatelle
51 | - testpackage
52 | - thelper # Tests are fine
53 | - varcheck # deprecated
54 | - varnamelen # Short names are ok
55 | - wrapcheck
56 | - wsl
57 |
58 | issues:
59 | exclude-rules:
60 | # not needed
61 | - path: /*.go
62 | text: "ST1003: should not use underscores in package names"
63 | linters:
64 | - stylecheck
65 | - path: /*.go
66 | text: "don't use an underscore in package name"
67 | linters:
68 | - revive
69 | - path: /*.go
70 | linters:
71 | - contextcheck
72 | - exhaustruct
73 | - path: /main.go
74 | linters:
75 | - errcheck
76 | - path: /*_test.go
77 | linters:
78 | - errcheck
79 | - errchkjson
80 | - forcetypeassert
81 | - path: /*_example_test.go
82 | linters:
83 | - forbidigo
84 |
85 | # Maximum issues count per one linter. Set to 0 to disable. Default is 50.
86 | max-issues-per-linter: 0
87 |
88 | # Maximum count of issues with the same text. Set to 0 to disable. Default is 3.
89 | max-same-issues: 0
90 |
91 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 lestrrat
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # libxml2
2 |
3 |
4 | **NOTICE**: Instead of making people use this library while wondering if this module is ever going to be modified any time soon, I've opted to archive it. I may come back to it later, but for the time being I have no motivation or pressing need to work on this project.
5 |
6 |
7 |
8 | Interface to libxml2, with DOM interface.
9 |
10 | [Build Status](https://travis-ci.org/lestrrat-go/libxml2)
11 |
12 | [GoDoc](https://godoc.org/github.com/lestrrat-go/libxml2)
13 |
14 | # Index
15 |
16 | * [Why?](#why)
17 | * [FAQ](#faq)
18 |
19 | ## Why?
20 |
21 | I needed to write [go-xmlsec](https://github.com/lestrrat-go/xmlsec). This means we need to build trees using libxml2, and then muck with it in xmlsec: Two separate packages in Go means we cannot (safely) pass around `C.xmlFooPtr` objects (also, you pay a penalty for pointer types). This package carefully avoids references to `C.xmlFooPtr` types and uses uintptr to pass data around, so other libraries that need to interact with libxml2 can safely interact with it.
22 |
23 | ## Status
24 |
25 | * This library should be considered alpha grade. API may still change.
26 | * Most of the commonly used libxml2 functionality that *I* use is already there, and is known to be functional
27 |
28 | ## Package Layout:
29 |
30 | | Name | Description |
31 | |---------|-------------------------------------------------------------|
32 | | libxml2 | Globally available utility functions, such as `ParseString` |
33 | | types | Common data types, such as `types.Node` |
34 | | parser | Parser routines |
35 | | dom | DOM-like manipulation of XML document/nodes |
36 | | xpath | XPath related tools |
37 | | xsd | XML Schema related tools |
38 | | clib | Wrapper around C libxml2 library - DO NOT TOUCH IF UNSURE |
39 |
40 | ## Features
41 |
42 | Create XML documents using DOM-like interface:
43 |
44 | ```go
45 | d := dom.CreateDocument()
46 | e, err := d.CreateElement("foo")
47 | if err != nil {
48 | println(err)
49 | return
50 | }
51 | d.SetDocumentElement(e)
52 | ...
53 | ```
54 |
55 | Parse documents:
56 |
57 | ```go
58 | d, err := libxml2.ParseString(xmlstring)
59 | if err != nil {
60 | println(err)
61 | return
62 | }
63 | ```
64 |
65 | Use XPath to extract node values:
66 |
67 | ```go
68 | text := xpath.String(node.Find("//xpath/expression"))
69 | ```
70 |
71 | ## Examples
72 |
73 | ### Basic XML Example
74 |
75 | ```go
76 | import (
77 | "log"
78 | "net/http"
79 |
80 | "github.com/lestrrat-go/libxml2"
81 | "github.com/lestrrat-go/libxml2/parser"
82 | "github.com/lestrrat-go/libxml2/types"
83 | "github.com/lestrrat-go/libxml2/xpath"
84 | )
85 |
86 | func ExampleXML() {
87 | res, err := http.Get("http://blog.golang.org/feed.atom")
88 | if err != nil {
89 | panic("failed to get blog.golang.org: " + err.Error())
90 | }
91 |
92 | p := parser.New()
93 | doc, err := p.ParseReader(res.Body)
94 | defer res.Body.Close()
95 |
96 | if err != nil {
97 | panic("failed to parse XML: " + err.Error())
98 | }
99 | defer doc.Free()
100 |
101 | doc.Walk(func(n types.Node) error {
102 | log.Printf(n.NodeName())
103 | return nil
104 | })
105 |
106 | root, err := doc.DocumentElement()
107 | if err != nil {
108 | log.Printf("Failed to fetch document element: %s", err)
109 | return
110 | }
111 |
112 | ctx, err := xpath.NewContext(root)
113 | if err != nil {
114 | log.Printf("Failed to create xpath context: %s", err)
115 | return
116 | }
117 | defer ctx.Free()
118 |
119 | ctx.RegisterNS("atom", "http://www.w3.org/2005/Atom")
120 | title := xpath.String(ctx.Find("/atom:feed/atom:title/text()"))
121 | log.Printf("feed title = %s", title)
122 | }
123 | ```
124 |
125 | ### Basic HTML Example
126 |
127 | ```go
128 | func ExampleHTML() {
129 | res, err := http.Get("http://golang.org")
130 | if err != nil {
131 | panic("failed to get golang.org: " + err.Error())
132 | }
133 |
134 | doc, err := libxml2.ParseHTMLReader(res.Body)
135 | if err != nil {
136 | panic("failed to parse HTML: " + err.Error())
137 | }
138 | defer doc.Free()
139 |
140 | doc.Walk(func(n types.Node) error {
141 | log.Printf(n.NodeName())
142 | return nil
143 | })
144 |
145 | nodes := xpath.NodeList(doc.Find(`//div[@id="menu"]/a`))
146 | for i := 0; i < len(nodes); i++ {
147 | log.Printf("Found node: %s", nodes[i].NodeName())
148 | }
149 | }
150 | ```
151 |
152 | ### XSD Validation
153 |
154 | ```go
155 | import (
156 | "io/ioutil"
157 | "log"
158 | "os"
159 | "path/filepath"
160 |
161 | "github.com/lestrrat-go/libxml2"
162 | "github.com/lestrrat-go/libxml2/xsd"
163 | )
164 |
165 | func ExampleXSD() {
166 | xsdfile := filepath.Join("test", "xmldsig-core-schema.xsd")
167 | f, err := os.Open(xsdfile)
168 | if err != nil {
169 | log.Printf("failed to open file: %s", err)
170 | return
171 | }
172 | defer f.Close()
173 |
174 | buf, err := ioutil.ReadAll(f)
175 | if err != nil {
176 | log.Printf("failed to read file: %s", err)
177 | return
178 | }
179 |
180 | s, err := xsd.Parse(buf)
181 | if err != nil {
182 | log.Printf("failed to parse XSD: %s", err)
183 | return
184 | }
185 | defer s.Free()
186 |
187 | d, err := libxml2.ParseString(``)
188 | if err != nil {
189 | log.Printf("failed to parse XML: %s", err)
190 | return
191 | }
192 | defer d.Free()
193 |
194 | if err := s.Validate(d); err != nil {
195 | for _, e := range err.(xsd.SchemaValidationError).Errors() {
196 | log.Printf("error: %s", e.Error())
197 | }
198 | return
199 | }
200 |
201 | log.Printf("validation successful!")
202 | }
203 | ```
204 |
205 | ## Caveats
206 |
207 | ### Other libraries
208 |
209 | There exist many similar libraries. I want speed, I want DOM, and I want XPath. When all of these are met, I'd be happy to switch to another library.
210 |
211 | For now my closest contender was [xmlpath](https://github.com/go-xmlpath/xmlpath), but as of this writing it suffers in the speed (for xpath) area a bit:
212 |
213 | ```
214 | shoebill% go test -v -run=none -benchmem -benchtime=5s -bench .
215 | PASS
216 | BenchmarkXmlpathXmlpath-4 500000 11737 ns/op 721 B/op 6 allocs/op
217 | BenchmarkLibxml2Xmlpath-4 1000000 7627 ns/op 368 B/op 15 allocs/op
218 | BenchmarkEncodingXMLDOM-4 2000000 4079 ns/op 4560 B/op 9 allocs/op
219 | BenchmarkLibxml2DOM-4 1000000 11454 ns/op 264 B/op 7 allocs/op
220 | ok github.com/lestrrat-go/libxml2 37.597s
221 | ```
222 |
223 | ## FAQ
224 |
225 | ### "It won't build"
226 |
227 | The very first thing you need to be aware of is that this is a _C binding_ to
228 | libxml2. You should understand how to build C programs, how to debug them,
229 | or at least be able to ask the right questions and deal with a great deal
230 | more than Go alone.
231 |
232 | Having said that, the most common causes for build errors are:
233 |
234 | 1. **You have not installed libxml2 / You installed it incorrectly**
235 |
236 | The first one is obvious, but I get this a lot. You have to install libxml2.
237 | If you are installing via some sort of package manager like apt/apk, remember
238 | that you need to install the "development" files as well. The name of the
239 | package differs in each environment, but it's usually something like "libxml2-dev".
240 |
241 | The second is more subtle, and tends to happen when you install your libxml2
242 | in a non-standard location. This causes problems for other tools such as
243 | your C compiler or pkg-config. See more below.
244 |
245 | 2. **Your header files are not in the search path**
246 |
247 | If you don't understand what header files are or how they work, this is where
248 | you should either look for your local C-guru, or study how these things work
249 | before filing an issue on this repository.
250 |
251 | Your C compiler, which is invoked via Go, needs to be able to find the libxml2
252 | header files. If you installed them in a non-standard location, for example,
253 | such as outside of /usr/include and /usr/local/include, you _may_ have to
254 | configure them yourself.
255 |
256 | How to configure them depends greatly on your environment, and again, if you
257 | don't understand how you can fix it, you should consult your local C-guru
258 | about it, not this repository.
259 |
260 | 3. **Your pkg-config files are not in the search path**
261 |
262 | If you don't understand what pkg-config does, this is where you should either
263 | look for your local sysadmin friend, or study how these things work
264 | before filing an issue on this repository.
265 |
266 | pkg-config provides metadata about installed components, such as the build flags
267 | that are required. Go uses it to figure out how to build and link Go programs
268 | that need to interact with things written in C.
269 |
270 | However, pkg-config is merely a thin frontend to extract information from
271 | file(s) that each component provided upon installation.
272 | pkg-config itself needs to know where to find these files.
273 |
274 | Make sure that the output of the following command contains `libxml-2.0`.
275 | If not, and you don't understand how to fix this yourself, you should consult
276 | your local sysadmin friend about it, not this repository.
277 |
278 | ```
279 | pkg-config --list-all
280 | ```
281 |
282 | ### "Fatal error: 'libxml/HTMLparser.h' file not found"
283 |
284 | See the first FAQ entry.
285 |
286 | ### I can't statically link this module to libxml2
287 |
288 | Use the `static_build` tag when building this module, for example:
289 |
290 | ```sh
291 | go build -tags static_build
292 | ```
293 |
294 | ## See Also
295 |
296 | * https://github.com/lestrrat-go/xmlsec
297 |
298 | ## Credits
299 |
300 | * Work on this library was generously sponsored by HDE Inc (https://www.hde.co.jp)
301 |
--------------------------------------------------------------------------------
/clib/interface.go:
--------------------------------------------------------------------------------
1 | package clib
2 |
3 | import "errors"
4 |
// Size limits, one per kind of string handled by this package. All of them
// except MaxEncodingLength have a matching Err...TooLong error declared
// further down in this file.
// NOTE(review): presumably the low-level functions reject longer inputs with
// those errors — confirm against the functions that use these constants.
const (
	MaxEncodingLength        = 256
	MaxAttributeNameLength   = 1024
	MaxElementNameLength     = 1024
	MaxNamespaceURILength    = 4096
	MaxValueBufferSize       = 4096
	MaxXPathExpressionLength = 4096
)
13 |
// C14NMode represents the C14N mode supported by libxml2
type C14NMode int

// PtrSource is the single interface that connects the rest of the
// libxml2 package with this package. The clib package does not
// really care what sort of object you pass to these low-level
// functions, as long as the argument fulfills this interface.
//
// Obviously this causes problems if you pass an Element node
// where a Document node is expected, but it is the caller's
// responsibility to align the argument list.
type PtrSource interface {
	// Pointer returns the pointer to the underlying C struct as a uintptr.
	Pointer() uintptr
}
28 |
// XMLNodeType identifies the type of the underlying C struct
type XMLNodeType int

// Node type values. Numbering starts at 1 (iota + 1) and increases by one
// per entry, so the declaration order below is significant.
// NOTE(review): the order matches libxml2's xmlElementType enum
// (XML_ELEMENT_NODE = 1 ... XML_DOCB_DOCUMENT_NODE = 21) — keep it in sync.
const (
	ElementNode XMLNodeType = iota + 1
	AttributeNode
	TextNode
	CDataSectionNode
	EntityRefNode
	EntityNode
	PiNode
	CommentNode
	DocumentNode
	DocumentTypeNode
	DocumentFragNode
	NotationNode
	HTMLDocumentNode
	DTDNode
	ElementDecl
	AttributeDecl
	EntityDecl
	NamespaceDecl
	XIncludeStart
	XIncludeEnd
	DocbDocumentNode
)
55 |
// Sentinel error values declared by the clib package. The Err...TooLong
// errors pair with the Max... size limits declared at the top of this file.
var (
	ErrAttributeNotFound      = errors.New("attribute not found")
	ErrAttributeNameTooLong   = errors.New("attribute name too long")
	ErrElementNameTooLong     = errors.New("element name too long")
	ErrNamespaceURITooLong    = errors.New("namespace uri too long")
	ErrValueTooLong           = errors.New("value too long")
	ErrXPathExpressionTooLong = errors.New("xpath expression too long")
	// ErrInvalidAttribute is returned when the Attribute struct is not
	// valid (most likely its pointer to the underlying C struct is invalid).
	ErrInvalidAttribute = errors.New("invalid attribute")
	ErrInvalidArgument  = errors.New("invalid argument")
	// ErrInvalidDocument is returned when the Document struct is not
	// valid (most likely its pointer to the underlying C struct is invalid).
	ErrInvalidDocument = errors.New("invalid document")
	// ErrInvalidParser is returned when the Parser struct is not
	// valid (most likely its pointer to the underlying C struct is invalid).
	ErrInvalidParser = errors.New("invalid parser")
	// ErrInvalidNamespace is returned when the Namespace struct is not
	// valid (most likely its pointer to the underlying C struct is invalid).
	ErrInvalidNamespace = errors.New("invalid namespace")
	// ErrInvalidNode is returned when the Node struct is not
	// valid (most likely its pointer to the underlying C struct is invalid).
	ErrInvalidNode     = errors.New("invalid node")
	ErrInvalidNodeName = errors.New("invalid node name")
	// ErrInvalidXPathContext is returned when the XPathContext struct is not
	// valid (most likely its pointer to the underlying C struct is invalid).
	ErrInvalidXPathContext = errors.New("invalid xpath context")
	// ErrInvalidXPathExpression is returned when the XPathExpression struct is
	// not valid (most likely its pointer to the underlying C struct is invalid).
	ErrInvalidXPathExpression = errors.New("invalid xpath expression")
	// ErrInvalidXPathObject is returned when the XPathObject struct is not
	// valid (most likely its pointer to the underlying C struct is invalid).
	ErrInvalidXPathObject = errors.New("invalid xpath object")
	// ErrInvalidSchema is returned when the Schema struct is not
	// valid (most likely its pointer to the underlying C struct is invalid).
	ErrInvalidSchema                 = errors.New("invalid schema")
	ErrNodeNotFound                  = errors.New("node not found")
	ErrXPathEmptyResult              = errors.New("empty xpath result")
	ErrXPathCompileFailure           = errors.New("xpath compilation failed")
	ErrXPathNamespaceRegisterFailure = errors.New("cannot register namespace")
)
97 |
// ErrNamespaceNotFound is an error value carrying the text that is appended
// to its message (presumably the prefix or URI that was looked up).
//
//nolint:errname
type ErrNamespaceNotFound struct {
	Target string
}

// Error implements the error interface. The message is the fixed text
// "namespace not found: " followed by Target.
func (e ErrNamespaceNotFound) Error() string {
	const prefix = "namespace not found: "
	return prefix + e.Target
}
106 |
// XPathObjectType identifies the kind of value held by an XPath object.
type XPathObjectType int

// XPath object type values, numbered from 0 in declaration order.
// NOTE(review): the order matches libxml2's xmlXPathObjectType enum
// (XPATH_UNDEFINED = 0 ... XPATH_XSLT_TREE = 9) — keep it in sync.
const (
	XPathUndefinedType XPathObjectType = iota
	XPathNodeSetType
	XPathBooleanType
	XPathNumberType
	XPathStringType
	XPathPointType
	XPathRangeType
	XPathLocationSetType
	XPathUsersType
	XPathXSLTTreeType
)
121 |
--------------------------------------------------------------------------------
/clib/link_dynamic.go:
--------------------------------------------------------------------------------
1 | //go:build !static_build
2 | // +build !static_build
3 |
4 | package clib
5 |
6 | // #cgo pkg-config: libxml-2.0
7 | import "C"
8 |
--------------------------------------------------------------------------------
/clib/link_static.go:
--------------------------------------------------------------------------------
1 | //go:build static_build
2 | // +build static_build
3 |
4 | package clib
5 |
6 | // #cgo pkg-config: --static libxml-2.0
7 | // #cgo LDFLAGS: -static
8 | import "C"
9 |
--------------------------------------------------------------------------------
/dom/c14n_test.go:
--------------------------------------------------------------------------------
1 | package dom_test
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/lestrrat-go/libxml2"
7 | "github.com/lestrrat-go/libxml2/dom"
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
// TestC14N parses a document, serializes it with exclusive C14N 1.0
// (comments included), and compares the result with the expected text.
//
// NOTE(review): both raw-string literals below contain only the text node;
// the surrounding markup appears to have been lost in this listing. Restore
// them from the repository before relying on this test.
func TestC14N(t *testing.T) {
	expected := `
Hello, world!


`

	doc, err := libxml2.ParseString(`


Hello, world!








`)

	if !assert.NoError(t, err, "Parse document should succeed") {
		return
	}

	s, err := dom.C14NSerialize{Mode: dom.C14NExclusive1_0, WithComments: true}.Serialize(doc)
	if !assert.NoError(t, err, "C14N should succeed") {
		return
	}
	// Log both sides so a mismatch can be inspected in the test output.
	t.Logf("C14N -> %s", s)
	t.Logf("expected -> %s", expected)

	if !assert.Equal(t, expected, s, "C14N content matches") {
		return
	}
}
49 |
--------------------------------------------------------------------------------
/dom/document_test.go:
--------------------------------------------------------------------------------
1 | package dom_test
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 |
7 | "github.com/lestrrat-go/libxml2/clib"
8 | "github.com/lestrrat-go/libxml2/dom"
9 | "github.com/lestrrat-go/libxml2/types"
10 | "github.com/stretchr/testify/assert"
11 | )
12 |
13 | // Tests for DOM Level 3
14 |
15 | func TestDocumentAttributes(t *testing.T) {
16 | doc := dom.CreateDocument()
17 | defer doc.Free()
18 | if doc.Encoding() != "" {
19 | t.Errorf("Encoding should be empty string at first, got '%s'", doc.Encoding())
20 | }
21 |
22 | if doc.Version() != "1.0" {
23 | t.Errorf("Version should be 1.0 by default, got '%s'", doc.Version())
24 | }
25 |
26 | if doc.Standalone() != -1 {
27 | t.Errorf("Standalone should be -1 by default, got '%d'", doc.Standalone())
28 | }
29 |
30 | for _, enc := range []string{"utf-8", "euc-jp", "sjis", "iso-8859-1"} {
31 | doc.SetEncoding(enc)
32 | if doc.Encoding() != enc {
33 | t.Errorf("Expected encoding '%s', got '%s'", enc, doc.Encoding())
34 | }
35 | }
36 |
37 | for _, v := range []string{"1.5", "4.12", "12.5"} {
38 | doc.SetVersion(v)
39 | if doc.Version() != v {
40 | t.Errorf("Expected version '%s', got '%s'", v, doc.Version())
41 | }
42 | }
43 |
44 | doc.SetStandalone(1)
45 | if doc.Standalone() != 1 {
46 | t.Errorf("Expected standalone 1, got '%d'", doc.Standalone())
47 | }
48 |
49 | doc.SetBaseURI("localhost/here.xml")
50 | if doc.URI() != "localhost/here.xml" {
51 | t.Errorf("Expected URI 'localhost/here.xml', got '%s'", doc.URI())
52 | }
53 | }
54 |
55 | func checkElement(t *testing.T, e types.Element, assertName, testCase string) {
56 | if e == nil {
57 | t.Errorf("%s: Element is nil", testCase)
58 | return
59 | }
60 |
61 | if e.NodeType() != clib.ElementNode {
62 | t.Errorf("%s: Expected node type 'ElementNode', got '%s'", testCase, e.NodeType())
63 | return
64 | }
65 |
66 | if e.NodeName() != assertName {
67 | t.Errorf("%s: Expected NodeName '%s', got '%s'", testCase, assertName, e.NodeName())
68 | return
69 | }
70 | }
71 |
72 | func createElementAndCheck(t *testing.T, doc *dom.Document, name, assertName, testCase string) {
73 | node, err := doc.CreateElement(name)
74 | if err != nil {
75 | t.Errorf("Failed to create new element '%s': %s", name, err)
76 | return
77 | }
78 | checkElement(t, node, assertName, testCase)
79 | }
80 |
// withDocument creates a fresh document, passes it to cb, and frees the
// document when cb returns.
func withDocument(cb func(*dom.Document)) {
	doc := dom.CreateDocument()
	defer doc.Free()

	cb(doc)
}
87 |
88 | func TestDocumentCreateElements(t *testing.T) {
89 | withDocument(func(d *dom.Document) {
90 | createElementAndCheck(t, d, "foo", "foo", "Simple Element")
91 | })
92 |
93 | withDocument(func(d *dom.Document) {
94 | d.SetEncoding("iso-8859-1")
95 | createElementAndCheck(t, d, "foo", "foo", "Create element with document with encoding")
96 | })
97 |
98 | withDocument(func(d *dom.Document) {
99 | caseName := "Create element with namespace"
100 | const prefix = "foo"
101 | const localName = "bar"
102 | e, err := d.CreateElementNS("http://kungfoo", fmt.Sprintf("%s:%s", prefix, localName))
103 | if err != nil {
104 | t.Errorf("failed to create namespaced element: %s", err)
105 | return
106 | }
107 |
108 | checkElement(t, e, "foo:bar", caseName)
109 |
110 | if e.Prefix() != prefix {
111 | t.Errorf("%s: Expected prefix '%s', got '%s'", caseName, prefix, e.Prefix())
112 | }
113 | if e.LocalName() != localName {
114 | t.Errorf("%s: Expected local name '%s', got '%s'", caseName, localName, e.LocalName())
115 | }
116 | if e.NamespaceURI() != "http://kungfoo" {
117 | t.Errorf("%s: Expected namespace uri '%s', got '%s'", caseName, "http://kungfoo", e.NamespaceURI())
118 | }
119 | })
120 |
121 | // Bad elements
122 | withDocument(func(d *dom.Document) {
123 | badnames := []string{";", "&", "<><", "/", "1A"}
124 | for _, name := range badnames {
125 | if _, err := d.CreateElement(name); err == nil {
126 | t.Errorf("Creation of element name '%s' should fail", name)
127 | }
128 | }
129 | })
130 | }
131 |
132 | func TestDocumentCreateText(t *testing.T) {
133 | withDocument(func(d *dom.Document) {
134 | const nodeName = "foo"
135 | node, err := d.CreateTextNode(nodeName)
136 | if err != nil {
137 | t.Errorf("Failed to create text node: %s", err)
138 | return
139 | }
140 |
141 | if node.NodeType() != clib.TextNode {
142 | t.Errorf("Expected NodeType '%s', got '%s'", clib.TextNode, node.NodeType())
143 | return
144 | }
145 |
146 | if node.NodeValue() != nodeName {
147 | t.Errorf("Expeted NodeValue '%s', got '%s'", nodeName, node.NodeValue())
148 | return
149 | }
150 | })
151 | }
152 |
153 | func TestDocumentCreateComment(t *testing.T) {
154 | withDocument(func(d *dom.Document) {
155 | const nodeName = "foo"
156 | node, err := d.CreateCommentNode(nodeName)
157 | if err != nil {
158 | t.Errorf("Failed to create Comment node: %s", err)
159 | return
160 | }
161 |
162 | if node.NodeType() != clib.CommentNode {
163 | t.Errorf("Expected NodeType '%s', got '%s'", clib.CommentNode, node.NodeType())
164 | return
165 | }
166 |
167 | if node.NodeValue() != nodeName {
168 | t.Errorf("Expeted NodeValue '%s', got '%s'", nodeName, node.NodeValue())
169 | return
170 | }
171 |
172 | if node.String() != "" {
173 | t.Errorf("Expeted String() to return 'foo', got '%s'", node.String())
174 | return
175 | }
176 | })
177 | }
178 |
179 | func TestDocumentCreateCDataSection(t *testing.T) {
180 | withDocument(func(d *dom.Document) {
181 | const name = "foo"
182 | node, err := d.CreateCDataSection(name)
183 | if err != nil {
184 | t.Errorf("Failed to create CDataSection node: %s", err)
185 | return
186 | }
187 |
188 | if node.NodeType() != clib.CDataSectionNode {
189 | t.Errorf("Expected NodeType '%s', got '%s'", clib.CDataSectionNode, node.NodeType())
190 | return
191 | }
192 |
193 | if node.NodeValue() != name {
194 | t.Errorf("Expeted NodeValue '%s', got '%s'", name, node.NodeValue())
195 | return
196 | }
197 |
198 | if node.String() != "" {
199 | t.Errorf("Expeted String() to return 'foo', got '%s'", node.String())
200 | return
201 | }
202 | })
203 | }
204 |
205 | func TestDocumentCreateAttribute(t *testing.T) {
206 | withDocument(func(d *dom.Document) {
207 | node, err := d.CreateAttribute("foo", "bar")
208 | if err != nil {
209 | t.Errorf("Failed to create Attribute node: %s", err)
210 | return
211 | }
212 |
213 | if node.NodeType() != clib.AttributeNode {
214 | t.Errorf("Expected NodeType '%s', got '%s'", clib.AttributeNode, node.NodeType())
215 | return
216 | }
217 |
218 | if node.NodeName() != "foo" {
219 | t.Errorf("Expeted NodeName 'foo', got '%s'", node.NodeName())
220 | return
221 | }
222 |
223 | if node.NodeValue() != "bar" {
224 | t.Errorf("Expeted NodeValue 'foo', got '%s'", node.NodeValue())
225 | return
226 | }
227 |
228 | if node.String() != ` foo="bar"` {
229 | t.Errorf(`Expeted String() to return ' foo="bar"', got '%s'`, node.String())
230 | return
231 | }
232 |
233 | if node.HasChildNodes() {
234 | t.Errorf("Expected HashChildNodes to return false")
235 | return
236 | }
237 |
238 | // Attribute nodes claim to not have any child nodes, but they do?!
239 | content, err := node.FirstChild()
240 | if !assert.NoError(t, err, "Expected FirstChild to return a node") {
241 | return
242 | }
243 |
244 | if content.NodeType() != clib.TextNode {
245 | t.Errorf("Expected content node NodeType '%s', got '%s'", clib.TextNode, content.NodeType())
246 | return
247 | }
248 | })
249 |
250 | // Bad elements
251 | withDocument(func(d *dom.Document) {
252 | badnames := []string{";", "&", "<><", "/", "1A"}
253 | for _, name := range badnames {
254 | if _, err := d.CreateAttribute(name, "bar"); err == nil {
255 | t.Errorf("Creation of attribute name '%s' should fail", name)
256 | }
257 | }
258 | })
259 | }
260 |
// TestDocumentCreateAttributeNS exercises attribute creation with and without
// a namespace, checks how the owning element serializes, and verifies that
// invalid attribute names are rejected.
//
// NOTE(review): the three expected values compared with elem.String() are
// empty raw strings in this listing; they look like serialized elements whose
// markup was lost. Restore them from the repository before relying on this
// test.
func TestDocumentCreateAttributeNS(t *testing.T) {
	withDocument(func(d *dom.Document) {
		elem, err := d.CreateElement("foo")
		if err != nil {
			t.Errorf("Failed to create Element node: %s", err)
			return
		}
		d.SetDocumentElement(elem)

		// Plain attribute whose value contains an ampersand.
		attr, err := d.CreateAttribute("attr", "e & f")
		if err != nil {
			t.Errorf("Failed to create Attribute node: %s", err)
			return
		}
		// The error returned by AddChild is not checked here.
		elem.AddChild(attr)

		if elem.String() != `` {
			t.Errorf(`Expected String '', got '%s'`, elem.String())
			return
		}
		elem.RemoveAttribute("attr")

		// Attribute created through the NS API with an empty namespace URI.
		attr, err = d.CreateAttributeNS("", "attr2", "a & b")
		if err != nil {
			t.Errorf("Failed to create Attribute node: %s", err)
			return
		}
		elem.AddChild(attr)

		if elem.String() != `` {
			t.Errorf(`Expected String '', got '%s'`, elem.String())
			return
		}
		elem.RemoveAttribute("attr2")

		// Attribute with a namespace URI and a prefixed name.
		attr, err = d.CreateAttributeNS("http://kungfoo", "foo:attr3", "g & h")
		if err != nil {
			t.Errorf("Failed to create Attribute node: %s", err)
			return
		}
		elem.AddChild(attr)

		if elem.String() != `` {
			t.Errorf(`Expected String '', got '%s'`, elem.String())
			return
		}
	})

	withDocument(func(d *dom.Document) {
		// Before a document element exists, namespaced creation must fail.
		_, err := d.CreateAttributeNS("http://kungfoo", "kung:foo", "bar")
		if err == nil {
			t.Errorf("Creating Attribute node w/o root node should have failed")
			return
		}

		elem, err := d.CreateElement("foo")
		if err != nil {
			t.Errorf("Failed to create Element node: %s", err)
			return
		}
		d.SetDocumentElement(elem)

		// With a document element in place the same call must succeed.
		attr, err := d.CreateAttributeNS("http://kungfoo", "kung:foo", "bar")
		if err != nil {
			t.Errorf("Failed to create Attribute node: %s", err)
			return
		}

		if attr.NodeName() != "kung:foo" {
			t.Errorf("Expected NodeName 'kung:foo', got '%s'", attr.NodeName())
			return
		}

		if attr.LocalName() != "foo" {
			t.Errorf("Expected LocalName 'foo', got '%s'", attr.LocalName())
			return
		}

		if attr.NodeValue() != "bar" {
			t.Errorf("Expected NodeValue() 'bar', got '%s'", attr.NodeValue())
			return
		}

		// NOTE(review): these literals may originally have contained an
		// escaped entity — confirm against the repository.
		attr.SetNodeValue(`bar&`)
		if attr.NodeValue() != `bar&` {
			t.Errorf("Expected NodeValue() 'bar&', got '%s'", attr.NodeValue())
			return
		}
	})

	// Bad elements
	withDocument(func(d *dom.Document) {
		elem, err := d.CreateElement("foo")
		if err != nil {
			t.Errorf("Failed to create Element node: %s", err)
			return
		}
		d.SetDocumentElement(elem)

		badnames := []string{";", "&", "<><", "/", "1A"}
		for _, name := range badnames {
			if _, err := d.CreateAttributeNS("http://kungfoo", name, "bar"); err == nil {
				t.Errorf("Creation of attribute name '%s' should fail", name)
			}
		}
	})
}
368 |
--------------------------------------------------------------------------------
/dom/dom.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "sync"
5 |
6 | "github.com/lestrrat-go/libxml2/xpath"
7 | )
8 |
9 | var docPool sync.Pool
10 |
11 | func init() {
12 | SetupXPathCallback()
13 | docPool = sync.Pool{}
14 | docPool.New = func() interface{} {
15 | return &Document{}
16 | }
17 | }
18 |
// SetupXPathCallback points xpath.WrapNodeFunc at this package's WrapNode.
// It is called from this package's init.
func SetupXPathCallback() {
	xpath.WrapNodeFunc = WrapNode
}
22 |
// WrapDocument takes a *Document from docPool, marks it as not mortal, stores
// n as its pointer, and returns it. n is expected to be the address of a
// C xmlDoc (see the ptr field of Document).
func WrapDocument(n uintptr) *Document {
	// The pool's New function only ever produces *Document, hence the
	// unchecked assertion.
	//nolint:forcetypeassert
	doc := docPool.Get().(*Document)
	doc.mortal = false
	doc.ptr = n
	return doc
}
30 |
--------------------------------------------------------------------------------
/dom/interface.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "errors"
5 |
6 | "github.com/lestrrat-go/libxml2/clib"
7 | )
8 |
// Errors exposed by the dom package.
var (
	// ErrAttributeNotFound is the same value as clib.ErrAttributeNotFound.
	ErrAttributeNotFound = clib.ErrAttributeNotFound
	// ErrInvalidNodeType is an error value with the message "invalid node type".
	ErrInvalidNodeType = errors.New("invalid node type")
)
13 |
// XMLNodeType identifies the type of the underlying C struct
type XMLNodeType clib.XMLNodeType

// Node type constants re-exported from clib. Each constant takes the type of
// its initializer, i.e. clib.XMLNodeType rather than the XMLNodeType defined
// above.
const (
	ElementNode      = clib.ElementNode
	AttributeNode    = clib.AttributeNode
	TextNode         = clib.TextNode
	CDataSectionNode = clib.CDataSectionNode
	EntityRefNode    = clib.EntityRefNode
	EntityNode       = clib.EntityNode
	PiNode           = clib.PiNode
	CommentNode      = clib.CommentNode
	DocumentNode     = clib.DocumentNode
	DocumentTypeNode = clib.DocumentTypeNode
	DocumentFragNode = clib.DocumentFragNode
	NotationNode     = clib.NotationNode
	HTMLDocumentNode = clib.HTMLDocumentNode
	DTDNode          = clib.DTDNode
	ElementDecl      = clib.ElementDecl
	AttributeDecl    = clib.AttributeDecl
	EntityDecl       = clib.EntityDecl
	NamespaceDecl    = clib.NamespaceDecl
	XIncludeStart    = clib.XIncludeStart
	XIncludeEnd      = clib.XIncludeEnd
	DocbDocumentNode = clib.DocbDocumentNode
)
40 |
// XMLNode is the base of the node types below. It stores the address of the
// underlying C node and the "mortal" flag toggled by MakeMortal and
// MakePersistent.
type XMLNode struct {
	ptr    uintptr // *C.xmlNode
	mortal bool
}

// Attribute is a node type built by embedding XMLNode.
type Attribute struct {
	XMLNode
}

// CDataSection is a node type built by embedding XMLNode.
type CDataSection struct {
	XMLNode
}

// Pi is a node type built by embedding XMLNode.
type Pi struct {
	XMLNode
}

// Comment is a node type built by embedding XMLNode.
type Comment struct {
	XMLNode
}

// Element is a node type built by embedding XMLNode.
type Element struct {
	XMLNode
}

// Document does not embed XMLNode; it keeps its own pointer (to a C xmlDoc)
// and its own mortal flag.
type Document struct {
	ptr    uintptr // *C.xmlDoc
	mortal bool
}

// Text is a node type built by embedding XMLNode.
type Text struct {
	XMLNode
}

// Namespace is a node type built by embedding XMLNode.
type Namespace struct {
	XMLNode
}
78 |
// Serializer turns a value into its string form, or returns an error.
type Serializer interface {
	Serialize(interface{}) (string, error)
}

// note: Serialize takes an interface because some serializers only allow
// Document, whereas others might allow Nodes
85 |
// C14NMode represents the C14N mode supported by libxml2
type C14NMode int

// C14N modes, numbered from 0 in declaration order.
//
//nolint:revive,stylecheck
const (
	C14N1_0 C14NMode = iota
	C14NExclusive1_0
	C14N1_1
)

// C14NSerialize implements the Serializer interface, and generates
// XML in C14N format.
type C14NSerialize struct {
	// Mode selects one of the C14NMode constants.
	Mode C14NMode
	// WithComments presumably controls whether comments are kept in the
	// output — confirm against the Serialize implementation.
	WithComments bool
}
102 |
--------------------------------------------------------------------------------
/dom/node.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "github.com/lestrrat-go/libxml2/clib"
5 | "github.com/lestrrat-go/libxml2/types"
6 | "github.com/lestrrat-go/libxml2/xpath"
7 | "github.com/pkg/errors"
8 | )
9 |
10 | // ChildNodes returns the child nodes
11 | func (n *XMLNode) ChildNodes() (types.NodeList, error) {
12 | list, err := clib.XMLChildNodes(n)
13 | if err != nil {
14 | return nil, errors.Wrap(err, "failed to get child node pointers")
15 | }
16 |
17 | ret := make(types.NodeList, len(list))
18 | for i, x := range list {
19 | ret[i], err = WrapNode(x)
20 | if err != nil {
21 | return nil, errors.Wrap(err, "failed to wrap node pointer")
22 | }
23 | }
24 | return ret, nil
25 | }
26 |
// RemoveChild delegates to clib.XMLRemoveChild with this node and t, and
// returns its error.
func (n *XMLNode) RemoveChild(t types.Node) error {
	return clib.XMLRemoveChild(n, t)
}
30 |
// Pointer returns the pointer to the underlying C struct. It is 0 after
// Free has been called.
func (n *XMLNode) Pointer() uintptr {
	return n.ptr
}
35 |
// String returns the string representation. It is shorthand for
// ToString(0, false).
func (n *XMLNode) String() string {
	return n.ToString(0, false)
}
40 |
41 | // OwnerDocument returns the Document that this node belongs to
42 | func (n *XMLNode) OwnerDocument() (types.Document, error) {
43 | ptr, err := clib.XMLOwnerDocument(n)
44 | if err != nil {
45 | return nil, errors.Wrap(err, "failed to get valid owner document")
46 | }
47 |
48 | if ptr == 0 {
49 | return nil, errors.Wrap(clib.ErrInvalidDocument, "failed to get valid owner document")
50 | }
51 | return WrapDocument(ptr), nil
52 | }
53 |
54 | // NodeName returns the node name
55 | func (n *XMLNode) NodeName() string {
56 | s, err := clib.XMLNodeName(n)
57 | if err != nil {
58 | return ""
59 | }
60 | return s
61 | }
62 |
63 | // NodeValue returns the node value
64 | func (n *XMLNode) NodeValue() string {
65 | s, err := clib.XMLNodeValue(n)
66 | if err != nil {
67 | return ""
68 | }
69 | return s
70 | }
71 |
// Literal returns the literal string value: the same text as String, always
// with a nil error. Note that, unlike the other XMLNode methods in this file,
// it is declared on a value receiver.
func (n XMLNode) Literal() (string, error) {
	return n.String(), nil
}
76 |
// IsSameNode returns true if two nodes point to the same node, i.e. their
// Pointer() values are equal. other must not be nil, since its Pointer
// method is called unconditionally.
func (n *XMLNode) IsSameNode(other types.Node) bool {
	return n.Pointer() == other.Pointer()
}
81 |
82 | // Copy creates a copy of the node
83 | func (n *XMLNode) Copy() (types.Node, error) {
84 | doc, err := n.OwnerDocument()
85 | if err != nil {
86 | return nil, errors.Wrap(err, "failed to get owner document")
87 | }
88 | nptr, err := clib.XMLDocCopyNode(n, doc, 1)
89 | if err != nil {
90 | return nil, errors.Wrap(err, "failed to copy document nodes")
91 | }
92 | return WrapNode(nptr)
93 | }
94 |
// SetDocument sets the document of this node and its descendants. It
// delegates to clib.XMLSetTreeDoc and returns its error.
func (n *XMLNode) SetDocument(d types.Document) error {
	return clib.XMLSetTreeDoc(n, d)
}
99 |
100 | // ParseInContext parses a chunk of XML in the context of the current
101 | // node. This makes it safe to append the resulting node to the current
102 | // node or other nodes in the same document.
103 | func (n *XMLNode) ParseInContext(s string, o int) (types.Node, error) {
104 | nptr, err := clib.XMLParseInNodeContext(n, s, o)
105 | if err != nil {
106 | return nil, errors.Wrap(err, "failed to parse input")
107 | }
108 | return WrapNode(nptr)
109 | }
110 |
111 | // Find evaluates the xpath expression and returns the matching nodes
112 | func (n *XMLNode) Find(expr string) (types.XPathResult, error) {
113 | ctx, err := xpath.NewContext(n)
114 | if err != nil {
115 | return nil, errors.Wrap(err, "failed to create new XPath context")
116 | }
117 | defer ctx.Free()
118 |
119 | return ctx.Find(expr)
120 | }
121 |
122 | // FindExpr evalues the pre-compiled xpath expression and returns the matching nodes
123 | func (n *XMLNode) FindExpr(expr *xpath.Expression) (types.XPathResult, error) {
124 | ctx, err := xpath.NewContext(n)
125 | if err != nil {
126 | return nil, errors.Wrap(err, "failed to create new XPath context")
127 | }
128 | defer ctx.Free()
129 |
130 | return ctx.FindExpr(expr)
131 | }
132 |
// HasChildNodes returns true if the node contains children, as reported by
// clib.XMLHasChildNodes.
func (n *XMLNode) HasChildNodes() bool {
	return clib.XMLHasChildNodes(n)
}
137 |
138 | // FirstChild reutrns the first child node
139 | func (n *XMLNode) FirstChild() (types.Node, error) {
140 | ptr, err := clib.XMLFirstChild(n)
141 | if err != nil {
142 | return nil, errors.Wrap(err, "failed to get valid pointer to first child")
143 | }
144 | return WrapNode(ptr)
145 | }
146 |
147 | // LastChild returns the last child node
148 | func (n *XMLNode) LastChild() (types.Node, error) {
149 | ptr, err := clib.XMLLastChild(n)
150 | if err != nil {
151 | return nil, errors.Wrap(err, "failed to get valid pointer to first child")
152 | }
153 | return WrapNode(ptr)
154 | }
155 |
// LocalName returns the local name, as reported by clib.XMLLocalName.
func (n *XMLNode) LocalName() string {
	return clib.XMLLocalName(n)
}
160 |
// NamespaceURI returns the namespace URI associated with this node, as
// reported by clib.XMLNamespaceURI.
func (n *XMLNode) NamespaceURI() string {
	return clib.XMLNamespaceURI(n)
}
165 |
166 | // NextSibling returns the next sibling
167 | func (n *XMLNode) NextSibling() (types.Node, error) {
168 | ptr, err := clib.XMLNextSibling(n)
169 | if err != nil {
170 | return nil, errors.Wrap(err, "failed to get valid pointer to next child")
171 | }
172 | if ptr == 0 {
173 | return nil, nil
174 | }
175 | return WrapNode(ptr)
176 | }
177 |
178 | // ParentNode returns the parent node
179 | func (n *XMLNode) ParentNode() (types.Node, error) {
180 | ptr, err := clib.XMLParentNode(n)
181 | if err != nil {
182 | return nil, errors.Wrap(err, "failed to get valid pointer to parent node")
183 | }
184 |
185 | return WrapNode(ptr)
186 | }
187 |
// Prefix returns the prefix from the node name, if any, as reported by
// clib.XMLPrefix.
func (n *XMLNode) Prefix() string {
	return clib.XMLPrefix(n)
}
192 |
193 | // PreviousSibling returns the previous sibling
194 | func (n *XMLNode) PreviousSibling() (types.Node, error) {
195 | ptr, err := clib.XMLPreviousSibling(n)
196 | if err != nil {
197 | return nil, errors.Wrap(err, "failed to get valid pointer to previous child")
198 | }
199 |
200 | return WrapNode(ptr)
201 | }
202 |
// SetNodeName sets the node name. The error returned by clib.XMLSetNodeName
// is discarded, so a failure is not visible to the caller.
func (n *XMLNode) SetNodeName(name string) {
	_ = clib.XMLSetNodeName(n, name)
}
207 |
// SetNodeValue sets the node value. The error returned by
// clib.XMLSetNodeValue is discarded, so a failure is not visible to the
// caller.
func (n *XMLNode) SetNodeValue(value string) {
	_ = clib.XMLSetNodeValue(n, value)
}
212 |
213 | // AddChild appends the node
214 | func (n *XMLNode) AddChild(child types.Node) error {
215 | return clib.XMLAddChild(n, child)
216 | }
217 |
218 | // TextContent returns the text content
219 | func (n *XMLNode) TextContent() string {
220 | return clib.XMLTextContent(n)
221 | }
222 |
223 | // ToString returns the string representation. (But it should probably
224 | // be deprecated)
225 | func (n *XMLNode) ToString(format int, docencoding bool) string {
226 | return clib.XMLToString(n, format, docencoding)
227 | }
228 |
229 | // LookupNamespacePrefix returns the prefix associated with the given URL
230 | func (n *XMLNode) LookupNamespacePrefix(href string) (string, error) {
231 | return clib.XMLLookupNamespacePrefix(n, href)
232 | }
233 |
234 | // LookupNamespaceURI returns the URI associated with the given prefix
235 | func (n *XMLNode) LookupNamespaceURI(prefix string) (string, error) {
236 | return clib.XMLLookupNamespaceURI(n, prefix)
237 | }
238 |
239 | // NodeType returns the XMLNodeType
240 | func (n *XMLNode) NodeType() clib.XMLNodeType {
241 | return clib.XMLGetNodeType(n)
242 | }
243 |
// MakeMortal sets the node's mortal flag, so that a later `AutoFree`
// call invokes Free() to release the underlying C resources.
func (n *XMLNode) MakeMortal() {
	n.mortal = true
}
249 |
// MakePersistent clears the node's mortal flag, so that `AutoFree`
// becomes a no-op. Call this if you used `MakeMortal` together with
// `AutoFree` but then decided to keep the node around.
func (n *XMLNode) MakePersistent() {
	n.mortal = false
}
256 |
// Free releases the underlying C struct through clib.XMLFreeNode (its
// result is discarded) and then zeroes the stored pointer.
func (n *XMLNode) Free() {
	_ = clib.XMLFreeNode(n)
	// Drop the pointer so the wrapper no longer refers to the freed struct.
	n.ptr = 0
}
262 |
263 | func walk(n types.Node, fn func(types.Node) error) error {
264 | if err := fn(n); err != nil {
265 | return errors.Wrap(err, "failed to call callback")
266 | }
267 | children, err := n.ChildNodes()
268 | if err != nil {
269 | return errors.Wrap(err, "failed to fetch child nodes")
270 | }
271 | for _, c := range children {
272 | if err := walk(c, fn); err != nil {
273 | return errors.Wrap(err, "failed to walk to child nodes")
274 | }
275 | }
276 | return nil
277 | }
278 |
279 | // Walk traverses through all of the nodes
280 | func (n *XMLNode) Walk(fn func(types.Node) error) error {
281 | return walk(n, fn)
282 | }
283 |
284 | // AutoFree allows you to free the underlying C resources. It is
285 | // meant to be called from defer. If you don't call `MakeMortal()` or
286 | // do call `MakePersistent()`, AutoFree is a no-op.
287 | func (n *XMLNode) AutoFree() {
288 | if !n.mortal {
289 | return
290 | }
291 | n.Free()
292 | }
293 |
--------------------------------------------------------------------------------
/dom/node_attr.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "github.com/lestrrat-go/libxml2/clib"
5 | )
6 |
7 | // Free releases the underlying C struct
8 | func (n *Attribute) Free() {
9 | _ = clib.XMLFreeProp(n)
10 | }
11 |
// HasChildNodes reports whether the node contains any child nodes.
// By definition attributes cannot have children, so this always
// returns false.
func (n *Attribute) HasChildNodes() bool {
	return false
}
18 |
19 | // Value returns the value of the attribute.
20 | func (n *Attribute) Value() string {
21 | v, err := clib.XMLNodeValue(n)
22 | if err != nil {
23 | return ""
24 | }
25 | return v
26 | }
27 |
--------------------------------------------------------------------------------
/dom/node_document.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "github.com/lestrrat-go/libxml2/clib"
5 | "github.com/lestrrat-go/libxml2/types"
6 | "github.com/pkg/errors"
7 | )
8 |
9 | // CreateDocument creates a new document with version="1.0", and no encoding
10 | func CreateDocument() *Document {
11 | return NewDocument("1.0", "")
12 | }
13 |
14 | // NewDocument creates a new document
15 | func NewDocument(version, encoding string) *Document {
16 | ptr := clib.XMLCreateDocument(version, encoding)
17 | return WrapDocument(ptr)
18 | }
19 |
// Pointer returns the pointer to the underlying C struct, as stored in
// the document's ptr field.
func (d *Document) Pointer() uintptr {
	return d.ptr
}
24 |
25 | // AutoFree calls Free() if the document is moral.
26 | func (d *Document) AutoFree() {
27 | if !d.mortal {
28 | return
29 | }
30 | d.Free()
31 | }
32 |
// MakeMortal sets the mortal flag, which makes AutoFree call Free().
func (d *Document) MakeMortal() {
	d.mortal = true
}
37 |
// MakePersistent clears the mortal flag, which makes AutoFree a no-op.
func (d *Document) MakePersistent() {
	d.mortal = false
}
42 |
43 | // IsSameNode checks if the underlying C pointer points to the same C struct
44 | func (d *Document) IsSameNode(n types.Node) bool {
45 | return d.ptr == n.Pointer()
46 | }
47 |
48 | // HasChildNodes returns true if the document node is available
49 | func (d *Document) HasChildNodes() bool {
50 | _, err := d.DocumentElement()
51 | return err != nil
52 | }
53 |
54 | // FirstChild returns the document element
55 | func (d *Document) FirstChild() (types.Node, error) {
56 | root, err := d.DocumentElement()
57 | if err != nil {
58 | return nil, errors.Wrap(err, "failed to get document element")
59 | }
60 |
61 | return root, nil
62 | }
63 |
64 | // LastChild returns the document element
65 | func (d *Document) LastChild() (types.Node, error) {
66 | root, err := d.DocumentElement()
67 | if err != nil {
68 | return nil, errors.Wrap(err, "failed to get document element")
69 | }
70 |
71 | return root, nil
72 | }
73 |
// NextSibling always returns a nil node together with a non-nil error
// for Document, because a document has no siblings.
func (d *Document) NextSibling() (types.Node, error) {
	return nil, errors.New("document has no siblings")
}
78 |
// PreviousSibling always returns a nil node together with a non-nil
// error for Document, because a document has no siblings.
func (d *Document) PreviousSibling() (types.Node, error) {
	return nil, errors.New("document has no siblings")
}
83 |
// NodeName always returns an empty string for Document.
func (d *Document) NodeName() string {
	return ""
}
88 |
// SetNodeName is a no-op for Document: the argument is ignored and,
// since the method has no return value, nothing is reported.
func (d *Document) SetNodeName(_ string) {
	// return errors.New("cannot set node name on a document")
}
93 |
// NodeValue always returns an empty string for Document.
func (d *Document) NodeValue() string {
	return ""
}
98 |
// SetNodeValue is a no-op for Document: the argument is ignored and,
// since the method has no return value, nothing is reported.
func (d *Document) SetNodeValue(_ string) {
	// return errors.New("cannot set node value on a document")
}
103 |
// OwnerDocument always returns the document itself and a nil error.
func (d *Document) OwnerDocument() (types.Document, error) {
	return d, nil
}
108 |
// SetDocument always returns an error for a document; the argument is
// ignored.
func (d *Document) SetDocument(_ types.Document) error {
	return errors.New("cannot set document on a document")
}
113 |
// ParentNode always returns a nil node and an error for a document,
// because a document has no parent node.
func (d *Document) ParentNode() (types.Node, error) {
	return nil, errors.New("document has no parent node")
}
118 |
// ParseInContext is currently unimplemented: both arguments are ignored
// and an "unimplemented" error is always returned.
func (d *Document) ParseInContext(_ string, _ int) (types.Node, error) {
	return nil, errors.New("unimplemented")
}
123 |
124 | // Literal is currently just an alias to Dump(false)
125 | func (d *Document) Literal() (string, error) {
126 | return d.Dump(false), nil
127 | }
128 |
129 | // TextContent returns the text content
130 | func (d *Document) TextContent() string {
131 | return clib.XMLTextContent(d)
132 | }
133 |
134 | // ToString is currently just an alias to Dump(false)
135 | func (d *Document) ToString(_ int, b bool) string {
136 | return d.Dump(b)
137 | }
138 |
139 | // ChildNodes returns the document element
140 | func (d *Document) ChildNodes() (types.NodeList, error) {
141 | root, err := d.DocumentElement()
142 | if err != nil {
143 | return nil, errors.Wrap(err, "failed to get document element")
144 | }
145 |
146 | return []types.Node{root}, nil
147 | }
148 |
// Copy is currently unimplemented and always returns an "unimplemented"
// error.
func (d *Document) Copy() (types.Node, error) {
	// Unimplemented
	return nil, errors.New("unimplemented")
}
154 |
// AddChild is not available for Document: the argument is ignored and
// an error is always returned.
func (d *Document) AddChild(_ types.Node) error {
	return errors.New("method AddChild is not available for Document node")
}
159 |
160 | // CreateAttribute creates a new attribute
161 | func (d *Document) CreateAttribute(k, v string) (*Attribute, error) {
162 | attr, err := clib.XMLNewDocProp(d, k, v)
163 | if err != nil {
164 | return nil, errors.Wrap(err, "failed to get document property")
165 | }
166 | return wrapAttributeNode(attr), nil
167 | }
168 |
169 | // CreateAttributeNS creates a new attribute with the given XML namespace
170 | func (d *Document) CreateAttributeNS(nsuri, k, v string) (*Attribute, error) {
171 | if nsuri == "" {
172 | return d.CreateAttribute(k, v)
173 | }
174 |
175 | ptr, err := clib.XMLCreateAttributeNS(d, nsuri, k, v)
176 | if err != nil {
177 | return nil, errors.Wrap(err, "failed to create attribute")
178 | }
179 | return wrapAttributeNode(ptr), nil
180 | }
181 |
182 | // CreateCDataSection creates a new CDATA section node
183 | func (d *Document) CreateCDataSection(txt string) (*CDataSection, error) {
184 | cdata, err := clib.XMLNewCDataBlock(d, txt)
185 | if err != nil {
186 | return nil, errors.Wrap(err, "failed to create CDATA block")
187 | }
188 | return wrapCDataSectionNode(cdata), nil
189 | }
190 |
191 | // CreateCommentNode creates a new comment node
192 | func (d *Document) CreateCommentNode(txt string) (*Comment, error) {
193 | ptr, err := clib.XMLNewComment(txt)
194 | if err != nil {
195 | return nil, errors.Wrap(err, "failed to create comment")
196 | }
197 | return wrapCommentNode(ptr), nil
198 | }
199 |
200 | // CreateElement creates a new element node
201 | func (d *Document) CreateElement(name string) (types.Element, error) {
202 | ptr, err := clib.XMLCreateElement(d, name)
203 | if err != nil {
204 | return nil, errors.Wrap(err, "failed to create element")
205 | }
206 | return wrapElementNode(ptr), nil
207 | }
208 |
209 | // CreateElementNS creates a new element node in the given XML namespace
210 | func (d *Document) CreateElementNS(nsuri, name string) (types.Element, error) {
211 | ptr, err := clib.XMLCreateElementNS(d, nsuri, name)
212 | if err != nil {
213 | return nil, errors.Wrap(err, "failed to create element")
214 | }
215 | return wrapElementNode(ptr), nil
216 | }
217 |
218 | // CreateTextNode creates a new text node
219 | func (d *Document) CreateTextNode(txt string) (*Text, error) {
220 | ptr, err := clib.XMLNewText(txt)
221 | if err != nil {
222 | return nil, errors.Wrap(err, "failed to create text node")
223 | }
224 | return wrapTextNode(ptr), nil
225 | }
226 |
227 | // DocumentElement returns the root node of the document
228 | func (d *Document) DocumentElement() (types.Node, error) {
229 | n, err := clib.XMLDocumentElement(d)
230 | if err != nil {
231 | return nil, errors.Wrap(err, "failed to get document element")
232 | }
233 | return WrapNode(n)
234 | }
235 |
236 | // Find returns the nodes that can be selected with the
237 | // given xpath string
238 | func (d *Document) Find(xpath string) (types.XPathResult, error) {
239 | root, err := d.DocumentElement()
240 | if err != nil {
241 | return nil, errors.Wrap(err, "failed to get document element")
242 | }
243 | return root.Find(xpath)
244 | }
245 |
246 | // Encoding returns the d
247 | func (d *Document) Encoding() string {
248 | return clib.XMLDocumentEncoding(d)
249 | }
250 |
// Free releases the underlying C struct through clib.XMLFreeDoc (its
// result is discarded), zeroes the stored pointer, and hands the Document
// to docPool.
//
// NOTE(review): calling Free twice would Put the same *Document into
// docPool twice — confirm callers never do that.
func (d *Document) Free() {
	_ = clib.XMLFreeDoc(d)
	d.ptr = 0
	docPool.Put(d)
}
257 |
258 | // String formats the document, always without formatting.
259 | func (d *Document) String() string {
260 | return clib.XMLDocumentString(d, d.Encoding(), false)
261 | }
262 |
263 | // Dump formats the document with or withour formatting.
264 | func (d *Document) Dump(format bool) string {
265 | return clib.XMLDocumentString(d, d.Encoding(), format)
266 | }
267 |
// NodeType returns the XMLNodeType, which is always DocumentNode for a
// Document.
func (d *Document) NodeType() clib.XMLNodeType {
	return DocumentNode
}
272 |
// SetBaseURI sets the base URI of the document to s via
// clib.XMLNodeSetBase.
func (d *Document) SetBaseURI(s string) {
	clib.XMLNodeSetBase(d, s)
}
277 |
278 | // SetDocumentElement sets the document element
279 | func (d *Document) SetDocumentElement(n types.Node) error {
280 | return clib.XMLSetDocumentElement(d, n)
281 | }
282 |
// SetEncoding sets the encoding of the document to e via
// clib.XMLSetDocumentEncoding.
func (d *Document) SetEncoding(e string) {
	clib.XMLSetDocumentEncoding(d, e)
}
287 |
// SetStandalone sets the standalone flag of the document to v via
// clib.XMLSetDocumentStandalone.
func (d *Document) SetStandalone(v int) {
	clib.XMLSetDocumentStandalone(d, v)
}
292 |
// SetVersion sets the version of the document to v via
// clib.XMLSetDocumentVersion.
func (d *Document) SetVersion(v string) {
	clib.XMLSetDocumentVersion(d, v)
}
297 |
298 | // Standalone returns the value of the standalone flag
299 | func (d *Document) Standalone() int {
300 | return clib.XMLDocumentStandalone(d)
301 | }
302 |
303 | // URI returns the document URI
304 | func (d *Document) URI() string {
305 | return clib.XMLDocumentURI(d)
306 | }
307 |
308 | // Version returns the version of the document
309 | func (d *Document) Version() string {
310 | return clib.XMLDocumentVersion(d)
311 | }
312 |
313 | // Walk traverses the nodes in the document
314 | func (d *Document) Walk(fn func(types.Node) error) error {
315 | root, err := d.DocumentElement()
316 | if err != nil {
317 | return errors.Wrap(err, "failed to get document element")
318 | }
319 | return walk(root, fn)
320 | }
321 |
322 | // LookupNamespacePrefix looks for a namespace prefix that matches
323 | // the given namespace URI
324 | func (d *Document) LookupNamespacePrefix(href string) (string, error) {
325 | root, err := d.DocumentElement()
326 | if err != nil {
327 | return "", errors.Wrap(err, "failed to get document element")
328 | }
329 |
330 | return root.LookupNamespacePrefix(href)
331 | }
332 |
333 | // LookupNamespaceURI looks for a namespace uri that matches
334 | // the given namespace prefix
335 | func (d *Document) LookupNamespaceURI(prefix string) (string, error) {
336 | root, err := d.DocumentElement()
337 | if err != nil {
338 | return "", errors.Wrap(err, "failed to get document element")
339 | }
340 |
341 | return root.LookupNamespaceURI(prefix)
342 | }
343 |
344 | func (d *Document) RemoveChild(n types.Node) error {
345 | root, err := d.DocumentElement()
346 | if err != nil {
347 | return errors.Wrap(err, "failed to get document element")
348 | }
349 | return root.RemoveChild(n)
350 | }
351 |
--------------------------------------------------------------------------------
/dom/node_element.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "bytes"
5 | "errors"
6 | "strings"
7 |
8 | "github.com/lestrrat-go/libxml2/clib"
9 | "github.com/lestrrat-go/libxml2/types"
10 | )
11 |
12 | // SetNamespace sets up a new namespace on the given node.
13 | // An XML namespace declaration is explicitly created only if
14 | // the activate flag is enabled, and the namespace is not
15 | // declared in a previous tree hierarchy.
16 | func (n *Element) SetNamespace(uri, prefix string, activate ...bool) error {
17 | var activateflag bool
18 | if len(activate) < 1 {
19 | activateflag = true
20 | } else {
21 | activateflag = activate[0]
22 | }
23 |
24 | if uri == "" && prefix == "" {
25 | // Empty namespace
26 | doc, err := n.OwnerDocument()
27 | if err != nil {
28 | return err
29 | }
30 | nsptr, err := clib.XMLSearchNs(doc, n, "")
31 | if err != nil {
32 | return err
33 | }
34 |
35 | ns := wrapNamespaceNode(nsptr)
36 | if ns.URI() != "" {
37 | if activateflag {
38 | _ = clib.XMLSetNs(n, nil)
39 | }
40 | }
41 | return nil
42 | }
43 |
44 | if uri == "" {
45 | return errors.New("missing uri for SetNamespace")
46 | }
47 |
48 | ns, err := clib.XMLNewNs(n, uri, prefix)
49 | if err != nil {
50 | return err
51 | }
52 |
53 | if activateflag {
54 | if err := clib.XMLSetNs(n, wrapNamespaceNode(ns)); err != nil {
55 | return err
56 | }
57 | }
58 | return nil
59 | }
60 |
61 | // AppendText adds a new text node
62 | func (n *Element) AppendText(s string) error {
63 | return clib.XMLAppendText(n, s)
64 | }
65 |
66 | // SetAttribute sets an attribute
67 | func (n *Element) SetAttribute(name, value string) error {
68 | return clib.XMLSetProp(n, name, value)
69 | }
70 |
71 | // GetAttribute retrieves the value of an attribute
72 | func (n *Element) GetAttribute(name string) (types.Attribute, error) {
73 | attrNode, err := clib.XMLElementGetAttributeNode(n, name)
74 | if err != nil {
75 | return nil, err
76 | }
77 | return wrapAttributeNode(attrNode), nil
78 | }
79 |
80 | // Attributes returns a list of attributes on a node
81 | func (n *Element) Attributes() ([]types.Attribute, error) {
82 | attrs, err := clib.XMLElementAttributes(n)
83 | if err != nil {
84 | return nil, err
85 | }
86 | ret := make([]types.Attribute, len(attrs))
87 | for i, attr := range attrs {
88 | ret[i] = wrapAttributeNode(attr)
89 | }
90 | return ret, nil
91 | }
92 |
93 | // RemoveAttribute completely removes an attribute from the node
94 | func (n *Element) RemoveAttribute(name string) error {
95 | i := strings.IndexByte(name, ':')
96 | if i == -1 {
97 | return clib.XMLUnsetProp(n, name)
98 | }
99 |
100 | // look for the prefix
101 | doc, err := n.OwnerDocument()
102 | if err != nil {
103 | return err
104 | }
105 | ns, err := clib.XMLSearchNs(doc, n, name[:i])
106 | if err != nil {
107 | return ErrAttributeNotFound
108 | }
109 |
110 | return clib.XMLUnsetNsProp(n, wrapNamespaceNode(ns), name)
111 | }
112 |
113 | // GetNamespaces returns Namespace objects associated with this
114 | // element. WARNING: This method currently returns namespace
115 | // objects which allocates C structures for each namespace.
116 | // Therefore you MUST free the structures, or otherwise you
117 | // WILL leak memory.
118 | func (n *Element) GetNamespaces() ([]types.Namespace, error) {
119 | list, err := clib.XMLElementNamespaces(n)
120 | if err != nil {
121 | return nil, err
122 | }
123 | ret := make([]types.Namespace, len(list))
124 | for i, nsptr := range list {
125 | ret[i] = wrapNamespaceNode(nsptr)
126 | }
127 | return ret, nil
128 | }
129 |
130 | // Literal returns a stringified version of this node and its
131 | // children, inclusive.
132 | func (n Element) Literal() (string, error) {
133 | buf := bytes.Buffer{}
134 | children, err := n.ChildNodes()
135 | if err != nil {
136 | return "", err
137 | }
138 | for _, c := range children {
139 | l, err := c.Literal()
140 | if err != nil {
141 | return "", err
142 | }
143 | buf.WriteString(l)
144 | }
145 | return buf.String(), nil
146 | }
147 |
--------------------------------------------------------------------------------
/dom/node_namespace.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "github.com/lestrrat-go/libxml2/clib"
5 | )
6 |
7 | // URI returns the namespace URL
8 | func (n *Namespace) URI() string {
9 | return clib.XMLNamespaceHref(n)
10 | }
11 |
12 | // Prefix returns the prefix for this namespace
13 | func (n *Namespace) Prefix() string {
14 | return clib.XMLNamespacePrefix(n)
15 | }
16 |
// Free releases the underlying C struct through clib.XMLNamespaceFree and
// then zeroes the stored pointer.
func (n *Namespace) Free() {
	clib.XMLNamespaceFree(n)
	// Drop the pointer so the wrapper no longer refers to the freed struct.
	n.ptr = 0
}
22 |
23 | // String returns the stringified Namespace
24 | func (n *Namespace) String() string {
25 | prefix := n.Prefix()
26 | if prefix != "" {
27 | prefix = ":" + prefix
28 | }
29 | return "xmlns" + prefix + `="` + n.URI() + `"`
30 | }
31 |
--------------------------------------------------------------------------------
/dom/node_test.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 |
7 | "github.com/lestrrat-go/libxml2/clib"
8 | "github.com/lestrrat-go/libxml2/types"
9 | "github.com/stretchr/testify/assert"
10 | )
11 |
// init calls clib.ReportErrors(false) before the tests in this package run.
func init() {
	clib.ReportErrors(false)
}
15 |
// XMLNodeTypeToString is a table entry for TestXMLNodeTypeStringer: it
// pairs a node type with the string its String method is expected to yield.
type XMLNodeTypeToString struct {
	v clib.XMLNodeType // node type under test
	e string           // expected result of v.String()
}
20 |
21 | func TestXMLNodeTypeStringer(t *testing.T) {
22 | values := []XMLNodeTypeToString{
23 | {
24 | v: ElementNode,
25 | e: "ElementNode",
26 | },
27 | {
28 | v: AttributeNode,
29 | e: "AttributeNode",
30 | },
31 | {
32 | v: TextNode,
33 | e: "TextNode",
34 | },
35 | {
36 | v: CDataSectionNode,
37 | e: "CDataSectionNode",
38 | },
39 | {
40 | v: EntityRefNode,
41 | e: "EntityRefNode",
42 | },
43 | {
44 | v: EntityNode,
45 | e: "EntityNode",
46 | },
47 | {
48 | v: PiNode,
49 | e: "PiNode",
50 | },
51 | {
52 | v: CommentNode,
53 | e: "CommentNode",
54 | },
55 | {
56 | v: DocumentNode,
57 | e: "DocumentNode",
58 | },
59 | {
60 | v: DocumentTypeNode,
61 | e: "DocumentTypeNode",
62 | },
63 | {
64 | v: DocumentFragNode,
65 | e: "DocumentFragNode",
66 | },
67 | {
68 | v: NotationNode,
69 | e: "NotationNode",
70 | },
71 | {
72 | v: HTMLDocumentNode,
73 | e: "HTMLDocumentNode",
74 | },
75 | {
76 | v: DTDNode,
77 | e: "DTDNode",
78 | },
79 | {
80 | v: ElementDecl,
81 | e: "ElementDecl",
82 | },
83 | {
84 | v: AttributeDecl,
85 | e: "AttributeDecl",
86 | },
87 | {
88 | v: EntityDecl,
89 | e: "EntityDecl",
90 | },
91 | {
92 | v: NamespaceDecl,
93 | e: "NamespaceDecl",
94 | },
95 | {
96 | v: XIncludeStart,
97 | e: "XIncludeStart",
98 | },
99 | {
100 | v: XIncludeEnd,
101 | e: "XIncludeEnd",
102 | },
103 | {
104 | v: DocbDocumentNode,
105 | e: "DocbDocumentNode",
106 | },
107 | }
108 |
109 | for _, d := range values {
110 | if d.v.String() != d.e {
111 | t.Errorf("e '%s', got '%s'", d.e, d.v.String())
112 | }
113 | }
114 | }
115 |
116 | func TestDOM(t *testing.T) {
117 | doc := CreateDocument()
118 | defer doc.Free()
119 |
120 | root, err := doc.CreateElement("root")
121 | if err != nil {
122 | t.Errorf("Failed to create root element: %s", err)
123 | return
124 | }
125 |
126 | doc.SetDocumentElement(root)
127 | var toRemove types.Node
128 | for i := 1; i <= 3; i++ {
129 | child, err := doc.CreateElement(fmt.Sprintf("child%d", i))
130 | if !assert.NoError(t, err, "dom.CreateElement(child%d) should succeed", i) {
131 | return
132 | }
133 | child.AppendText(fmt.Sprintf("text%d", i))
134 | root.AddChild(child)
135 |
136 | if i == 2 {
137 | toRemove = child
138 | }
139 | }
140 |
141 | // Temporary test
142 | expected := `
143 | text1text2text3
144 | `
145 | if !assert.Equal(t, expected, doc.String(), "Failed to create XML document") {
146 | return
147 | }
148 |
149 | if !assert.NoError(t, root.RemoveChild(toRemove), "RemoveChild should succeed") {
150 | return
151 | }
152 | expected = `
153 | text1text3
154 | `
155 | if !assert.Equal(t, expected, doc.String(), "XML should match") {
156 | return
157 | }
158 | }
159 |
160 | func TestNode_StandaloneWithNamespaces(t *testing.T) {
161 | uri := "http://kungfoo"
162 | prefix := "foo"
163 | name := "bar"
164 |
165 | doc := CreateDocument()
166 | elem, err := doc.CreateElementNS(uri, prefix+":"+name)
167 | if !assert.NoError(t, err, "CreateElementNS snould succeed") {
168 | return
169 | }
170 |
171 | lookedup, err := elem.LookupNamespaceURI(prefix)
172 | if !assert.NoError(t, err, "LookupNamespaceURI should succeed") {
173 | return
174 | }
175 | if !assert.Equal(t, uri, lookedup, "LookupNamespaceURI succeeds") {
176 | return
177 | }
178 |
179 | lookedup, err = elem.LookupNamespacePrefix(uri)
180 | if !assert.NoError(t, err, "LookupNamespacePrefix should succeed") {
181 | return
182 | }
183 | if !assert.Equal(t, prefix, lookedup, "LookupNamespacePrefix succeeds") {
184 | return
185 | }
186 |
187 | nslist, err := elem.GetNamespaces()
188 | if !assert.NoError(t, err, "GetNamespaces succeeds") {
189 | return
190 | }
191 |
192 | defer func() {
193 | for _, ns := range nslist {
194 | ns.Free()
195 | }
196 | }()
197 |
198 | if !assert.Len(t, nslist, 1, "GetNamespaces returns 1 namespace") {
199 | return
200 | }
201 | }
202 |
203 | func TestAttribute(t *testing.T) {
204 | doc := CreateDocument()
205 | attr, err := doc.CreateAttribute("foo", "bar")
206 | if !assert.NoError(t, err, "attribute created") {
207 | return
208 | }
209 |
210 | if !assert.NotPanics(t, func() { attr.Free() }, "free should not panic") {
211 | return
212 | }
213 | }
214 |
215 | func TestCreateElementNS(t *testing.T) {
216 | doc := CreateDocument()
217 | root, err := doc.CreateElementNS("http://foo.bar.baz", "foo:root")
218 | if !assert.NoError(t, err, "CreateElementNS should succeed") {
219 | return
220 | }
221 | doc.SetDocumentElement(root)
222 |
223 | n1, err := doc.CreateElementNS("http://foo.bar.baz", "foo:n1")
224 | if !assert.NoError(t, err, "CreateElementNS should succeed") {
225 | return
226 | }
227 | root.AddChild(n1)
228 |
229 | n2, err := doc.CreateElementNS("http://foo.bar.baz", "bar:n2")
230 | if !assert.NoError(t, err, "CreateElementNS should succeed") {
231 | return
232 | }
233 | root.AddChild(n2)
234 |
235 | _, err = doc.CreateElementNS("http://foo.bar.baz.quux", "foo:n3")
236 | if !assert.Error(t, err, "CreateElementNS should fail") {
237 | return
238 | }
239 |
240 | t.Logf("%s", doc.Dump(false))
241 | }
242 |
--------------------------------------------------------------------------------
/dom/node_text.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "github.com/lestrrat-go/libxml2/clib"
5 | )
6 |
7 | func (n *CDataSection) Literal() (string, error) {
8 | return clib.XMLNodeValue(n)
9 | }
10 |
11 | // Data returns the content associated with this node
12 | func (n *Text) Data() string {
13 | return clib.XMLTextData(n)
14 | }
15 |
--------------------------------------------------------------------------------
/dom/node_wrap.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | // Auto-generated by internal/cmd/genwrapnode/genwrapnode.go. DO NOT EDIT!
4 |
5 | import (
6 | "fmt"
7 |
8 | "github.com/lestrrat-go/libxml2/clib"
9 | "github.com/lestrrat-go/libxml2/types"
10 | )
11 |
// wrapNamespaceNode returns a new Namespace whose ptr field is set to ptr.
func wrapNamespaceNode(ptr uintptr) *Namespace {
	var n Namespace
	n.ptr = ptr
	return &n
}
17 |
// wrapAttributeNode returns a new Attribute whose ptr field is set to ptr.
func wrapAttributeNode(ptr uintptr) *Attribute {
	var n Attribute
	n.ptr = ptr
	return &n
}
23 |
// wrapCDataSectionNode returns a new CDataSection whose ptr field is set
// to ptr.
func wrapCDataSectionNode(ptr uintptr) *CDataSection {
	var n CDataSection
	n.ptr = ptr
	return &n
}
29 |
// wrapCommentNode returns a new Comment whose ptr field is set to ptr.
func wrapCommentNode(ptr uintptr) *Comment {
	var n Comment
	n.ptr = ptr
	return &n
}
35 |
// wrapElementNode returns a new Element whose ptr field is set to ptr.
func wrapElementNode(ptr uintptr) *Element {
	var n Element
	n.ptr = ptr
	return &n
}
41 |
// wrapTextNode returns a new Text whose ptr field is set to ptr.
func wrapTextNode(ptr uintptr) *Text {
	var n Text
	n.ptr = ptr
	return &n
}
47 |
// wrapPiNode returns a new Pi whose ptr field is set to ptr.
func wrapPiNode(ptr uintptr) *Pi {
	var n Pi
	n.ptr = ptr
	return &n
}
53 |
// WrapNode is a function created with the sole purpose of allowing
// go-libxml2 consumers that can generate a C.xmlNode pointer to
// create libxml2.Node types, e.g. go-xmlsec.
//
// The node type is read with clib.XMLGetNodeTypeRaw and the pointer is
// wrapped in the matching Go type. Only attribute, CDATA section, comment,
// element, text and processing-instruction nodes are handled; any other
// type yields an "unknown node" error.
func WrapNode(n uintptr) (types.Node, error) {
	switch typ := clib.XMLGetNodeTypeRaw(n); typ {
	case clib.AttributeNode:
		return wrapAttributeNode(n), nil
	case clib.CDataSectionNode:
		return wrapCDataSectionNode(n), nil
	case clib.CommentNode:
		return wrapCommentNode(n), nil
	case clib.ElementNode:
		return wrapElementNode(n), nil
	case clib.TextNode:
		return wrapTextNode(n), nil
	case clib.PiNode:
		return wrapPiNode(n), nil
	default:
		return nil, fmt.Errorf("unknown node: %d", typ)
	}
}
75 |
--------------------------------------------------------------------------------
/dom/serialize.go:
--------------------------------------------------------------------------------
1 | package dom
2 |
3 | import (
4 | "github.com/lestrrat-go/libxml2/clib"
5 | "github.com/lestrrat-go/libxml2/types"
6 | )
7 |
8 | // Serialize produces serialization of the document, canonicalized.
9 | func (s C14NSerialize) Serialize(n types.Node) (string, error) {
10 | /*
11 | * Below document is taken from libxml2 directly. Pay special attention
12 | * to the required settings when parsing the document to be canonicalized.
13 | *
14 | * ---
15 | * Canonical form of an XML document could be created if and only if
16 | * a) default attributes (if any) are added to all nodes
17 | * b) all character and parsed entity references are resolved
18 | * In order to achieve this in libxml2 the document MUST be loaded with
19 | * following global setings:
20 | *
21 | * xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS;
22 | * xmlSubstituteEntitiesDefault(1);
23 | *
24 | * or corresponding parser context setting:
25 | * xmlParserCtxtPtr ctxt;
26 | *
27 | * ...
28 | * ctxt->loadsubset = XML_DETECT_IDS | XML_COMPLETE_ATTRS;
29 | * ctxt->replaceEntities = 1;
30 | * ...
31 | * ---
32 | *
33 | * In go-libxml2, this translates to:
34 | *
35 | * options = XMLParserDTDLoad | XMLParserDTDAttr | XMLParserNoEnt
36 | *
37 | */
38 | switch n.(type) {
39 | case *Document:
40 | default:
41 | return "", ErrInvalidNodeType
42 | }
43 |
44 | return clib.XMLC14NDocDumpMemory(n, int(s.Mode), s.WithComments)
45 | }
46 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/lestrrat-go/libxml2
2 |
3 | go 1.21
4 |
5 | require (
6 | github.com/pkg/errors v0.9.1
7 | github.com/stretchr/testify v1.8.4
8 | gopkg.in/xmlpath.v1 v1.0.0-20140413065638-a146725ea6e7
9 | )
10 |
11 | require (
12 | github.com/davecgh/go-spew v1.1.1 // indirect
13 | github.com/pmezard/go-difflib v1.0.0 // indirect
14 | gopkg.in/yaml.v3 v3.0.1 // indirect
15 | launchpad.net/gocheck v0.0.0-20140225173054-000000000087 // indirect
16 | launchpad.net/xmlpath v0.0.0-20130614043138-000000000004 // indirect
17 | )
18 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
4 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
7 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
8 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
10 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
11 | gopkg.in/xmlpath.v1 v1.0.0-20140413065638-a146725ea6e7 h1:zibSPXbkfB1Dwl76rJgLa68xcdHu42qmFTe6vAnU4wA=
12 | gopkg.in/xmlpath.v1 v1.0.0-20140413065638-a146725ea6e7/go.mod h1:wo0SW5T6XqIKCCAge330Cd5sm+7VI6v85OrQHIk50KM=
13 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
14 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
15 | launchpad.net/gocheck v0.0.0-20140225173054-000000000087 h1:Izowp2XBH6Ya6rv+hqbceQyw/gSGoXfH/UPoTGduL54=
16 | launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80Vse0e+BUHsHMTEhd0O4cpUHr/e/BUM=
17 | launchpad.net/xmlpath v0.0.0-20130614043138-000000000004 h1:B8nNZBUrx8YufDCAJjvO/lVs4GxXMQHyrjwJdJzXMFg=
18 | launchpad.net/xmlpath v0.0.0-20130614043138-000000000004/go.mod h1:vqyExLOM3qBx7mvYRkoxjSCF945s0mbe7YynlKYXtsA=
19 |
--------------------------------------------------------------------------------
/html.go:
--------------------------------------------------------------------------------
1 | package libxml2
2 |
3 | import (
4 | "bytes"
5 | "io"
6 |
7 | "github.com/lestrrat-go/libxml2/clib"
8 | "github.com/lestrrat-go/libxml2/dom"
9 | "github.com/lestrrat-go/libxml2/parser"
10 | "github.com/lestrrat-go/libxml2/types"
11 | "github.com/pkg/errors"
12 | )
13 |
14 | // ParseHTML parses the HTML document held in content by converting it to a
15 | // string and delegating to ParseHTMLString, passing options through unchanged.
16 | func ParseHTML(content []byte, options ...parser.HTMLOption) (types.Document, error) {
17 | return ParseHTMLString(string(content), options...)
18 | }
19 |
20 | // ParseHTMLString parses the HTML document in content. With no options it
21 | // uses parser.DefaultHTMLOptions; otherwise all given options are OR'ed together.
22 | func ParseHTMLString(content string, options ...parser.HTMLOption) (types.Document, error) {
23 | 	var option parser.HTMLOption = parser.DefaultHTMLOptions
24 | 	if len(options) > 0 {
25 | 		option = 0 // explicit options replace the defaults instead of adding to them
26 | 		for _, o := range options {
27 | 			option |= o
28 | 		}
29 | 	}
30 | 	docptr, err := clib.HTMLReadDoc(content, "", "", int(option))
31 | 	if err != nil {
32 | 		return nil, errors.Wrap(err, "failed to read document")
33 | 	}
34 | 	if docptr == 0 { // no error was reported, yet no document pointer came back
35 | 		return nil, errors.Wrap(clib.ErrInvalidDocument, "failed to get valid document pointer")
36 | 	}
37 | 	return dom.WrapDocument(docptr), nil
38 | }
39 |
40 | // ParseHTMLReader reads all of in and parses what was read as an HTML
41 | // document via ParseHTMLString, passing options through unchanged.
42 | func ParseHTMLReader(in io.Reader, options ...parser.HTMLOption) (types.Document, error) {
43 | 	buf := &bytes.Buffer{}
44 | 	if _, err := buf.ReadFrom(in); err != nil {
45 | 		return nil, errors.Wrap(err, "failed to read from io.Reader")
46 | 	}
47 |
48 | 	return ParseHTMLString(buf.String(), options...)
49 | }
50 |
--------------------------------------------------------------------------------
/html_test.go:
--------------------------------------------------------------------------------
1 | package libxml2_test
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/lestrrat-go/libxml2"
7 | "github.com/lestrrat-go/libxml2/xpath"
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
11 | func TestParseHTML(t *testing.T) {
12 | doc, err := libxml2.ParseHTMLString(`
Hello, World!
Lorem Ipsum
`)
13 | if err != nil {
14 | t.Errorf("Failed to parse: %s", err)
15 | return
16 | }
17 | defer doc.Free()
18 |
19 | root, err := doc.DocumentElement()
20 | if !assert.NoError(t, err, "DocumentElement() should succeed") {
21 | return
22 | }
23 | if !assert.True(t, root.IsSameNode(root), "root == root") {
24 | return
25 | }
26 |
27 | nodes := xpath.NodeList(doc.Find("/html/body/h1"))
28 | if len(nodes) != 1 {
29 | t.Errorf("Could not find matching nodes")
30 | return
31 | }
32 |
33 | if nodes[0].TextContent() != "Hello, World!" {
34 | t.Errorf("h1 content is not 'Hello, World!', got %s", nodes[0].TextContent())
35 | return
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/internal/cmd/genwrapnode/genwrapnode.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "go/format"
7 | "io"
8 | "log"
9 | "os"
10 | "strconv"
11 |
12 | "github.com/pkg/errors"
13 | )
14 |
15 | func main() {
16 | 	// log.Fatalf prints the message and then calls os.Exit(1).
17 | 	if err := _main(); err != nil {
18 | 		log.Fatalf("%s", err)
19 | 	}
20 | }
21 |
22 | // _main generates the Go source of the node-wrapping helpers (package dom)
23 | // and writes it, gofmt-formatted, to the file named by the first command
24 | // line argument, or to standard output when no file is given. A leading
25 | // "--" argument is skipped.
26 | func _main() error {
27 | 	var buf bytes.Buffer
28 |
29 | 	buf.WriteString("package dom")
30 | 	buf.WriteString("\n\n// Auto-generated by internal/cmd/genwrapnode/genwrapnode.go. DO NOT EDIT!")
31 | 	buf.WriteString("\n\nimport (")
32 | 	buf.WriteString("\n\"fmt\"\n")
33 | 	for _, lib := range []string{"github.com/lestrrat-go/libxml2/clib", "github.com/lestrrat-go/libxml2/types"} {
34 | 		fmt.Fprintf(&buf, "\n%s", strconv.Quote(lib))
35 | 	}
36 | 	buf.WriteString("\n)")
37 |
38 | 	nodeTypes := []string{`Namespace`, `Attribute`, `CDataSection`, `Comment`, `Element`, `Text`, `Pi`}
39 |
40 | 	for _, typ := range nodeTypes {
41 | 		fmt.Fprintf(&buf, "\n\nfunc wrap%sNode(ptr uintptr) *%s {", typ, typ)
42 | 		fmt.Fprintf(&buf, "\nvar n %s", typ)
43 | 		buf.WriteString("\nn.ptr = ptr")
44 | 		buf.WriteString("\nreturn &n")
45 | 		buf.WriteString("\n}")
46 | 	}
47 |
48 | 	buf.WriteString("\n\n// WrapNode is a function created with the sole purpose of allowing")
49 | 	buf.WriteString("\n// go-libxml2 consumers that can generate a C.xmlNode pointer to")
50 | 	buf.WriteString("\n// create libxml2.Node types, e.g. go-xmlsec.")
51 | 	buf.WriteString("\nfunc WrapNode(n uintptr) (types.Node, error) {")
52 | 	buf.WriteString("\nswitch typ := clib.XMLGetNodeTypeRaw(n); typ {")
53 |
54 | 	for _, typ := range nodeTypes {
55 | 		// XXX hmm, this never existed. don't have time to debug right now.
56 | 		// possibly an omission bug?
57 | 		if typ == "Namespace" {
58 | 			continue
59 | 		}
60 | 		fmt.Fprintf(&buf, "\ncase clib.%sNode:", typ)
61 | 		fmt.Fprintf(&buf, "\nreturn wrap%sNode(n), nil", typ)
62 | 	}
63 |
64 | 	buf.WriteString("\ndefault:")
65 | 	// WriteString does no format expansion, so the verb is written as a plain
66 | 	// "%d"; "%%d" would reach the generated code verbatim and never print typ.
67 | 	buf.WriteString("\nreturn nil, fmt.Errorf(\"unknown node: %d\", typ)")
68 | 	buf.WriteString("\n}")
69 | 	buf.WriteString("\n}")
70 |
71 | 	src, err := format.Source(buf.Bytes())
72 | 	if err != nil {
73 | 		log.Printf("%s", buf.Bytes()) // dump the unformatted source to aid debugging
74 | 		return err
75 | 	}
76 |
77 | 	var out io.Writer = os.Stdout
78 | 	args := os.Args
79 | 	if len(args) > 2 && args[1] == "--" {
80 | 		// Drop the "--" separator while keeping the program name in args[0].
81 | 		args = append(append([]string(nil), args[:1]...), args[2:]...)
82 | 	}
83 | 	if len(args) > 1 {
84 | 		f, err := os.OpenFile(args[1], os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
85 | 		if err != nil {
86 | 			return errors.Wrapf(err, `failed to open %s`, args[1])
87 | 		}
88 | 		defer f.Close()
89 | 		out = f
90 | 	}
91 |
92 | 	if _, err := out.Write(src); err != nil {
93 | 		return errors.Wrap(err, `failed to write generated source`)
94 | 	}
95 | 	return nil
96 | }
97 |
--------------------------------------------------------------------------------
/internal/debug/debug_off.go:
--------------------------------------------------------------------------------
1 | //go:build !debug
2 | // +build !debug
3 |
4 | package debug
5 |
6 | const Enabled = false
7 |
8 | // Printf is a no-op in builds without the `debug` tag: it ignores its arguments.
9 | func Printf(_ string, _ ...interface{}) {}
10 |
--------------------------------------------------------------------------------
/internal/debug/debug_on.go:
--------------------------------------------------------------------------------
1 | //+build debug
2 |
3 | package debug
4 |
5 | import (
6 | "log"
7 | "os"
8 | )
9 |
10 | const Enabled = true
11 |
12 | var logger = log.New(os.Stdout, "|DEBUG| ", 0)
13 |
14 | // Printf writes a formatted debug message via logger; built only with the "debug" tag.
15 | func Printf(f string, args ...interface{}) {
16 | logger.Printf(f, args...)
17 | }
18 |
--------------------------------------------------------------------------------
/internal/option/interface.go:
--------------------------------------------------------------------------------
1 | package option
2 |
3 | const (
4 | OptKeyWithURI = `with-uri`
5 | )
6 |
--------------------------------------------------------------------------------
/internal/option/option.go:
--------------------------------------------------------------------------------
1 | package option
2 |
3 | // Interface is implemented by any option that exposes a name and a value.
4 | type Interface interface {
5 | 	Name() string
6 | 	Value() interface{}
7 | }
8 |
9 | // Option is a plain name/value pair.
10 | // *Option satisfies Interface.
11 | type Option struct {
12 | 	name  string
13 | 	value interface{}
14 | }
15 |
16 | // New creates an Option holding the given name and value.
17 | func New(name string, value interface{}) *Option {
18 | 	return &Option{name: name, value: value}
19 | }
20 |
21 | // Name returns the name the Option was created with.
22 | func (o *Option) Name() string { return o.name }
23 |
24 | // Value returns the value the Option was created with.
25 | func (o *Option) Value() interface{} { return o.value }
26 |
--------------------------------------------------------------------------------
/libxml2.go:
--------------------------------------------------------------------------------
1 | //go:generate go run internal/cmd/genwrapnode/genwrapnode.go -- dom/node_wrap.go
2 |
3 | /*
4 | Package libxml2 is an interface to libxml2 library, providing XML and HTML parsers
5 | with DOM interface. The inspiration is Perl5's XML::LibXML module.
6 |
7 | This library is still in very early stages of development. API may still change
8 | without notice.
9 |
10 | For the time being, the API is being written so that they are as close as we
11 | can get to DOM Layer 3, but some methods will, for the time being, be punted
12 | and aliases for simpler methods that don't necessarily check for the DOM's
13 | correctness will be used.
14 |
15 | Also, the return values are still shaky -- I'm still debating how to handle error cases gracefully.
16 | */
17 | package libxml2
18 |
--------------------------------------------------------------------------------
/libxml2_bench_test.go:
--------------------------------------------------------------------------------
1 | // This file is build-tag protected because it involves loading an external
2 | // library (xmlpath)
3 | package libxml2_test
4 |
5 | import (
6 | "bytes"
7 | "encoding/xml"
8 | "os"
9 | "path/filepath"
10 | "testing"
11 |
12 | "github.com/lestrrat-go/libxml2"
13 | "github.com/lestrrat-go/libxml2/dom"
14 | "github.com/lestrrat-go/libxml2/xpath"
15 | "github.com/stretchr/testify/assert"
16 | "gopkg.in/xmlpath.v1"
17 | )
18 |
19 | var xmlfile = filepath.Join("test", "feed.atom")
20 |
21 | // BenchmarkXmlpathXmlpath measures compiling and iterating `//entry` with xmlpath.
22 | func BenchmarkXmlpathXmlpath(b *testing.B) {
23 | 	f, err := os.Open(xmlfile)
24 | 	if err != nil {
25 | 		b.Fatalf("%s", err)
26 | 	}
27 | 	defer f.Close() // close the fixture file when the benchmark returns
28 | 	root, err := xmlpath.Parse(f)
29 | 	if err != nil {
30 | 		b.Fatalf("%s", err)
31 | 	}
32 | 	for i := 0; i < b.N; i++ {
33 | 		p, err := xmlpath.Compile(`//entry`)
34 | 		if err != nil {
35 | 			b.Fatalf("%s", err)
36 | 		}
37 | 		it := p.Iter(root)
38 | 		for it.Next() {
39 | 			_ = it.Node()
40 | 		}
41 | 	}
42 | }
43 |
44 | // TestBenchmarkLibxml2Xmlpath checks that the XPath query used by
45 | // BenchmarkLibxml2Xmlpath finds at least one atom:entry node in the fixture.
46 | func TestBenchmarkLibxml2Xmlpath(t *testing.T) {
47 | 	f, err := os.Open(xmlfile)
48 | 	if !assert.NoError(t, err, "os.Open succeeds") {
49 | 		return
50 | 	}
51 | 	defer f.Close()
52 | 	doc, err := libxml2.ParseReader(f)
53 | 	if !assert.NoError(t, err, "ParseReader succeeds") {
54 | 		return
55 | 	}
56 | 	defer doc.Free()
57 | 	xpc, err := xpath.NewContext(doc)
58 | 	if !assert.NoError(t, err, "xpath.NewContext succeeds") {
59 | 		return
60 | 	}
61 | 	defer xpc.Free()
62 | 	xpc.RegisterNS("atom", "http://www.w3.org/2005/Atom")
63 |
64 | 	res, err := xpc.Find(`//atom:entry`)
65 | 	if !assert.NoError(t, err, "xpc.Find succeeds") {
66 | 		return
67 | 	}
68 | 	defer res.Free()
69 |
70 | 	iter := res.NodeIter()
71 | 	if !assert.NotEmpty(t, iter, "res.NodeIter succeeds") {
72 | 		return
73 | 	}
74 |
75 | 	count := 0
76 | 	for iter.Next() {
77 | 		if !assert.NotEmpty(t, iter.Node(), "iter.Node returns something") {
78 | 			return
79 | 		}
80 | 		count++
81 | 	}
82 | 	assert.True(t, count > 0, "there's at least 1 node")
83 | }
84 |
85 | func BenchmarkLibxml2Xmlpath(b *testing.B) {
86 | 	f, err := os.Open(xmlfile)
87 | 	if err != nil {
88 | 		b.Fatalf("%s", err)
89 | 	}
90 | 	defer f.Close() // close the fixture file when the benchmark returns
91 | 	doc, err := libxml2.ParseReader(f)
92 | 	if err != nil {
93 | 		b.Fatalf("%s", err)
94 | 	}
95 | 	defer doc.Free() // release the parsed document
96 | 	xpc, err := xpath.NewContext(doc)
97 | 	if err != nil {
98 | 		b.Fatalf("%s", err)
99 | 	}
100 | 	defer xpc.Free() // release the XPath context before the document
101 | 	xpc.RegisterNS("atom", "http://www.w3.org/2005/Atom")
102 | 	for i := 0; i < b.N; i++ {
103 | 		iter := xpath.NodeIter(xpc.Find(`//atom:entry`))
104 | 		for iter.Next() {
105 | 			_ = iter.Node()
106 | 		}
107 | 	}
108 | }
109 |
110 | //nolint:musttag
111 | type Foo struct {
112 | XMLName xml.Name `xml:"https://github.com/lestrrat-go/libxml2/foo foo:foo"`
113 | Field1 string
114 | Field2 string `xml:",attr"`
115 | }
116 |
117 | // BenchmarkEncodingXMLDOM measures serializing a Foo value with encoding/xml.
118 | func BenchmarkEncodingXMLDOM(b *testing.B) {
119 | 	var buf bytes.Buffer
120 | 	f := Foo{Field1: "Hello, World!", Field2: "Hello, Attribute!"}
121 | 	for i := 0; i < b.N; i++ {
122 | 		buf.Reset()
123 | 		// Report an encoding failure rather than timing a broken run.
124 | 		if err := xml.NewEncoder(&buf).Encode(f); err != nil {
125 | 			b.Fatalf("%s", err)
126 | 		}
127 | 	}
128 | }
129 |
130 | // BenchmarkLibxml2DOM measures building a small namespaced document through
131 | // the dom package and dumping it to a string.
132 | func BenchmarkLibxml2DOM(b *testing.B) {
133 | 	var buf bytes.Buffer
134 | 	const nsuri = `https://github.com/lestrrat-go/libxml2/foo`
135 | 	f := Foo{Field1: "Hello, World!", Field2: "Hello, Attribute!"}
136 | 	for i := 0; i < b.N; i++ {
137 | 		d := dom.CreateDocument()
138 |
139 | 		root, err := d.CreateElementNS(nsuri, "foo:foo")
140 | 		if err != nil {
141 | 			d.Free()
142 | 			b.Fatalf("%s", err) // fail through testing.B instead of panicking
143 | 		}
144 | 		d.SetDocumentElement(root)
145 |
146 | 		f1xml, err := d.CreateElement("Field1")
147 | 		if err != nil {
148 | 			d.Free()
149 | 			b.Fatalf("%s", err)
150 | 		}
151 | 		root.AddChild(f1xml)
152 |
153 | 		// The attribute is set on the Field1 element, not on the root.
154 | 		f1xml.SetAttribute("Field2", f.Field2)
155 | 		f1xml.AppendText(f.Field1)
156 |
157 | 		buf.Reset()
158 | 		buf.WriteString(d.Dump(false))
159 | 		d.Free()
160 | 	}
161 | }
162 |
--------------------------------------------------------------------------------
/libxml2_example_test.go:
--------------------------------------------------------------------------------
1 | package libxml2_test
2 |
3 | import (
4 | "log"
5 | "net/http"
6 |
7 | "github.com/lestrrat-go/libxml2"
8 | "github.com/lestrrat-go/libxml2/parser"
9 | "github.com/lestrrat-go/libxml2/types"
10 | "github.com/lestrrat-go/libxml2/xpath"
11 | )
12 |
13 | //nolint:testableexamples
14 | func ExampleXML() {
15 | 	//nolint:noctx
16 | 	res, err := http.Get("http://blog.golang.org/feed.atom")
17 | 	if err != nil {
18 | 		panic("failed to get blog.golang.org: " + err.Error())
19 | 	}
20 | 	// Register the close before parsing so it runs on every exit path.
21 | 	defer res.Body.Close()
22 |
23 | 	p := parser.New()
24 | 	doc, err := p.ParseReader(res.Body)
25 | 	if err != nil {
26 | 		panic("failed to parse XML: " + err.Error())
27 | 	}
28 | 	defer doc.Free()
29 |
30 | 	doc.Walk(func(n types.Node) error {
31 | 		log.Println(n.NodeName())
32 | 		return nil
33 | 	})
34 |
35 | 	root, err := doc.DocumentElement()
36 | 	if err != nil {
37 | 		log.Printf("Failed to fetch document element: %s", err)
38 | 		return
39 | 	}
40 |
41 | 	ctx, err := xpath.NewContext(root)
42 | 	if err != nil {
43 | 		log.Printf("Failed to create xpath context: %s", err)
44 | 		return
45 | 	}
46 | 	defer ctx.Free()
47 |
48 | 	ctx.RegisterNS("atom", "http://www.w3.org/2005/Atom")
49 | 	title := xpath.String(ctx.Find("/atom:feed/atom:title/text()"))
50 | 	log.Printf("feed title = %s", title)
51 | }
52 |
53 | //nolint:testableexamples
54 | func ExampleHTML() {
55 | 	//nolint:noctx
56 | 	resp, err := http.Get("http://golang.org")
57 | 	if err != nil {
58 | 		panic("failed to get golang.org: " + err.Error())
59 | 	}
60 | 	defer resp.Body.Close()
61 |
62 | 	htmlDoc, err := libxml2.ParseHTMLReader(resp.Body)
63 | 	if err != nil {
64 | 		panic("failed to parse HTML: " + err.Error())
65 | 	}
66 | 	defer htmlDoc.Free()
67 |
68 | 	htmlDoc.Walk(func(node types.Node) error {
69 | 		log.Println(node.NodeName())
70 | 		return nil
71 | 	})
72 |
73 | 	// Log every <a> directly inside the div whose id is "menu".
74 | 	for _, link := range xpath.NodeList(htmlDoc.Find(`//div[@id="menu"]/a`)) {
75 | 		log.Printf("Found node: %s", link.NodeName())
76 | 	}
77 | }
78 |
--------------------------------------------------------------------------------
/parser.go:
--------------------------------------------------------------------------------
1 | package libxml2
2 |
3 | import (
4 | "io"
5 |
6 | "github.com/lestrrat-go/libxml2/parser"
7 | "github.com/lestrrat-go/libxml2/types"
8 | )
9 |
10 | // Parse parses the XML document in buf with a parser built from the
11 | // options o and returns the resulting Document.
12 | func Parse(buf []byte, o ...parser.Option) (types.Document, error) {
13 | 	return parser.New(o...).Parse(buf)
14 | }
15 |
16 | // ParseString parses the XML document in s with a parser built from the
17 | // options o and returns the resulting Document.
18 | func ParseString(s string, o ...parser.Option) (types.Document, error) {
19 | 	return parser.New(o...).ParseString(s)
20 | }
21 |
22 | // ParseReader parses the XML read from rdr with a parser built from the
23 | // options o and returns the resulting Document.
24 | func ParseReader(rdr io.Reader, o ...parser.Option) (types.Document, error) {
25 | 	return parser.New(o...).ParseReader(rdr)
26 | }
27 |
--------------------------------------------------------------------------------
/parser/interface.go:
--------------------------------------------------------------------------------
1 | package parser
2 |
3 | import "errors"
4 |
5 | var (
6 | // ErrMalformedXML is returned when the XML source is malformed
7 | ErrMalformedXML = errors.New("malformed XML")
8 | )
9 |
10 | // HTMLOption is a bit set of HTML parser options. Every constant below is
11 | // explicitly typed, so none of them silently degrades to an untyped int.
12 | type HTMLOption int
13 |
14 | const (
15 | 	// HTMLParseRecover enables relaxed parsing
16 | 	HTMLParseRecover HTMLOption = 1 << 0
17 | 	// HTMLParseNoDefDTD disables using a default doctype when absent
18 | 	HTMLParseNoDefDTD HTMLOption = 1 << 2
19 | 	// HTMLParseNoError suppresses error reports
20 | 	HTMLParseNoError HTMLOption = 1 << 5
21 | 	// HTMLParseNoWarning suppresses warning reports
22 | 	HTMLParseNoWarning HTMLOption = 1 << 6
23 | 	// HTMLParsePedantic enables pedantic error reporting
24 | 	HTMLParsePedantic HTMLOption = 1 << 7
25 | 	// HTMLParseNoBlanks removes blank nodes
26 | 	HTMLParseNoBlanks HTMLOption = 1 << 8
27 | 	// HTMLParseNoNet forbids network access during parsing
28 | 	HTMLParseNoNet HTMLOption = 1 << 11
29 | 	// HTMLParseNoImplied disables implied html/body elements
30 | 	HTMLParseNoImplied HTMLOption = 1 << 13
31 | 	// HTMLParseCompact enables compaction of small text nodes
32 | 	HTMLParseCompact HTMLOption = 1 << 16
33 | 	// HTMLParseIgnoreEnc ignores internal document encoding hints
34 | 	HTMLParseIgnoreEnc HTMLOption = 1 << 21
35 | )
36 |
37 | // DefaultHTMLOptions represents the default set of options
38 | // used in the ParseHTML* functions
39 | const DefaultHTMLOptions = HTMLParseCompact | HTMLParseNoBlanks | HTMLParseNoError | HTMLParseNoWarning
40 |
41 | // Option represents a set of XML parser option bits
42 | type Option int
43 |
44 | const (
45 | XMLParseRecover Option = 1 << iota /* recover on errors */
46 | XMLParseNoEnt /* substitute entities */
47 | XMLParseDTDLoad /* load the external subset */
48 | XMLParseDTDAttr /* default DTD attributes */
49 | XMLParseDTDValid /* validate with the DTD */
50 | XMLParseNoError /* suppress error reports */
51 | XMLParseNoWarning /* suppress warning reports */
52 | XMLParsePedantic /* pedantic error reporting */
53 | XMLParseNoBlanks /* remove blank nodes */
54 | XMLParseSAX1 /* use the SAX1 interface internally */
55 | XMLParseXInclude /* Implement XInclude substitution */
56 | XMLParseNoNet /* Forbid network access */
57 | XMLParseNoDict /* Do not reuse the context dictionary */
58 | XMLParseNsclean /* remove redundant namespaces declarations */
59 | XMLParseNoCDATA /* merge CDATA as text nodes */
60 | XMLParseNoXIncNode /* do not generate XINCLUDE START/END nodes */
61 | XMLParseCompact /* compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree) */
62 | XMLParseOld10 /* parse using XML-1.0 before update 5 */
63 | XMLParseNoBaseFix /* do not fixup XINCLUDE xml:base uris */
64 | XMLParseHuge /* relax any hardcoded limit from the parser */
65 | XMLParseOldSAX /* parse using SAX2 interface before 2.7.0 */
66 | XMLParseIgnoreEnc /* ignore internal document encoding hint */
67 | XMLParseBigLines /* Store big lines numbers in text PSVI field */
68 | XMLParseMax /* next bit after the last option; Option.String stops before it */
69 | XMLParseEmptyOption Option = 0 /* no option bits set */
70 | )
71 |
72 | // Ctxt represents the Parser context. You normally should be using
73 | // Parser, but if you for some reason need to do more low-level
74 | // magic you will have to tinker with this struct
75 | type Ctxt struct {
76 | ptr uintptr // *C.xmlParserCtxt
77 | }
78 |
79 | // Parser represents the high-level parser.
80 | type Parser struct {
81 | Options Option
82 | }
83 |
--------------------------------------------------------------------------------
/parser/parser.go:
--------------------------------------------------------------------------------
1 | package parser
2 |
3 | import (
4 | "bytes"
5 | "io"
6 |
7 | "github.com/lestrrat-go/libxml2/clib"
8 | "github.com/lestrrat-go/libxml2/dom"
9 | "github.com/lestrrat-go/libxml2/types"
10 | "github.com/pkg/errors"
11 | )
12 |
13 | const _OptionName = "RecoverNoEntDTDLoadDTDAttrDTDValidNoErrorNoWarningPedanticNoBlanksSAX1XIncludeNoNetNoDictNscleanNoCDATANoXIncNodeCompactOld10NoBaseFixHugeOldSAXIgnoreEncBigLines" // all option names concatenated; sliced by _OptionMap
14 |
15 | var _OptionMap = map[int]string{ // option bit value -> option name
16 | 1: _OptionName[0:7],
17 | 2: _OptionName[7:12],
18 | 4: _OptionName[12:19],
19 | 8: _OptionName[19:26],
20 | 16: _OptionName[26:34],
21 | 32: _OptionName[34:41],
22 | 64: _OptionName[41:50],
23 | 128: _OptionName[50:58],
24 | 256: _OptionName[58:66],
25 | 512: _OptionName[66:70],
26 | 1024: _OptionName[70:78],
27 | 2048: _OptionName[78:83],
28 | 4096: _OptionName[83:89],
29 | 8192: _OptionName[89:96],
30 | 16384: _OptionName[96:103],
31 | 32768: _OptionName[103:113],
32 | 65536: _OptionName[113:120],
33 | 131072: _OptionName[120:125],
34 | 262144: _OptionName[125:134],
35 | 524288: _OptionName[134:138],
36 | 1048576: _OptionName[138:144],
37 | 2097152: _OptionName[144:153],
38 | 4194304: _OptionName[153:161],
39 | }
40 |
41 | // Set turns on the given option bits in o; bits that are already on stay on.
42 | func (o *Option) Set(options ...Option) {
43 | 	merged := *o
44 | 	for _, opt := range options {
45 | 		merged |= opt
46 | 	}
47 | 	*o = merged
48 | }
49 |
50 | // String creates a string representation of the Option
51 | func (o Option) String() string {
52 | if o == XMLParseEmptyOption {
53 | return "[]"
54 | }
55 |
56 | i := int(o)
57 | b := bytes.Buffer{}
58 | b.Write([]byte{'['})
59 | for x := 1; x < int(XMLParseMax); x = x << 1 {
60 | if (i & x) == x {
61 | v, ok := _OptionMap[x]
62 | if !ok {
63 | v = "Option(Unknown)"
64 | }
65 | b.WriteString(v)
66 | b.Write([]byte{'|'})
67 | }
68 | }
69 | x := b.Bytes()
70 | if x[len(x)-1] == '|' {
71 | x[len(x)-1] = ']'
72 | } else {
73 | x = append(x, ']')
74 | }
75 | return string(x)
76 | }
77 |
78 | // New creates a new Parser with the given options.
79 | func New(opts ...Option) *Parser {
80 | var o Option
81 |
82 | for _, opt := range opts {
83 | o = o | opt
84 | }
85 |
86 | return &Parser{
87 | Options: o,
88 | }
89 | }
90 |
91 | // Parse parses the XML in buf by converting it to a string and calling ParseString
92 | func (p *Parser) Parse(buf []byte) (types.Document, error) {
93 | return p.ParseString(string(buf))
94 | }
95 |
96 | // ParseString parses the XML document in s using the parser's Options and
97 | // returns the resulting document. The parser context is freed on return.
98 | func (p *Parser) ParseString(s string) (types.Document, error) {
99 | 	ctx, err := NewCtxt(s, p.Options)
100 | 	if err != nil {
101 | 		return nil, errors.Wrap(err, "failed to create parse context")
102 | 	}
103 | 	defer func() { _ = ctx.Free() }()
104 |
105 | 	docptr, err := clib.XMLCtxtReadMemory(ctx, s, "", "", int(p.Options))
106 | 	switch {
107 | 	case err != nil:
108 | 		return nil, errors.Wrap(err, "failed to create parse input")
109 | 	case docptr == 0:
110 | 		return nil, errors.New("failed to generate document pointer")
111 | 	}
112 | 	return dom.WrapDocument(docptr), nil
113 | }
114 |
115 | // ParseReader parses XML from the given io.Reader
116 | func (p *Parser) ParseReader(in io.Reader) (types.Document, error) {
117 | buf := &bytes.Buffer{}
118 | if _, err := buf.ReadFrom(in); err != nil {
119 | return nil, errors.Wrap(err, "failed to read from reader")
120 | }
121 |
122 | return p.ParseString(buf.String())
123 | }
124 |
125 | // NewCtxt creates a parser context for s with the parse options o.
126 | func NewCtxt(s string, o Option) (*Ctxt, error) {
127 | 	ptr, err := clib.XMLCreateMemoryParserCtxt(s, int(o))
128 | 	if err == nil {
129 | 		return &Ctxt{ptr: ptr}, nil
130 | 	}
131 | 	return nil, errors.Wrap(err, "failed to execute XMLCreateMemoryParserCtxt")
132 | }
133 |
134 | // Pointer returns the raw pointer held by the context (the underlying C struct)
135 | func (ctx Ctxt) Pointer() uintptr {
136 | return ctx.ptr
137 | }
138 |
139 | // Parse runs clib.XMLParseDocument on the Ctxt and returns its error
140 | func (ctx Ctxt) Parse() error {
141 | return clib.XMLParseDocument(ctx)
142 | }
143 |
144 | // Free releases the underlying C struct and, on success, zeroes the pointer.
145 | func (ctx *Ctxt) Free() error {
146 | 	err := clib.XMLFreeParserCtxt(ctx)
147 | 	if err != nil {
148 | 		return errors.Wrap(err, "failed to free parser context")
149 | 	}
150 | 	// Clear the pointer only after the C side was released successfully.
151 | 	ctx.ptr = 0
152 | 	return nil
153 | }
153 |
--------------------------------------------------------------------------------
/parser_test.go:
--------------------------------------------------------------------------------
1 | package libxml2
2 |
3 | import (
4 | "regexp"
5 | "testing"
6 |
7 | "github.com/lestrrat-go/libxml2/dom"
8 | "github.com/lestrrat-go/libxml2/types"
9 |
10 | "github.com/lestrrat-go/libxml2/clib"
11 | "github.com/lestrrat-go/libxml2/parser"
12 | "github.com/stretchr/testify/assert"
13 | )
14 |
15 | const stdXMLDecl = `` + "\n"
16 |
17 | var (
18 | goodWFNSStrings = []string{
19 | stdXMLDecl + `` + "\n",
20 | stdXMLDecl + `` + "\n",
21 | stdXMLDecl + `` + "\n",
22 | stdXMLDecl + `` + "\n",
23 | stdXMLDecl + `` + "\n",
24 | }
25 | goodWFStrings = []string{
26 | ``,
27 | ``,
28 | ``,
29 | `` + "\n" + ``,
30 | `` + "\n" + ``,
31 | stdXMLDecl + ` ` + "\n",
32 | stdXMLDecl + ` `,
33 | stdXMLDecl + ` `,
34 | stdXMLDecl + `&"\` + "`" + `]]>`,
35 | stdXMLDecl + `<>&"'`,
36 | stdXMLDecl + ` `,
37 | stdXMLDecl + `foo`,
38 | stdXMLDecl + `foo`,
39 | stdXMLDecl + `foo`,
40 | stdXMLDecl + ``,
41 | stdXMLDecl + `"/>`,
42 | }
43 | goodWFDTDStrings = []string{
44 | stdXMLDecl + `` + "\n" + `]>` + "\n" + `&foo;`,
45 | stdXMLDecl + `]>&foo;`,
46 | stdXMLDecl + `]>&foo;>`,
47 | stdXMLDecl + `]>&foo;>`,
48 | stdXMLDecl + `]>&foo;>`,
49 | stdXMLDecl + `]>`,
50 | stdXMLDecl + `]>`,
51 | }
52 | badWFStrings = []string{
53 | "", // totally empty document
54 | stdXMLDecl, // only XML Declaration
55 | "", // comment only is like an empty document
56 | `]>`, // no good either ...
57 | "", // single tag (tag mismatch)
58 | "foo", // trailing junk
59 | "foo", // leading junk
60 | "", // bad attribute
61 | `&", // bad char
63 | `&//0x20;`, // bad char
64 | "r/>", // bad encoding
65 | "&foo;", // undefined entity
66 | ">", // unterminated entity
67 | stdXMLDecl + `]>`, // bad placed entity
68 | stdXMLDecl + `]>`, // even worse
69 | "", // bad comment
70 | "", // bad either... (is this conform with the spec????)
71 | }
72 | )
73 |
74 | func parseShouldSucceed(t *testing.T, opts parser.Option, inputs []string) {
75 | t.Logf("Test parsing with parser %v", opts)
76 | for _, s := range inputs {
77 | d, err := ParseString(s, opts)
78 | if !assert.NoError(t, err, "Parse should succeed") {
79 | return
80 | }
81 | d.Free()
82 | }
83 | }
84 |
85 | func parseShouldFail(t *testing.T, opts parser.Option, inputs []string) {
86 | for _, s := range inputs {
87 | d, err := ParseString(s, opts)
88 | if err == nil {
89 | d.Free()
90 | t.Errorf("Expected failure to parse '%s'", s)
91 | }
92 | }
93 | }
94 |
95 | type ParseOptionToString struct {
96 | v parser.Option
97 | e string
98 | }
99 |
100 | func TestParseOptionStringer(t *testing.T) {
101 | values := []ParseOptionToString{
102 | {
103 | v: parser.XMLParseRecover,
104 | e: "Recover",
105 | },
106 | {
107 | v: parser.XMLParseNoEnt,
108 | e: "NoEnt",
109 | },
110 | {
111 | v: parser.XMLParseDTDLoad,
112 | e: "DTDLoad",
113 | },
114 | {
115 | v: parser.XMLParseDTDAttr,
116 | e: "DTDAttr",
117 | },
118 | {
119 | v: parser.XMLParseDTDValid,
120 | e: "DTDValid",
121 | },
122 | {
123 | v: parser.XMLParseNoError,
124 | e: "NoError",
125 | },
126 | {
127 | v: parser.XMLParseNoWarning,
128 | e: "NoWarning",
129 | },
130 | {
131 | v: parser.XMLParsePedantic,
132 | e: "Pedantic",
133 | },
134 | {
135 | v: parser.XMLParseNoBlanks,
136 | e: "NoBlanks",
137 | },
138 | {
139 | v: parser.XMLParseSAX1,
140 | e: "SAX1",
141 | },
142 | {
143 | v: parser.XMLParseXInclude,
144 | e: "XInclude",
145 | },
146 | {
147 | v: parser.XMLParseNoNet,
148 | e: "NoNet",
149 | },
150 | {
151 | v: parser.XMLParseNoDict,
152 | e: "NoDict",
153 | },
154 | {
155 | v: parser.XMLParseNsclean,
156 | e: "Nsclean",
157 | },
158 | {
159 | v: parser.XMLParseNoCDATA,
160 | e: "NoCDATA",
161 | },
162 | {
163 | v: parser.XMLParseNoXIncNode,
164 | e: "NoXIncNode",
165 | },
166 | {
167 | v: parser.XMLParseCompact,
168 | e: "Compact",
169 | },
170 | {
171 | v: parser.XMLParseOld10,
172 | e: "Old10",
173 | },
174 | {
175 | v: parser.XMLParseNoBaseFix,
176 | e: "NoBaseFix",
177 | },
178 | {
179 | v: parser.XMLParseHuge,
180 | e: "Huge",
181 | },
182 | {
183 | v: parser.XMLParseOldSAX,
184 | e: "OldSAX",
185 | },
186 | {
187 | v: parser.XMLParseIgnoreEnc,
188 | e: "IgnoreEnc",
189 | },
190 | {
191 | v: parser.XMLParseBigLines,
192 | e: "BigLines",
193 | },
194 | }
195 |
196 | for _, d := range values {
197 | if d.v.String() != "["+d.e+"]" {
198 | t.Errorf("e '%s', got '%s'", d.e, d.v.String())
199 | }
200 | }
201 | }
202 |
203 | func TestParseEmpty(t *testing.T) {
204 | doc, err := ParseString(``)
205 | if err == nil {
206 | t.Errorf("Parse of empty string should fail")
207 | defer doc.Free()
208 | }
209 | }
210 |
211 | func TestParse(t *testing.T) {
212 | inputs := [][]string{
213 | goodWFStrings,
214 | goodWFNSStrings,
215 | goodWFDTDStrings,
216 | }
217 |
218 | for _, input := range inputs {
219 | parseShouldSucceed(t, 0, input)
220 | }
221 | }
222 |
223 | func TestParseBad(t *testing.T) {
224 | clib.ReportErrors(false)
225 | defer clib.ReportErrors(true)
226 |
227 | inputs := [][]string{
228 | badWFStrings,
229 | }
230 |
231 | for _, input := range inputs {
232 | parseShouldFail(t, 0, input)
233 | }
234 | }
235 |
236 | func TestParseNoBlanks(t *testing.T) {
237 | inputs := [][]string{
238 | goodWFStrings,
239 | goodWFNSStrings,
240 | goodWFDTDStrings,
241 | }
242 | for _, input := range inputs {
243 | parseShouldSucceed(t, parser.XMLParseNoBlanks, input)
244 | }
245 | }
246 |
247 | func TestRoundtripNoBlanks(t *testing.T) {
248 | doc, err := ParseString(` `, parser.XMLParseNoBlanks)
249 | if err != nil {
250 | t.Errorf("failed to parse string: %s", err)
251 | return
252 | }
253 |
254 | if !assert.Regexp(t, regexp.MustCompile(``), doc.Dump(false), "stringified xml should have no blanks") {
255 | return
256 | }
257 | }
258 |
259 | func TestOptionsShouldCombine(t *testing.T) {
260 | opts := map[parser.Option][]parser.Option{
261 | parser.Option(64): {parser.XMLParseNoWarning},
262 | parser.Option(96): {parser.XMLParseNoWarning, parser.XMLParseNoError},
263 | }
264 |
265 | for expected, options := range opts {
266 | p := parser.New(options...)
267 | assert.Equal(t, expected, p.Options)
268 | }
269 | }
270 |
271 | func TestGHIssue23(t *testing.T) {
272 | const src = `
273 |
274 | Hello
275 | Goodbye!
276 | `
277 |
278 | doc, err := ParseString(src, parser.XMLParseRecover, parser.XMLParseNoWarning, parser.XMLParseNoError)
279 | if !assert.NoError(t, err, "should pass") {
280 | return
281 | }
282 | doc.Free()
283 | }
284 |
285 | func TestCommentWrapNodeIssue(t *testing.T) {
286 | // should wrap comment node
287 | const testHTML = ""
288 |
289 | doc, err := ParseHTMLString(testHTML, parser.HTMLParseRecover)
290 | if err != nil {
291 | t.Fatalf("Got error when parsing HTML: %v", err)
292 | }
293 |
294 | bodyRes, err := doc.Find("//body")
295 | if err != nil {
296 | t.Fatalf("Got error when grabbing body: %v", err)
297 | }
298 |
299 | bodyChildren, err := bodyRes.NodeList().First().ChildNodes()
300 | if err != nil {
301 | t.Fatalf("Got error when grabbing body's children: %v", err)
302 | }
303 |
304 | if str := bodyChildren.String(); str != testHTML {
305 | t.Fatalf("HTML did not convert back correctly, expected: %v, got: %v.", testHTML, str)
306 | }
307 | }
308 |
309 | func TestPiWrapNodeIssue(t *testing.T) {
310 | // should wrap Pi node
311 | const textXML = "\ntest \n"
312 | doc, err := ParseString(textXML)
313 | if err != nil {
314 | t.Fatalf("Got error when parsing xml: %v", err)
315 | }
316 |
317 | nodes, err := doc.ChildNodes()
318 | if err != nil {
319 | t.Fatalf("Got error when getting childnodes: %v", err)
320 | }
321 |
322 | for _, node := range nodes {
323 | if node.HasChildNodes() {
324 | if _, err := node.ChildNodes(); err != nil {
325 | t.Fatalf("Got error when getting childnodes of childnodes: %v", err)
326 | }
327 | }
328 | }
329 |
330 | if str := doc.String(); str != textXML {
331 | t.Fatalf("XML did not convert back correctly, expected: %v, got: %v", textXML, str)
332 | }
333 | }
334 |
335 | func TestGetNonexistentAttributeReturnsRecoverableError(t *testing.T) {
336 | const src = ``
337 | doc, err := ParseString(src)
338 | if !assert.NoError(t, err, "Should parse") {
339 | return
340 | }
341 | defer doc.Free()
342 |
343 | rootNode, err := doc.DocumentElement()
344 | if !assert.NoError(t, err, "Should find root element") {
345 | return
346 | }
347 |
348 | el, ok := rootNode.(types.Element)
349 | if !ok {
350 | t.Fatalf("Root node was not an element")
351 | }
352 |
353 | _, err = el.GetAttribute("non-existent")
354 | if err != dom.ErrAttributeNotFound {
355 | t.Fatalf("GetAttribute() error not comparable to existing library")
356 | }
357 | }
358 |
--------------------------------------------------------------------------------
/test/euc-jp.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lestrrat-go/libxml2/c934e3fcb9d356b6842830761f72b4802d40c60a/test/euc-jp.xml
--------------------------------------------------------------------------------
/test/go_libxml2_local.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 | 4cfcf843c66979eb1df2bd0c52817edb753a52ba
8 |
9 |
10 | this is a test string only.
11 | be218408a748759fb98363593b8f544eb054171bced856ca98bd972823dec0b07b205453fc3c46f23c934d0959f1e05b609c011b6ada84a7050ad7c910b24bf1
12 |
13 |
14 | foobar
15 | f7b34871a562283ee92bbda00485eb45
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/test/go_libxml2_remote.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 | 4cfcf843c66979eb1df2bd0c52817edb753a52ba
8 |
9 |
10 | this is a test string only.
11 | be218408a748759fb98363593b8f544eb054171bced856ca98bd972823dec0b07b205453fc3c46f23c934d0959f1e05b609c011b6ada84a7050ad7c910b24bf1
12 |
13 |
14 | foobar
15 | f7b34871a562283ee92bbda00485eb45
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/test/link/test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/lestrrat-go/libxml2"
5 | )
6 |
7 | func main() {
8 | doc, err := libxml2.ParseHTMLString(`Hello, World!
Lorem Ipsum
`)
9 | if err != nil {
10 | panic(err)
11 | }
12 | doc.Free()
13 | }
14 |
--------------------------------------------------------------------------------
/test/schema/lib/types/cksum.xsd:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/test/schema/lib/types/net.xsd:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
10 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
--------------------------------------------------------------------------------
/test/schema/lib/types/std.xsd:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/test/schema/lib/types/unix.xsd:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
--------------------------------------------------------------------------------
/test/schema/projects/go_libxml2_local.xsd:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/test/schema/projects/go_libxml2_remote.xsd:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/test/sjis.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lestrrat-go/libxml2/c934e3fcb9d356b6842830761f72b4802d40c60a/test/sjis.xml
--------------------------------------------------------------------------------
/test/utf-8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | はろー、わーるど!
4 |
5 |
--------------------------------------------------------------------------------
/test/xmldsig-core-schema.xsd:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
9 |
10 | ]>
11 |
12 |
27 |
28 |
29 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
216 |
217 |
218 |
219 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
--------------------------------------------------------------------------------
/types/interface.go:
--------------------------------------------------------------------------------
1 | package types
2 |
3 | import "github.com/lestrrat-go/libxml2/clib"
4 |
// PtrSource is the interface for values that are backed by a C backend.
type PtrSource interface {
	// Pointer exposes the underlying C pointer as a uintptr. It is exported
	// only so that the internal go-libxml2 packages can interoperate with
	// each other; end users are STRONGLY advised not to touch this method
	// or its return value.
	Pointer() uintptr

	// Free releases the underlying resources.
	Free()
}
17 |
18 | // XPathExpression defines the interface for XPath expression
19 | type XPathExpression interface {
20 | PtrSource
21 | }
22 |
23 | // XPathResult defines the interface for result of calling Find().
24 | type XPathResult interface {
25 | Bool() bool
26 | Free()
27 | NodeList() NodeList
28 | NodeIter() NodeIter
29 | Number() float64
30 | String() string
31 | Type() clib.XPathObjectType
32 | }
33 |
34 | // Document defines the interface for XML document
35 | type Document interface {
36 | Node
37 | CreateElement(string) (Element, error)
38 | CreateElementNS(string, string) (Element, error)
39 | DocumentElement() (Node, error)
40 | Dump(bool) string
41 | Encoding() string
42 | }
43 |
44 | // Attribute defines the interface for XML attribute
45 | type Attribute interface {
46 | Node
47 | Value() string
48 | }
49 |
50 | // Element defines the interface for XML element
51 | //
52 | //nolint:interfacebloat
53 | type Element interface {
54 | Node
55 | AppendText(string) error
56 | Attributes() ([]Attribute, error)
57 | GetAttribute(string) (Attribute, error)
58 | GetNamespaces() ([]Namespace, error)
59 | LocalName() string
60 | NamespaceURI() string
61 | Prefix() string
62 | RemoveAttribute(string) error
63 | SetAttribute(string, string) error
64 | SetNamespace(string, string, ...bool) error
65 | }
66 |
67 | // Namespace defines the interface for XML namespace
68 | type Namespace interface {
69 | Node
70 | Prefix() string
71 | URI() string
72 | }
73 |
74 | // Node defines the basic DOM interface
75 | //
76 | //nolint:interfacebloat
77 | type Node interface {
78 | PtrSource
79 |
80 | ParseInContext(string, int) (Node, error)
81 |
82 | AddChild(Node) error
83 | ChildNodes() (NodeList, error)
84 | Copy() (Node, error)
85 | OwnerDocument() (Document, error)
86 | Find(string) (XPathResult, error)
87 | FirstChild() (Node, error)
88 | HasChildNodes() bool
89 | IsSameNode(Node) bool
90 | LastChild() (Node, error)
91 | // Literal is almost the same as String(), except for things like Element
92 | // and Attribute nodes. String() will return the XML stringification of
93 | // these, but Literal() will return the "value" associated with them.
94 | Literal() (string, error)
95 | LookupNamespacePrefix(string) (string, error)
96 | LookupNamespaceURI(string) (string, error)
97 | NextSibling() (Node, error)
98 | NodeName() string
99 | NodeType() clib.XMLNodeType
100 | NodeValue() string
101 | ParentNode() (Node, error)
102 | PreviousSibling() (Node, error)
103 | RemoveChild(Node) error
104 | SetDocument(d Document) error
105 | SetNodeName(string)
106 | SetNodeValue(string)
107 | String() string
108 | TextContent() string
109 | ToString(int, bool) string
110 | Walk(func(Node) error) error
111 |
112 | MakeMortal()
113 | MakePersistent()
114 | AutoFree()
115 | }
116 |
117 | type NodeIter interface {
118 | Next() bool
119 | Node() Node
120 | }
121 |
122 | // NodeList is a set of Nodes
123 | type NodeList []Node
124 |
--------------------------------------------------------------------------------
/types/types.go:
--------------------------------------------------------------------------------
1 | /*
2 | Package types exists to provide common types that are used
3 | throughout go-libxml2. This package mainly contains interfaces
4 | to things that are implemented elsewhere. It is in its own package
5 | so that any package can refer to these interfaces without introducing
6 | a circular dependency.
7 | */
8 | package types
9 |
10 | import "bytes"
11 |
12 | // String returns the string representation of the NodeList
13 | func (n NodeList) String() string {
14 | buf := bytes.Buffer{}
15 | for _, x := range n {
16 | buf.WriteString(x.String())
17 | }
18 | return buf.String()
19 | }
20 |
21 | // NodeValue returns the concatenation of NodeValue within the nodes in NodeList
22 | func (n NodeList) NodeValue() string {
23 | buf := bytes.Buffer{}
24 | for _, x := range n {
25 | buf.WriteString(x.NodeValue())
26 | }
27 | return buf.String()
28 | }
29 |
30 | // Literal returns the string representation of the NodeList (using Literal())
31 | func (n NodeList) Literal() (string, error) {
32 | buf := bytes.Buffer{}
33 | for _, x := range n {
34 | l, err := x.Literal()
35 | if err != nil {
36 | return "", err
37 | }
38 | buf.WriteString(l)
39 | }
40 | return buf.String(), nil
41 | }
42 |
43 | // First returns the first node in the list, or nil otherwise.
44 | func (n NodeList) First() Node {
45 | if n == nil {
46 | return nil
47 | }
48 |
49 | if len(n) > 0 {
50 | return n[0]
51 | }
52 |
53 | return nil
54 | }
55 |
--------------------------------------------------------------------------------
/xml_test.go:
--------------------------------------------------------------------------------
1 | package libxml2
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "testing"
7 |
8 | "github.com/lestrrat-go/libxml2/dom"
9 | "github.com/lestrrat-go/libxml2/parser"
10 | "github.com/lestrrat-go/libxml2/types"
11 | "github.com/lestrrat-go/libxml2/xpath"
12 | "github.com/stretchr/testify/assert"
13 | )
14 |
15 | func TestEncoding(t *testing.T) {
16 | for _, enc := range []string{`utf-8`, `sjis`, `euc-jp`} {
17 | fn := fmt.Sprintf(`test/%s.xml`, enc)
18 | f, err := os.Open(fn)
19 | if err != nil {
20 | t.Errorf("Failed to open %s: %s", fn, err)
21 | return
22 | }
23 | defer f.Close()
24 |
25 | p := parser.New()
26 | doc, err := p.ParseReader(f)
27 | if err != nil {
28 | t.Errorf("Failed to parse %s: %s", fn, err)
29 | return
30 | }
31 |
32 | if doc.Encoding() != enc {
33 | t.Errorf("Expected encoding %s, got %s", enc, doc.Encoding())
34 | return
35 | }
36 | }
37 | }
38 |
39 | func TestNamespacedReconciliation(t *testing.T) {
40 | d := dom.CreateDocument()
41 | root, err := d.CreateElement("foo")
42 | if !assert.NoError(t, err, "failed to create document") {
43 | return
44 | }
45 | d.SetDocumentElement(root)
46 | if !assert.NoError(t, root.SetNamespace("http://default", "root"), "SetNamespace should succeed") {
47 | return
48 | }
49 |
50 | if !assert.NoError(t, root.SetNamespace("http://children", "child", false), "SetNamespace (no-activate) should succeed") {
51 | return
52 | }
53 |
54 | n, err := d.CreateElementNS("http://default", "branch")
55 | if !assert.NoError(t, err, "CreateElementNS should succeed") {
56 | return
57 | }
58 | root.AddChild(n)
59 |
60 | _, err = n.GetAttribute("xmlns")
61 | if !assert.Error(t, err, "GetAttribute should fail with not found") ||
62 | !assert.Equal(t, "attribute not found", err.Error(), "error matches") {
63 | return
64 | }
65 |
66 | var c types.Element
67 | for _, name := range []string{"a", "b", "c"} {
68 | child, err := d.CreateElementNS("http://children", "child:"+name)
69 | if !assert.NoError(t, err, "CreateElementNS should succeed") {
70 | return
71 | }
72 | if name == "c" {
73 | c = child
74 | }
75 | n.AddChild(child)
76 | _, err = n.GetAttribute("xmlns:child")
77 | if !assert.Error(t, err, "GetAttribute should fail with not found") ||
78 | !assert.Equal(t, "attribute not found", err.Error(), "error matches") {
79 | return
80 | }
81 | }
82 |
83 | if !assert.NoError(t, c.SetAttribute("xmlns:foo", "http://children"), "SetAttribute should succeed") {
84 | return
85 | }
86 |
87 | attr, err := c.GetAttribute("xmlns:foo")
88 | if !assert.NoError(t, err, "xmlns:foo should exist") {
89 | return
90 | }
91 | if !assert.Equal(t, "http://children", attr.Value(), "attribute matches") {
92 | return
93 | }
94 |
95 | child, err := d.CreateElementNS("http://other", "branch")
96 | if !assert.NoError(t, err, "creating element with default namespace") {
97 | return
98 | }
99 | n.AddChild(child)
100 |
101 | // XXX This still fails
102 | /*
103 | attr, err = child.GetAttribute("xmlns")
104 | if !assert.NoError(t, err, "GetAttribute should succeed") {
105 | return
106 | }
107 | if !assert.Equal(t, "http://other", attr.Value(), "attribute matches") {
108 | return
109 | }
110 | */
111 |
112 | t.Logf("%s", d.String())
113 | }
114 |
115 | func TestRegressionGH7(t *testing.T) {
116 | doc, err := ParseHTMLString(`
117 |
118 |
119 |
120 |
122 | 1234
123 |
124 |
125 | `)
126 |
127 | if !assert.NoError(t, err, "ParseHTMLString should succeed") {
128 | return
129 | }
130 |
131 | nodes := xpath.NodeList(doc.Find(`./body/div`))
132 | if !assert.NotEmpty(t, nodes, "Find should succeed") {
133 | return
134 | }
135 |
136 | v, err := nodes.Literal()
137 | if !assert.NoError(t, err, "Literal() should succeed") {
138 | return
139 | }
140 | if !assert.NotEmpty(t, v, "Literal() should return some string") {
141 | return
142 | }
143 | t.Logf("v = '%s'", v)
144 | }
145 |
146 | func TestGHIssue43(t *testing.T) {
147 | d := dom.CreateDocument()
148 | r, _ := d.CreateElement("root")
149 | r.SetNamespace("http://some.uri", "pfx", true)
150 | d.SetDocumentElement(r)
151 | e, _ := d.CreateElement("elem")
152 | e.SetNamespace("http://other.uri", "", true)
153 | r.AddChild(e)
154 | s := d.ToString(1, true)
155 |
156 | if !assert.Contains(t, s, `= n.nlen {
28 | return false
29 | }
30 |
31 | n.cur++
32 | node, err := WrapNodeFunc(n.nodes[n.cur])
33 | if err != nil {
34 | n.nlen = 0
35 | return false
36 | }
37 | n.curnode = node
38 | return true
39 | }
40 |
41 | func (n *NodeIterator) Node() types.Node {
42 | return n.curnode
43 | }
44 |
--------------------------------------------------------------------------------
/xpath/util.go:
--------------------------------------------------------------------------------
1 | package xpath
2 |
3 | import "github.com/lestrrat-go/libxml2/types"
4 |
5 | // String returns the string component of the result, and as a side effect
6 | // releases the Result by calling Free() on it. Use this if you do not
7 | // really care about the error value from Find()
8 | func String(r types.XPathResult, err error) string {
9 | if err != nil {
10 | return ""
11 | }
12 |
13 | defer r.Free()
14 | return r.String()
15 | }
16 |
17 | // Bool returns the boolean component of the result, and as a side effect
18 | // releases the Result by calling Free() on it. Use this if you do not
19 | // really care about the error value from Find()
20 | func Bool(r types.XPathResult, err error) bool {
21 | if err != nil {
22 | return false
23 | }
24 |
25 | defer r.Free()
26 | return r.Bool()
27 | }
28 |
29 | // Number returns the numeric component of the result, and as a side effect
30 | // releases the Result by calling Free() on it. Use this if you do not
31 | // really care about the error value from Find()
32 | func Number(r types.XPathResult, err error) float64 {
33 | if err != nil {
34 | return 0
35 | }
36 |
37 | defer r.Free()
38 | return r.Number()
39 | }
40 |
41 | // NodeList returns the nodes associated with this result, and as a side effect
42 | // releases the result by calling Free() on it. Use this if you do not
43 | // really care about the error value from Find().
44 | func NodeList(r types.XPathResult, err error) types.NodeList {
45 | if err != nil {
46 | return nil
47 | }
48 |
49 | defer r.Free()
50 | return r.NodeList()
51 | }
52 |
53 | // NodeIter returns an iterator that will return the nodes assocaied with
54 | // this reult, and as a side effect releases the result by calling Free()
55 | // on it. Use this if you do not really care about the error value from Find().
56 | func NodeIter(r types.XPathResult, err error) types.NodeIter {
57 | if err != nil {
58 | return NewNodeIterator(nil)
59 | }
60 |
61 | defer r.Free()
62 | return r.NodeIter()
63 | }
64 |
--------------------------------------------------------------------------------
/xpath/xpath.go:
--------------------------------------------------------------------------------
1 | /*
2 | Package xpath contains tools to handle XPath evaluation.
3 |
4 | Because of a very quirky dependency between this package and the
5 | github.com/lestrrat-go/libxml2/dom package, you MUST import both
6 | packages to properly use it.
7 |
8 | import (
9 | "github.com/lestrrat-go/libxml2/dom"
10 | "github.com/lestrrat-go/libxml2/xpath"
11 | )
12 |
13 | Or, if you have no use for dom package in your program, and you
14 | don't want to use the magical "_" import, you can do the initialization
15 | yourself just to appease the compiler:
16 |
17 | func init() {
18 | dom.SetupXPathCallback()
19 | }
20 | */
21 | package xpath
22 |
23 | import (
24 | "fmt"
25 |
26 | "github.com/lestrrat-go/libxml2/clib"
27 | "github.com/lestrrat-go/libxml2/types"
28 | "github.com/pkg/errors"
29 | )
30 |
31 | // Pointer returns the underlying C struct
32 | func (x Object) Pointer() uintptr {
33 | return x.ptr
34 | }
35 |
36 | // Type returns the clib.XPathObjectType
37 | func (x Object) Type() clib.XPathObjectType {
38 | return clib.XMLXPathObjectType(x)
39 | }
40 |
41 | // Number returns the floatval component of the Object as float64
42 | func (x Object) Number() float64 {
43 | return clib.XMLXPathObjectFloat64(x)
44 | }
45 |
46 | // Bool returns the boolval component of the Object
47 | func (x Object) Bool() bool {
48 | return clib.XMLXPathObjectBool(x)
49 | }
50 |
51 | // WrapNodeFunc is a function that gets called when Object.NodeList()
52 | // is called. This is necessary because during the call to NodeList(),
53 | // the underlying C pointers are materialized to objects in a different
54 | // package ("github.com/lestrrat-go/libxml2/dom"), and said package
55 | // uses this package... Yes, a circular dependency.
56 | //
57 | // Normally this means that both packages should live under the same
58 | // unified package, but in this case they are independent enough that
59 | // we have decided they warrant to be separated.
60 | //
61 | // So this WrapNodeFunc is our workaround for this problem: when
62 | // github.com/lestrrat-go/libxml2/dom is loaded, it automatically
63 | // initializes this function to an appropriate function on the fly.
64 | var WrapNodeFunc func(uintptr) (types.Node, error)
65 |
66 | // NodeList returns the list of nodes included in this Object
67 | func (x Object) NodeList() types.NodeList {
68 | if WrapNodeFunc == nil {
69 | panic("WarapNodeFunc not initialized. read XXX for details")
70 | }
71 |
72 | nl, err := clib.XMLXPathObjectNodeList(x)
73 | if err != nil {
74 | return nil
75 | }
76 |
77 | ret := make([]types.Node, len(nl))
78 | for i, p := range nl {
79 | n, err := WrapNodeFunc(p)
80 | if err != nil {
81 | return nil
82 | }
83 | ret[i] = n
84 | }
85 | return ret
86 | }
87 |
88 | func (x Object) NodeIter() types.NodeIter {
89 | nl, err := clib.XMLXPathObjectNodeList(x)
90 | if err != nil {
91 | return NewNodeIterator(nil)
92 | }
93 | return NewNodeIterator(nl)
94 | }
95 |
96 | // String returns the stringified value of the nodes included in
97 | // this Object. If the Object is anything other than a
98 | // NodeSet, then we fallback to using fmt.Sprintf to generate
99 | // some sort of readable output
100 | func (x Object) String() string {
101 | switch x.Type() {
102 | case NodeSetType:
103 | nl := x.NodeList()
104 | if nl == nil {
105 | return ""
106 | }
107 | if x.ForceLiteral {
108 | s, err := nl.Literal()
109 | if err == nil {
110 | return s
111 | }
112 | return ""
113 | }
114 | return nl.NodeValue()
115 | default:
116 | return fmt.Sprintf("%T", x)
117 | }
118 | }
119 |
120 | // Free releases the underlying C structs
121 | func (x *Object) Free() {
122 | clib.XMLXPathFreeObject(x)
123 | }
124 |
125 | // NewExpression compiles the given XPath expression string
126 | func NewExpression(s string) (*Expression, error) {
127 | ptr, err := clib.XMLXPathCompile(s)
128 | if err != nil {
129 | return nil, errors.Wrap(err, "failed to compile expression")
130 | }
131 |
132 | return &Expression{ptr: ptr, expr: s}, nil
133 | }
134 |
135 | // Pointer returns the underlying C struct
136 | func (x *Expression) Pointer() uintptr {
137 | return x.ptr
138 | }
139 |
140 | // String returns the expression as it was given to NewExpression
141 | func (x Expression) String() string {
142 | return x.expr
143 | }
144 |
145 | // Free releases the underlying C structs in the Expression
146 | func (x *Expression) Free() {
147 | _ = clib.XMLXPathFreeCompExpr(x)
148 | }
149 |
150 | // NewContext creates a new Context, optionally providing
151 | // with a context node.
152 | //
153 | // Note that although we are specifying `n... Node` for the argument,
154 | // only the first, node is considered for the context node
155 | func NewContext(n ...types.Node) (*Context, error) {
156 | var node types.Node
157 | if len(n) > 0 {
158 | node = n[0]
159 | }
160 |
161 | ctxptr, err := clib.XMLXPathNewContext(node)
162 | if err != nil {
163 | return nil, errors.Wrap(err, "failed to get valid xpath context")
164 | }
165 |
166 | return &Context{ptr: ctxptr}, nil
167 | }
168 |
169 | // Pointer returns a pointer to the underlying C struct
170 | func (x *Context) Pointer() uintptr {
171 | return x.ptr
172 | }
173 |
174 | // SetContextNode sets or resets the context node which
175 | // XPath expressions will be evaluated against.
176 | func (x *Context) SetContextNode(n types.Node) error {
177 | return clib.XMLXPathContextSetContextNode(x, n)
178 | }
179 |
180 | // Exists compiles and evaluates the xpath expression, and returns
181 | // true if a corresponding node exists
182 | func (x *Context) Exists(xpath string) bool {
183 | list := NodeList(x.Find(xpath))
184 | if list == nil {
185 | return false
186 | }
187 |
188 | return len(list) > 0
189 | }
190 |
191 | // Free releases the underlying C structs in the XPath
192 | func (x *Context) Free() {
193 | _ = clib.XMLXPathFreeContext(x)
194 | }
195 |
196 | // Find evaluates the expression s against the nodes registered
197 | // in x. It returns the resulting data evaluated to an Result.
198 | //
199 | // You MUST call Free() on the Result, or you will leak memory
200 | // If you don't really care for errors and just want to grab the
201 | // value of Result, checkout xpath.String(), xpath.Number(), xpath.Bool()
202 | // et al.
203 | func (x *Context) Find(s string) (types.XPathResult, error) {
204 | expr, err := NewExpression(s)
205 | if err != nil {
206 | return nil, errors.Wrap(err, "failed to compile expression")
207 | }
208 | defer expr.Free()
209 |
210 | return x.FindExpr(expr)
211 | }
212 |
213 | // FindExpr evaluates the given XPath expression and returns an Object.
214 | // You must call `Free()` on this returned object
215 | //
216 | // You MUST call Free() on the Result, or you will leak memory
217 | func (x *Context) FindExpr(expr types.XPathExpression) (types.XPathResult, error) {
218 | res, err := clib.XMLEvalXPath(x, expr)
219 | if err != nil {
220 | return nil, errors.Wrap(err, "failed to evaluate expression")
221 | }
222 |
223 | return &Object{ptr: res}, nil
224 | }
225 |
226 | // LookupNamespaceURI looksup the namespace URI associated with prefix
227 | func (x *Context) LookupNamespaceURI(prefix string) (string, error) {
228 | return clib.XMLXPathNSLookup(x, prefix)
229 | }
230 |
231 | // RegisterNS registers a namespace so it can be used in an Expression
232 | func (x *Context) RegisterNS(name, nsuri string) error {
233 | return clib.XMLXPathRegisterNS(x, name, nsuri)
234 | }
235 |
--------------------------------------------------------------------------------
/xpath/xpath_test.go:
--------------------------------------------------------------------------------
1 | package xpath_test
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/lestrrat-go/libxml2"
7 | "github.com/lestrrat-go/libxml2/xpath"
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
11 | func TestXPathContext(t *testing.T) {
12 | doc, err := libxml2.ParseString(``)
13 | if err != nil {
14 | t.Errorf("Failed to parse string: %s", err)
15 | }
16 | defer doc.Free()
17 |
18 | root, err := doc.DocumentElement()
19 | if !assert.NoError(t, err, "DocumentElement should succeed") {
20 | return
21 | }
22 |
23 | ctx, err := xpath.NewContext(root)
24 | if err != nil {
25 | t.Errorf("Failed to initialize XPathContext: %s", err)
26 | return
27 | }
28 | defer ctx.Free()
29 |
30 | // Use a string
31 | exprString := `/*`
32 | nodes := xpath.NodeList(ctx.Find(exprString))
33 | if len(nodes) != 1 {
34 | t.Errorf("Expected 1 nodes, got %d", len(nodes))
35 | return
36 | }
37 |
38 | iter := xpath.NodeIter(ctx.Find(exprString))
39 | {
40 | count := 0
41 | for iter.Next() {
42 | iter.Node()
43 | count++
44 | }
45 | if !assert.Equal(t, 1, count, "got 1 nodes from iterator") {
46 | return
47 | }
48 | }
49 |
50 | // Use an explicitly compiled expression
51 | expr, err := xpath.NewExpression(exprString)
52 | if err != nil {
53 | t.Errorf("Failed to compile xpath: %s", err)
54 | return
55 | }
56 | defer expr.Free()
57 |
58 | nodes = xpath.NodeList(ctx.FindExpr(expr))
59 | if len(nodes) != 1 {
60 | t.Errorf("Expected 1 nodes, got %d", len(nodes))
61 | return
62 | }
63 | }
64 |
65 | func TestXPathContextExpression_Number(t *testing.T) {
66 | ctx, err := xpath.NewContext()
67 | if err != nil {
68 | t.Errorf("Failed to initialize XPathContext: %s", err)
69 | return
70 | }
71 | defer ctx.Free()
72 |
73 | if !assert.Equal(t, float64(2), xpath.Number(ctx.Find("1+1")), "XPath evaluates to 2") {
74 | return
75 | }
76 | if !assert.Equal(t, float64(0), xpath.Number(ctx.Find("1<>1")), "XPath evaluates to 0") {
77 | return
78 | }
79 | }
80 |
81 | func TestXPathContextExpression_Boolean(t *testing.T) {
82 | ctx, err := xpath.NewContext()
83 | if err != nil {
84 | t.Errorf("Failed to initialize XPathContext: %s", err)
85 | return
86 | }
87 | defer ctx.Free()
88 |
89 | if !assert.True(t, xpath.Bool(ctx.Find("1=1")), "XPath evaluates to true") {
90 | return
91 | }
92 | if !assert.False(t, xpath.Bool(ctx.Find("1<>1")), "XPath evaluates to false") {
93 | return
94 | }
95 | }
96 |
97 | func TestXPathContextExpression_NodeList(t *testing.T) {
98 | doc, err := libxml2.ParseString(`bazquux`)
99 | if err != nil {
100 | t.Errorf("Failed to parse string: %s", err)
101 | }
102 | defer doc.Free()
103 |
104 | root, err := doc.DocumentElement()
105 | if !assert.NoError(t, err, "DocumentElement should succeed") {
106 | return
107 | }
108 |
109 | ctx, err := xpath.NewContext(root)
110 | if err != nil {
111 | t.Errorf("Failed to initialize XPathContext: %s", err)
112 | return
113 | }
114 | defer ctx.Free()
115 |
116 | if !assert.Len(t, xpath.NodeList(ctx.Find("/foo/bar")), 2, "XPath evaluates to 2 nodes") {
117 | return
118 | }
119 |
120 | if !assert.Len(t, xpath.NodeList(ctx.Find("/foo/bar[bogus")), 0, "XPath evaluates to 0 nodes") {
121 | return
122 | }
123 |
124 | if !assert.Equal(t, "bazquux", xpath.String(ctx.Find("/foo/bar")), "XPath evaluates to 'bazquux'") {
125 | return
126 | }
127 |
128 | if !assert.Equal(t, "", xpath.String(ctx.Find("/[bogus")), "XPath evaluates to ''") {
129 | return
130 | }
131 | }
132 |
133 | func TestXPathContextExpression_Namespaces(t *testing.T) {
134 | doc, err := libxml2.ParseString(``)
135 | if err != nil {
136 | t.Errorf("Failed to parse string: %s", err)
137 | }
138 | defer doc.Free()
139 |
140 | root, err := doc.DocumentElement()
141 | if !assert.NoError(t, err, "DocumentElement() should succeed") {
142 | return
143 | }
144 |
145 | ctx, err := xpath.NewContext(root)
146 | if err != nil {
147 | t.Errorf("Failed to initialize XPathContext: %s", err)
148 | return
149 | }
150 | defer ctx.Free()
151 |
152 | prefix := `xxx`
153 | nsuri := `http://example.com/foobar`
154 | if err := ctx.RegisterNS(prefix, nsuri); err != nil {
155 | t.Errorf("Failed to register namespace: %s", err)
156 | return
157 | }
158 |
159 | nodes := xpath.NodeList(ctx.Find(`/xxx:foo`))
160 | if len(nodes) != 1 {
161 | t.Errorf(`Expected 1 node, got %d`, len(nodes))
162 | return
163 | }
164 | if nodes[0].NodeName() != "foo" {
165 | t.Errorf(`Expected NodeName() "foo", got "%s"`, nodes[0].NodeName())
166 | return
167 | }
168 |
169 | gotns, err := ctx.LookupNamespaceURI(prefix)
170 | if err != nil {
171 | t.Errorf(`LookupNamespaceURI failed: %s`, err)
172 | return
173 | }
174 |
175 | if gotns != nsuri {
176 | t.Errorf(`Expected LookupNamespaceURI("%s") "%s", got "%s"`, prefix, nsuri, gotns)
177 | return
178 | }
179 |
180 | if !ctx.Exists(`//xxx:bar/@a`) {
181 | t.Errorf(`Expected "//xxx:bar/@a" to exist`)
182 | return
183 | }
184 | if ctx.Exists(`//xxx:bar/@b`) {
185 | t.Errorf(`Expected "//xxx:bar/@b" to NOT exist`)
186 | return
187 | }
188 | }
189 |
--------------------------------------------------------------------------------
/xsd/interface.go:
--------------------------------------------------------------------------------
1 | package xsd
2 |
3 | import "github.com/lestrrat-go/libxml2/internal/option"
4 |
5 | // Schema represents an XML schema.
6 | type Schema struct {
7 | ptr uintptr // *C.xmlSchema
8 | }
9 |
10 | // SchemaValidationError is returned when the Validate() function
11 | // finds errors. When there are multiple errors, you may access
12 | // them using the Errors() method
13 | type SchemaValidationError struct {
14 | errors []error
15 | }
16 |
17 | type Option = option.Interface
18 |
--------------------------------------------------------------------------------
/xsd/option.go:
--------------------------------------------------------------------------------
1 | package xsd
2 |
3 | import (
4 | "net/url"
5 | "os"
6 | "path/filepath"
7 |
8 | "github.com/lestrrat-go/libxml2/internal/option"
9 | )
10 |
11 | // WithPath provides a hint to the XSD parser as to where the
12 | // document being parsed is located at.
13 | //
14 | // This is useful when you must resolve relative paths inside a
15 | // document, because to use relative paths the parser needs to
16 | // know the reference location (i.e. location of the document
17 | // being parsed). In case where you are parsing using `ParseFromFile()`
18 | // this is handled automatically by the `ParseFromFile` method,
19 | // but if you are using `Parse` method this is required
20 | //
21 | // If the path is provided as a relative path, the current directory
22 | // should be obtainable via `os.Getwd` when this call is made, otherwise
23 | // path resolution may fail in weird ways.
24 | func WithPath(path string) Option {
25 | if !filepath.IsAbs(path) {
26 | if curdir, err := os.Getwd(); err == nil {
27 | path = filepath.Join(curdir, path)
28 | }
29 | }
30 |
31 | return WithURI(
32 | (&url.URL{
33 | Scheme: `file`,
34 | Path: path,
35 | }).String(),
36 | )
37 | }
38 |
39 | func WithURI(v string) Option {
40 | return option.New(option.OptKeyWithURI, v)
41 | }
42 |
--------------------------------------------------------------------------------
/xsd/xsd.go:
--------------------------------------------------------------------------------
1 | // Package xsd contains some of the tools available from libxml2
2 | // that allow you to validate your XML against an XSD.
3 | //
4 | // This is basically all you need to do:
5 | //
6 | // schema, err := xsd.Parse(xsdsrc)
7 | // if err != nil {
8 | // panic(err)
9 | // }
10 | // defer schema.Free()
11 | // if err := schema.Validate(doc); err != nil{
12 | // for _, e := range err.(xsd.SchemaValidationError).Errors() {
13 | // println(e.Error())
14 | // }
15 | // }
16 | package xsd
17 |
18 | import (
19 | "github.com/lestrrat-go/libxml2/clib"
20 | "github.com/lestrrat-go/libxml2/types"
21 | "github.com/pkg/errors"
22 | )
23 |
// ValueVCCreate is a flag value that may be passed as one of the options
// of Schema.Validate.
//
// NOTE(review): presumably mirrors libxml2's XML_SCHEMA_VAL_VC_I_CREATE
// (materialize default/fixed values on the instance document while
// validating) — confirm against the clib implementation.
const ValueVCCreate = 1
25 |
26 | // Parse is used to parse an XML Schema Document to produce a
27 | // Schema instance. Make sure to call Free() on the instance
28 | // when you are done with it.
29 |
30 | func Parse(buf []byte, options ...Option) (*Schema, error) {
31 | // xsd.WithURI(...)
32 | sptr, err := clib.XMLSchemaParse(buf, options...)
33 | if err != nil {
34 | return nil, errors.Wrap(err, "failed to parse input")
35 | }
36 |
37 | return &Schema{ptr: sptr}, nil
38 | }
39 |
40 | // ParseFromFile is used to parse an XML schema using only the file path.
41 | // Make sure to call Free() on the instance when you are done with it.
42 | func ParseFromFile(path string) (*Schema, error) {
43 | sptr, err := clib.XMLSchemaParseFromFile(path)
44 | if err != nil {
45 | return nil, errors.Wrap(err, "failed to parse input from file")
46 | }
47 |
48 | return &Schema{ptr: sptr}, nil
49 | }
50 |
51 | // Pointer returns the underlying C struct
52 | func (s *Schema) Pointer() uintptr {
53 | return s.ptr
54 | }
55 |
56 | // Free frees the underlying C struct
57 | func (s *Schema) Free() {
58 | if err := clib.XMLSchemaFree(s); err != nil {
59 | return
60 | }
61 | s.ptr = 0
62 | }
63 |
64 | // Validate takes in a XML document and validates it against
65 | // the schema. If there are any problems, and error is
66 | // returned.
67 | func (s *Schema) Validate(d types.Document, options ...int) error {
68 | errs := clib.XMLSchemaValidateDocument(s, d, options...)
69 | if errs == nil {
70 | return nil
71 | }
72 |
73 | return SchemaValidationError{errors: errs}
74 | }
75 |
76 | // Error method fulfils the error interface
77 | func (sve SchemaValidationError) Error() string {
78 | return "schema validation failed"
79 | }
80 |
81 | // Errors returns the list of errors found
82 | func (sve SchemaValidationError) Errors() []error {
83 | return sve.errors
84 | }
85 |
--------------------------------------------------------------------------------
/xsd_test.go:
--------------------------------------------------------------------------------
1 | package libxml2_test
2 |
3 | import (
4 | "io"
5 | "net/http"
6 | "net/http/httptest"
7 | "os"
8 | "path/filepath"
9 | "testing"
10 |
11 | "github.com/lestrrat-go/libxml2"
12 | "github.com/lestrrat-go/libxml2/xsd"
13 | "github.com/stretchr/testify/assert"
14 | )
15 |
16 | func TestXSD(t *testing.T) {
17 | xsdfile := filepath.Join("test", "xmldsig-core-schema.xsd")
18 | f, err := os.Open(xsdfile)
19 | if !assert.NoError(t, err, "open schema") {
20 | return
21 | }
22 | defer f.Close()
23 |
24 | buf, err := io.ReadAll(f)
25 | if !assert.NoError(t, err, "reading from schema") {
26 | return
27 | }
28 |
29 | s, err := xsd.Parse(buf)
30 | if !assert.NoError(t, err, "parsing schema") {
31 | return
32 | }
33 | defer s.Free()
34 |
35 | func() {
36 | d, err := libxml2.ParseString(``)
37 | if !assert.NoError(t, err, "parsing XML") {
38 | return
39 | }
40 | defer d.Free()
41 |
42 | err = s.Validate(d)
43 | if !assert.Error(t, err, "s.Validate should fail") {
44 | return
45 | }
46 |
47 | serr, ok := err.(xsd.SchemaValidationError)
48 | if !assert.True(t, ok, "error is xsd.SchemaValidationErr") {
49 | return
50 | }
51 |
52 | if !assert.Len(t, serr.Errors(), 1, "there's one error") {
53 | return
54 | }
55 | for _, e := range serr.Errors() {
56 | t.Logf("err (OK): '%s'", e)
57 | }
58 | }()
59 |
60 | func() {
61 | const src = `
62 |
63 |
64 |
67 |
69 |
70 |
71 |
73 |
74 |
76 | uooqbWYa5VCqcJCbuymBKqm17vY=
77 |
78 |
79 |
80 | KedJuTob5gtvYx9qM3k3gm7kbLBwVbEQRl26S2tmXjqNND7MRGtoew==
81 |
82 |
83 |
84 |
85 |
86 | /KaCzo4Syrom78z3EQ5SbbB4sF7ey80etKII864WF64B81uRpH5t9jQTxe
87 | Eu0ImbzRMqzVDZkVG9xD7nN1kuFw==
88 |
89 | li7dzDacuo67Jg7mtqEm2TRuOMU=
90 | Z4Rxsnqc9E7pGknFFH2xqaryRPBaQ01khpMdLRQnG541Awtx/
91 | XPaF5Bpsy4pNWMOHCBiNU0NogpsQW5QvnlMpA==
92 |
93 | qV38IqrWJG0V/
94 | mZQvRVi1OHw9Zj84nDC4jO8P0axi1gb6d+475yhMjSc/
95 | BrIVC58W3ydbkK+Ri4OKbaRZlYeRA==
96 |
97 |
98 |
99 |
100 |
101 | `
102 | d, err := libxml2.ParseString(src)
103 | if !assert.NoError(t, err, "parsing XML") {
104 | return
105 | }
106 | defer d.Free()
107 |
108 | err = s.Validate(d)
109 | if !assert.NoError(t, err, "s.Validate should pass") {
110 | if serr, ok := err.(xsd.SchemaValidationError); ok {
111 | for _, e := range serr.Errors() {
112 | t.Logf("err: %s", e)
113 | }
114 | }
115 | return
116 | }
117 | }()
118 | }
119 |
120 | func TestXSDDefaultValue(t *testing.T) {
121 | const schemasrc = `
122 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 | `
143 | const docsrc = `
144 |
145 |
146 |
147 | `
148 |
149 | schema, err := xsd.Parse([]byte(schemasrc))
150 | if !assert.NoError(t, err, `xsd.Parse should succeed`) {
151 | return
152 | }
153 | defer schema.Free()
154 |
155 | doc, err := libxml2.ParseString(docsrc)
156 | if !assert.NoError(t, err, "parsing XML") {
157 | return
158 | }
159 | defer doc.Free()
160 | if !assert.NoError(t, schema.Validate(doc, xsd.ValueVCCreate), `schema.Validate should succeed`) {
161 | return
162 | }
163 |
164 | t.Logf("%s", doc.String())
165 | }
166 |
167 | func TestGHIssue67(t *testing.T) {
168 | t.Run("Local validation", func(t *testing.T) {
169 | const schemafile = "test/schema/projects/go_libxml2_local.xsd"
170 | const docfile = "test/go_libxml2_local.xml"
171 |
172 | schemasrc, err := os.ReadFile(schemafile)
173 | if !assert.NoError(t, err, `failed to read xsd file`) {
174 | return
175 | }
176 |
177 | docsrc, err := os.ReadFile(docfile)
178 | if !assert.NoError(t, err, `failed to read xml file`) {
179 | return
180 | }
181 |
182 | schema, err := xsd.Parse(schemasrc, xsd.WithPath(schemafile))
183 | if !assert.NoError(t, err, `xsd.Parse should succeed`) {
184 | return
185 | }
186 | defer schema.Free()
187 |
188 | doc, err := libxml2.Parse(docsrc)
189 | if !assert.NoError(t, err, "parsing XML") {
190 | return
191 | }
192 | defer doc.Free()
193 | if !assert.NoError(t, schema.Validate(doc, xsd.ValueVCCreate), `schema.Validate should succeed`) {
194 | return
195 | }
196 |
197 | t.Logf("%s", doc.String())
198 | })
199 | t.Run("Remote validation", func(t *testing.T) {
200 | curdir, err := os.Getwd()
201 | if !assert.NoError(t, err, `os.Getwd failed`) {
202 | return
203 | }
204 |
205 | srv := httptest.NewServer(http.FileServer(http.Dir(curdir)))
206 | defer srv.Close()
207 |
208 | var schemafile = srv.URL + "/test/schema/projects/go_libxml2_remote.xsd"
209 | const docfile = "test/go_libxml2_remote.xml"
210 |
211 | //nolint:noctx
212 | res, err := http.Get(schemafile)
213 | if !assert.NoError(t, err, `failed to fetch xsd file`) {
214 | return
215 | }
216 |
217 | schemasrc, err := io.ReadAll(res.Body)
218 | defer res.Body.Close()
219 | if !assert.NoError(t, err, `failed to read xsd file`) {
220 | return
221 | }
222 |
223 | docsrc, err := os.ReadFile(docfile)
224 | if !assert.NoError(t, err, `failed to read xml file`) {
225 | return
226 | }
227 |
228 | schema, err := xsd.Parse(schemasrc, xsd.WithURI(schemafile))
229 | if !assert.NoError(t, err, `xsd.Parse should succeed`) {
230 | return
231 | }
232 | defer schema.Free()
233 |
234 | doc, err := libxml2.Parse(docsrc)
235 | if !assert.NoError(t, err, "parsing XML") {
236 | return
237 | }
238 | defer doc.Free()
239 | if !assert.NoError(t, schema.Validate(doc, xsd.ValueVCCreate), `schema.Validate should succeed`) {
240 | return
241 | }
242 |
243 | t.Logf("%s", doc.String())
244 | })
245 | }
246 |
--------------------------------------------------------------------------------