├── .github ├── CONTRIBUTING.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ ├── feature_request.md │ └── others.md ├── auto-assign-pr.yml ├── dependabot.yml └── workflows │ ├── assign-issue.yml │ ├── assign-pr.yml │ ├── ci.yml │ ├── codeql.yml │ ├── lint.yml │ └── stale.yml ├── .gitignore ├── .golangci.yml ├── LICENSE ├── README.md ├── clib ├── clib.go ├── interface.go ├── link_dynamic.go └── link_static.go ├── dom ├── c14n_test.go ├── document_test.go ├── dom.go ├── interface.go ├── node.go ├── node_attr.go ├── node_document.go ├── node_element.go ├── node_namespace.go ├── node_test.go ├── node_text.go ├── node_wrap.go └── serialize.go ├── go.mod ├── go.sum ├── html.go ├── html_test.go ├── internal ├── cmd │ └── genwrapnode │ │ └── genwrapnode.go ├── debug │ ├── debug_off.go │ └── debug_on.go └── option │ ├── interface.go │ └── option.go ├── libxml2.go ├── libxml2_bench_test.go ├── libxml2_example_test.go ├── parser.go ├── parser ├── interface.go └── parser.go ├── parser_test.go ├── test ├── euc-jp.xml ├── feed.atom ├── go_libxml2_local.xml ├── go_libxml2_remote.xml ├── link │ └── test.go ├── schema │ ├── lib │ │ └── types │ │ │ ├── cksum.xsd │ │ │ ├── net.xsd │ │ │ ├── std.xsd │ │ │ └── unix.xsd │ └── projects │ │ ├── go_libxml2_local.xsd │ │ └── go_libxml2_remote.xsd ├── sjis.xml ├── utf-8.xml └── xmldsig-core-schema.xsd ├── types ├── interface.go └── types.go ├── xml_test.go ├── xpath ├── interface.go ├── iterator.go ├── util.go ├── xpath.go └── xpath_test.go ├── xsd ├── interface.go ├── option.go └── xsd.go └── xsd_test.go /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING 2 | 3 | ❤❤❤🎉 Thank you for considering to contribute to this project! 🎉❤❤❤ 4 | 5 | The following is a set of guidelines that we ask you to follow when you contribute to this project. 
6 | 7 | # Index 8 | 9 | * [tl;dr](#tldr) 10 | * [Please Be Nice](#please-be-nice) 11 | * [Please Use Correct Medium (GitHub Issues / Discussions)](#please-use-correct-medium-github-issues--discussions) 12 | * [Please Include (Pseudo)code for Any Technical Issues](#please-include-pseudocode-for-any-technical-issues) 13 | * [Reviewer/Reviewee Guidelines](#reviewerreviewee-guidelines) 14 | * [Brown M&M Clause](#brown-mm-clause) 15 | * [Pull Requests](#pull-requests) 16 | * [Branches](#branches) 17 | * [Generated Files](#generated-files) 18 | * [Test Cases](#test-cases) 19 | 20 | # tl;dr 21 | 22 | * 📕 Please read this Guideline in its entirety once, if only to check the headings. 23 | * 🙋 Please be nice, and please be aware that we are not providing this software as a hobby. 24 | * 💬 Open-ended questions and inquiries go to [Discussions](https://github.com/lestrrat-go/libxml2/discussions). 25 | * 🖥️ Actionable, specific technical questions go to [Issues](https://github.com/lestrrat-go/libxml2/issues). 26 | * 📝 Please always include (pseudo)code for any technical questions/issues. 27 | * 🔒 Issues, PR, and other posts may be closed or not addressed if you do not follow these guidelines 28 | 29 | # Please Be Nice 30 | 31 | [Main source; if wording differs, the main source supersedes this copy](https://github.com/lestrrat-go/contributions/blob/main/Contributions.md) 32 | 33 | Please be nice when you contact us. 34 | 35 | We are very glad that you find this project useful, and we intend to provide software that helps you. 36 | 37 | You do not have to thank us, but please bear in mind that this is an opensource project that is provided **as-is**. 38 | This means that we are **NOT** obligated to support you, work for you, do your homework/research for you, 39 | or otherwise heed your needs. 40 | 41 | We do not owe you one bit of code, or a fix, even if it's a critical one. 42 | 43 | We write software because we're curious, we fix bugs because we have integrity. 
44 | 45 | But we do not owe you anything. Please do not order us to work for you. 46 | We are not your support staff, and we are not here to do your research. 47 | We are willing to help, but only as long as you are being nice to us. 48 | 49 | # Please Use Correct Medium (GitHub Issues / Discussions) 50 | 51 | [Main source; this is a specialized version copied from the main source](https://github.com/lestrrat-go/contributions/blob/main/Contributions.md) 52 | 53 | This project uses [GitHub Issues](https://github.com/lestrrat-go/libxml2/issues) to deal with technical issues 54 | including bug reports, proposing new API, and otherwise issues that are directly actionable. 55 | 56 | Inquiries, questions about the usage, maintenance policies, and other open-ended 57 | questions/discussions should be posted to [GitHub Discussions](https://github.com/lestrrat-go/libxml2/discussions). 58 | 59 | # Please Include (Pseudo)code for Any Technical Issues 60 | 61 | [Main source; if wording differ, the main source supersedes this copy](https://github.com/lestrrat-go/contributions/blob/main/Contributions.md) 62 | 63 | Your report should contain clear, concise description of the issue that you are facing. 64 | However, at the same time please always include (pseudo)code in report. 65 | 66 | English may not be your forte, but we all should speak the common language of code. 67 | Rather than trying to write an entire essay or beat around the bush, which will 68 | more than likely cost both you and the maintainers extra roundtrips to communicate, 69 | please use code to describe _exactly_ what you are trying to achieve. 70 | 71 | Good reports should contain (in order of preference): 72 | 73 | 1. Complete Go-style test code. 74 | 1. Code snippet that clearly shows the intent of your code. 75 | 1. Pseudocode that shows how you would want the API to work. 
76 | 77 | As we are dealing with code, ultimately there is 78 | no better way to convey what you are trying to do than to provide 79 | your code. 80 | 81 | Please help us help you by providing us with reproducible code. 82 | 83 | # Reviewer/Reviewee Guidelines 84 | 85 | If you are curious about what gets reviewed and why some decisions 86 | are made the way they are, please read [this document](https://github.com/lestrrat-go/contributions/blob/main/Reviews.md) to get some insight into the thinking process. 87 | 88 | # Brown M&M Clause 89 | 90 | If you came here from an issue/PR template, please make sure to delete 91 | the section on "Contribution Guidelines" from the template. 92 | 93 | Failure to do so may result in the maintainers assuming that you have 94 | not fully read the guidelines. 95 | 96 | [(Reference)](https://www.insider.com/van-halen-brown-m-ms-contract-2016-9) 97 | 98 | # Pull Requests 99 | 100 | ## Branches 101 | 102 | ### `vXXX` branches 103 | 104 | Stable releases, such as `v1`, `v2`, etc. Please do not work against these branches. 105 | Use the `develop/vXXX` branches instead. 106 | 107 | ### `develop/vXXX` branches 108 | 109 | Development occurs on these branches. If you wish to make changes against 110 | `v2`, work on the `develop/v2` branch. 111 | 112 | When you make a PR, fork this branch, make your changes and create a PR against 113 | these development branches. 114 | 115 | ```mermaid 116 | sequenceDiagram 117 | autonumber 118 | participant v1/v2/.. 119 | participant develop/v1/v2/.. 120 | participant feature_branch 121 | develop/v1/v2/..->>feature_branch: Fork development branch to your feature branch 122 | Note over feature_branch: Work on your feature 123 | feature_branch->>develop/v1/v2/..: File a PR against the development branch 124 | develop/v1/v2/..->>v1/v2/..: Merge changes 125 | ``` 126 | 127 | ## Generated Files 128 | 129 | All files with file names ending in `_gen.go` are generated by a tool. 
These files 130 | should not be modified directly. Instead, find out the tool that is generating the 131 | file by inspecting the file. Usually the tool that generated the file is listed 132 | in the comment section at the top of the file. 133 | 134 | Usually these files are generated based on a rule file (such as a YAML file). 135 | When you craft a pull request, you should include both changes to the rule file(s) 136 | and the generated file(s). The CI will run `go generate` and make sure that 137 | there are no extra `diff`s that have not been committed. 138 | 139 | ## Test Cases 140 | 141 | In general any code change must be accompanied by a test case. 142 | 143 | It is obviously very important to test the functionality. But adding test cases 144 | also gives you the opportunity to check for yourself how the new code should/can 145 | be used in practice. Test cases also act as a great way to communicate any 146 | assumptions or requirements that your code needs in order to function properly. 
147 | 148 | 149 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: 4 | - lestrrat 5 | patreon: # Replace with a single Patreon username 6 | open_collective: # Replace with a single Open Collective username 7 | ko_fi: # Replace with a single Ko-fi username 8 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 9 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 10 | liberapay: # Replace with a single Liberapay username 11 | issuehunt: # Replace with a single IssueHunt username 12 | otechie: # Replace with a single Otechie username 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Contribution Guidelines** 11 | 12 | Before filing an issue, please read the contents of [CONTRIBUTING.md](https://github.com/lestrrat-go/libxml2/blob/v2/.github/CONTRIBUTING.md), and follow its instructions. 13 | 14 | **Describe the bug** 15 | 16 | A clear and concise description of what the bug is. 17 | 18 | Please attach the output of `go version` 19 | 20 | **To Reproduce / Expected behavior** 21 | Please attach a standalone Go test code that shows the problem, and what you expected to happen. 22 | 23 | If you are asking for an API change or some such which inhibits you from providing a working code, please do your best to come up with a near-valid code. 
24 | 25 | **Additional context** 26 | Add any other context or screenshots about the problem here. Please delete this section if unnecessary. 27 | 28 | **Sponsors** 29 | Are you sponsoring the authors? If so, let us know. Otherwise, please delete this section. 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Contribution Guidelines** 11 | 12 | Before filing an issue, please read the contents of [CONTRIBUTING.md](https://github.com/lestrrat-go/libxml2/blob/v2/.github/CONTRIBUTING.md), and follow its instructions. 13 | 14 | **Abstract** 15 | Please describe concisely what you want to accomplish, including prerequisite information. Please remember that if _you_ cannot articulate the problem, we cannot guess what you are thinking. 16 | 17 | **Describe the proposed solution/change** 18 | Please attach a standalone Go test code that shows the problem, and what you expected to happen. 19 | 20 | If it's a behavior change, please include a failing (or would-be failing) test case. If it's a structural or an API change, we understand that you cannot create a complete compiling code, but please do your best to produce near-valid code that shows exactly what you want. 21 | 22 | **Analysis** 23 | Please describe alternative solutions that you have considered, and pros/cons between them. 24 | 25 | **Additional context** 26 | Add any other context or screenshots about the feature request here. Please delete this section if unnecessary. 
27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/others.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'Other Issues' 3 | about: 'Other types of issues' 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Contribution Guidelines** 11 | 12 | Before filing an issue, please read the contents of [CONTRIBUTING.md](https://github.com/lestrrat-go/libxml2/blob/v2/.github/CONTRIBUTING.md), and follow its instructions. 13 | -------------------------------------------------------------------------------- /.github/auto-assign-pr.yml: -------------------------------------------------------------------------------- 1 | addReviewers: true 2 | addAssignees: false 3 | reviewers: 4 | - lestrrat 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gomod" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | target-branch: "master" 13 | labels: 14 | - "go" 15 | - "dependencies" 16 | - "dependabot" 17 | - package-ecosystem: "gomod" # See documentation for possible values 18 | directory: "/" # Location of package manifests 19 | schedule: 20 | interval: "daily" 21 | target-branch: "develop/v1" 22 | labels: 23 | - "go" 24 | - "dependencies" 25 | - "dependabot" 26 | - package-ecosystem: "github-actions" 27 | directory: "/" 28 | schedule: 29 | interval: "daily" 30 | target-branch: "develop/v2" 31 | - package-ecosystem: "github-actions" 32 | directory: "/" 33 | schedule: 34 | interval: "daily" 35 | target-branch: "develop/v1" 36 | 37 | -------------------------------------------------------------------------------- /.github/workflows/assign-issue.yml: -------------------------------------------------------------------------------- 1 | name: Assign Issue 2 | on: 3 | issues: 4 | types: [opened] 5 | 6 | jobs: 7 | auto-assign: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: 'Auto-assign issue' 11 | uses: pozil/auto-assign-issue@v1 12 | with: 13 | assignees: lestrrat 14 | -------------------------------------------------------------------------------- /.github/workflows/assign-pr.yml: -------------------------------------------------------------------------------- 1 | name: 'Auto Assign' 2 | on: 3 | pull_request: 4 | types: [opened, ready_for_review] 5 | 6 | jobs: 7 | add-reviews: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: kentaro-m/auto-assign-action@v1.2.5 11 | with: 12 | configuration-path: .github/auto-assign-pr.yml 13 | 
-------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | 10 | jobs: 11 | ubuntu: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | go: [ '1.21' ] 16 | link: 17 | - type: dynamic 18 | goflags: "" 19 | - type: static 20 | # On Ubuntu, libxml2 is compiled with GCC and is linked to libicu, which introduces a 21 | # stealth dependency on libstdc++ at link-time 22 | goflags: "-ldflags '-extldflags -lstdc++' -tags 'osusergo netgo static_build'" 23 | name: "Test [ Go ${{ matrix.go }}, ${{ matrix.link.type }} linking ]" 24 | steps: 25 | - name: Checkout repository 26 | uses: actions/checkout@v4 27 | - name: Cache Go modules 28 | uses: actions/cache@v3 29 | with: 30 | path: | 31 | ~/go/pkg/mod 32 | ~/.cache/go-build 33 | ~/.cache/bazel 34 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 35 | restore-keys: | 36 | ${{ runner.os }}-go- 37 | - name: Install Go stable version 38 | if: matrix.go != 'tip' 39 | uses: actions/setup-go@v4 40 | with: 41 | go-version: ${{ matrix.go }} 42 | check-latest: true 43 | - name: Run Go tests 44 | run: go test -race ${{ matrix.link.goflags }} ./... 
45 | - name: Test linking capability 46 | run: | 47 | go build -o linktest ${{ matrix.link.goflags }} ./test/link 48 | file linktest | grep '${{ matrix.link.type }}ally linked' 49 | archlinux: 50 | runs-on: ubuntu-latest 51 | strategy: 52 | matrix: 53 | go: [ '1.21' ] 54 | container: 55 | image: archlinux:latest 56 | name: "Test [ Arch Linux + Go ${{ matrix.go }} ]" 57 | steps: 58 | - uses: actions/checkout@v3 59 | - name: Set Up Arch Linux 60 | run: | 61 | pacman -Syy --noconfirm 62 | pacman -Syu --noconfirm 63 | pacman -S --noconfirm base-devel 64 | pacman -S --noconfirm libxml2=2.12.7 65 | - name: Set up Go 66 | uses: actions/setup-go@v4 67 | with: 68 | go-version: ${{ matrix.go }} 69 | - name: Test 70 | run: go test ./... 71 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "master" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "master" ] 20 | schedule: 21 | - cron: '40 13 * * 5' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'go' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Use only 'java' to analyze code written in Java, Kotlin or both 38 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 39 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 40 | 41 | steps: 42 | - name: Checkout repository 43 | uses: actions/checkout@v4 44 | 45 | # Initializes the CodeQL tools for scanning. 46 | - name: Initialize CodeQL 47 | uses: github/codeql-action/init@v2 48 | with: 49 | languages: ${{ matrix.language }} 50 | # If you wish to specify custom queries, you can do so here or in a config file. 51 | # By default, queries listed here will override any specified in a config file. 52 | # Prefix the list here with "+" to use these queries and those in the config file. 53 | 54 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 55 | # queries: security-extended,security-and-quality 56 | 57 | 58 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 59 | # If this step fails, then you should remove it and run the build manually (see below) 60 | - name: Autobuild 61 | uses: github/codeql-action/autobuild@v2 62 | 63 | # ℹ️ Command-line programs to run using the OS shell. 
64 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 65 | 66 | # If the Autobuild fails above, remove it and uncomment the following three lines. 67 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 68 | 69 | # - run: | 70 | # echo "Run, Build Application using script" 71 | # ./location_of_script_within_repo/buildscript.sh 72 | 73 | - name: Perform CodeQL Analysis 74 | uses: github/codeql-action/analyze@v2 75 | with: 76 | category: "/language:${{matrix.language}}" 77 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | on: 3 | push: {} 4 | pull_request: 5 | branches: 6 | - master 7 | jobs: 8 | golangci: 9 | name: lint 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions/setup-go@v4 14 | with: 15 | go-version: '1.20' 16 | check-latest: true 17 | - uses: golangci/golangci-lint-action@v3 18 | with: 19 | version: v1.54.2 20 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues and PRs' 2 | on: 3 | schedule: 4 | - cron: '30 1 * * *' 5 | 6 | jobs: 7 | stale: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/stale@v8 11 | with: 12 | stale-issue-message: 'This issue is stale because it has been open 14 days with no activity. Remove stale label or comment or this will be closed in 7 days.' 13 | stale-pr-message: 'This PR is stale because it has been open 14 days with no activity. Remove stale label or comment or this will be closed in 14 days.' 14 | close-issue-message: 'This issue was closed because it has been stalled for 7 days with no activity. 
This does not mean your issue is rejected, but rather it is done to hide it from the view of the maintainers for the time being. Feel free to reopen if you have new comments' 15 | close-pr-message: 'This PR was closed because it has been stalled for 14 days with no activity. This does not mean your PR is rejected, but rather it is done to hide it from the view of the maintainers for the time being. Feel free to reopen if you have new comments or changes that you would like to include. ' 16 | days-before-issue-stale: 14 17 | days-before-pr-stale: 14 18 | days-before-issue-close: 7 19 | days-before-pr-close: 7 20 | exempt-issue-labels: long-term 21 | exempt-pr-labels: long-term 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | .idea 27 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | 3 | linters-settings: 4 | govet: 5 | enable-all: true 6 | disable: 7 | - shadow 8 | - fieldalignment 9 | 10 | linters: 11 | enable-all: true 12 | disable: 13 | - cyclop 14 | - deadcode # deprecated 15 | - depguard 16 | - dupl 17 | - exhaustive 18 | - exhaustivestruct 19 | - errorlint 20 | - funlen 21 | - gci 22 | - gochecknoglobals 23 | - gochecknoinits 24 | - gocognit 25 | - gocritic 26 | - gocyclo 27 | - godot 28 | - godox 29 | - goerr113 30 | - gofumpt 31 | - golint #deprecated 32 | - gomnd 33 | - gosec 34 | - govet 35 | - interfacer # 
deprecated 36 | - ifshort 37 | - ireturn # No, I _LIKE_ returning interfaces 38 | - lll 39 | - maintidx # Do this in code review 40 | - maligned # deprecated 41 | - makezero 42 | - nakedret 43 | - nestif 44 | - nlreturn 45 | - nonamedreturns # visit this back later 46 | - nosnakecase 47 | - paralleltest 48 | - scopelint # deprecated 49 | - structcheck # deprecated 50 | - tagliatelle 51 | - testpackage 52 | - thelper # Tests are fine 53 | - varcheck # deprecated 54 | - varnamelen # Short names are ok 55 | - wrapcheck 56 | - wsl 57 | 58 | issues: 59 | exclude-rules: 60 | # not needed 61 | - path: /*.go 62 | text: "ST1003: should not use underscores in package names" 63 | linters: 64 | - stylecheck 65 | - path: /*.go 66 | text: "don't use an underscore in package name" 67 | linters: 68 | - revive 69 | - path: /*.go 70 | linters: 71 | - contextcheck 72 | - exhaustruct 73 | - path: /main.go 74 | linters: 75 | - errcheck 76 | - path: /*_test.go 77 | linters: 78 | - errcheck 79 | - errchkjson 80 | - forcetypeassert 81 | - path: /*_example_test.go 82 | linters: 83 | - forbidigo 84 | 85 | # Maximum issues count per one linter. Set to 0 to disable. Default is 50. 86 | max-issues-per-linter: 0 87 | 88 | # Maximum count of issues with the same text. Set to 0 to disable. Default is 3. 
89 | max-same-issues: 0 90 | 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 lestrrat 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # libxml2 2 | 3 | 4 | **NOTICE**: Instead of making people use this library while wondering if this module is ever going to be modified any time soon, I've opted to archive it. I may come back to it later, but for the time being I have no motivation or pressing need to work on this project. 5 | 6 | 7 | 8 | Interface to libxml2, with DOM interface. 
9 | 10 | [![Build Status](https://travis-ci.org/lestrrat-go/libxml2.svg?branch=master)](https://travis-ci.org/lestrrat-go/libxml2) 11 | 12 | [![GoDoc](https://godoc.org/github.com/lestrrat-go/libxml2?status.svg)](https://godoc.org/github.com/lestrrat-go/libxml2) 13 | 14 | # Index 15 | 16 | * [Why?](#why) 17 | * [FAQ](#faq) 18 | 19 | ## Why? 20 | 21 | I needed to write [go-xmlsec](https://github.com/lestrrat-go/xmlsec). This means we need to build trees using libxml2, and then muck with it in xmlsec: Two separate packages in Go means we cannot (safely) pass around `C.xmlFooPtr` objects (also, you pay a penalty for pointer types). This package carefully avoids references to `C.xmlFooPtr` types and uses uintptr to pass data around, so other libraries that need to interact with libxml2 can safely interact with it. 22 | 23 | ## Status 24 | 25 | * This library should be considered alpha grade. API may still change. 26 | * Much of commonly used functionalities from libxml2 that *I* use are there already, and are known to be functional 27 | 28 | ## Package Layout: 29 | 30 | | Name | Description | 31 | |---------|-------------------------------------------------------------| 32 | | libxml2 | Globally available utility functions, such as `ParseString` | 33 | | types | Common data types, such as `types.Node` | 34 | | parser | Parser routines | 35 | | dom | DOM-like manipulation of XML document/nodes | 36 | | xpath | XPath related tools | 37 | | xsd | XML Schema related tools | 38 | | clib | Wrapper around C libxml2 library - DO NOT TOUCH IF UNSURE | 39 | 40 | ## Features 41 | 42 | Create XML documents using DOM-like interface: 43 | 44 | ```go 45 | d := dom.CreateDocument() 46 | e, err := d.CreateElement("foo") 47 | if err != nil { 48 | println(err) 49 | return 50 | } 51 | d.SetDocumentElement(e) 52 | ... 
53 | ``` 54 | 55 | Parse documents: 56 | 57 | ```go 58 | d, err := libxml2.ParseString(xmlstring) 59 | if err != nil { 60 | println(err) 61 | return 62 | } 63 | ``` 64 | 65 | Use XPath to extract node values: 66 | 67 | ```go 68 | text := xpath.String(node.Find("//xpath/expression")) 69 | ``` 70 | 71 | ## Examples 72 | 73 | ### Basic XML Example 74 | 75 | ```go 76 | import ( 77 | "log" 78 | "net/http" 79 | 80 | "github.com/lestrrat-go/libxml2" 81 | "github.com/lestrrat-go/libxml2/parser" 82 | "github.com/lestrrat-go/libxml2/types" 83 | "github.com/lestrrat-go/libxml2/xpath" 84 | ) 85 | 86 | func ExampleXML() { 87 | res, err := http.Get("http://blog.golang.org/feed.atom") 88 | if err != nil { 89 | panic("failed to get blog.golang.org: " + err.Error()) 90 | } 91 | 92 | p := parser.New() 93 | doc, err := p.ParseReader(res.Body) 94 | defer res.Body.Close() 95 | 96 | if err != nil { 97 | panic("failed to parse XML: " + err.Error()) 98 | } 99 | defer doc.Free() 100 | 101 | doc.Walk(func(n types.Node) error { 102 | log.Printf(n.NodeName()) 103 | return nil 104 | }) 105 | 106 | root, err := doc.DocumentElement() 107 | if err != nil { 108 | log.Printf("Failed to fetch document element: %s", err) 109 | return 110 | } 111 | 112 | ctx, err := xpath.NewContext(root) 113 | if err != nil { 114 | log.Printf("Failed to create xpath context: %s", err) 115 | return 116 | } 117 | defer ctx.Free() 118 | 119 | ctx.RegisterNS("atom", "http://www.w3.org/2005/Atom") 120 | title := xpath.String(ctx.Find("/atom:feed/atom:title/text()")) 121 | log.Printf("feed title = %s", title) 122 | } 123 | ``` 124 | 125 | ### Basic HTML Example 126 | 127 | ```go 128 | func ExampleHTML() { 129 | res, err := http.Get("http://golang.org") 130 | if err != nil { 131 | panic("failed to get golang.org: " + err.Error()) 132 | } 133 | 134 | doc, err := libxml2.ParseHTMLReader(res.Body) 135 | if err != nil { 136 | panic("failed to parse HTML: " + err.Error()) 137 | } 138 | defer doc.Free() 139 | 140 | 
doc.Walk(func(n types.Node) error { 141 | log.Printf(n.NodeName()) 142 | return nil 143 | }) 144 | 145 | nodes := xpath.NodeList(doc.Find(`//div[@id="menu"]/a`)) 146 | for i := 0; i < len(nodes); i++ { 147 | log.Printf("Found node: %s", nodes[i].NodeName()) 148 | } 149 | } 150 | ``` 151 | 152 | ### XSD Validation 153 | 154 | ```go 155 | import ( 156 | "io/ioutil" 157 | "log" 158 | "os" 159 | "path/filepath" 160 | 161 | "github.com/lestrrat-go/libxml2" 162 | "github.com/lestrrat-go/libxml2/xsd" 163 | ) 164 | 165 | func ExampleXSD() { 166 | xsdfile := filepath.Join("test", "xmldsig-core-schema.xsd") 167 | f, err := os.Open(xsdfile) 168 | if err != nil { 169 | log.Printf("failed to open file: %s", err) 170 | return 171 | } 172 | defer f.Close() 173 | 174 | buf, err := ioutil.ReadAll(f) 175 | if err != nil { 176 | log.Printf("failed to read file: %s", err) 177 | return 178 | } 179 | 180 | s, err := xsd.Parse(buf) 181 | if err != nil { 182 | log.Printf("failed to parse XSD: %s", err) 183 | return 184 | } 185 | defer s.Free() 186 | 187 | d, err := libxml2.ParseString(``) 188 | if err != nil { 189 | log.Printf("failed to parse XML: %s", err) 190 | return 191 | } 192 | defer d.Free() 193 | 194 | if err := s.Validate(d); err != nil { 195 | for _, e := range err.(xsd.SchemaValidationError).Errors() { 196 | log.Printf("error: %s", e.Error()) 197 | } 198 | return 199 | } 200 | 201 | log.Printf("validation successful!") 202 | } 203 | ``` 204 | 205 | ## Caveats 206 | 207 | ### Other libraries 208 | 209 | There exist many similar libraries. I want speed, I want DOM, and I want XPath. When all of these are met, I'd be happy to switch to another library. 210 | 211 | For now my closest contender was [xmlpath](https://github.com/go-xmlpath/xmlpath), but as of this writing it suffers in the speed (for xpath) area a bit: 212 | 213 | ``` 214 | shoebill% go test -v -run=none -benchmem -benchtime=5s -bench . 
215 | PASS 216 | BenchmarkXmlpathXmlpath-4 500000 11737 ns/op 721 B/op 6 allocs/op 217 | BenchmarkLibxml2Xmlpath-4 1000000 7627 ns/op 368 B/op 15 allocs/op 218 | BenchmarkEncodingXMLDOM-4 2000000 4079 ns/op 4560 B/op 9 allocs/op 219 | BenchmarkLibxml2DOM-4 1000000 11454 ns/op 264 B/op 7 allocs/op 220 | ok github.com/lestrrat-go/libxml2 37.597s 221 | ``` 222 | 223 | ## FAQ 224 | 225 | ### "It won't build" 226 | 227 | The very first thing you need to be aware of is that this is a _C binding_ to 228 | libxml2. You should understand how to build C programs, how to debug them, 229 | or at least be able to ask the right questions and deal with a great deal 230 | more than Go alone. 231 | 232 | Having said that, the most common causes for build errors are: 233 | 234 | 1. **You have not installed libxml2 / You installed it incorrectly** 235 | 236 | The first one is obvious, but I get this a lot. You have to install libxml2. 237 | If you are installing via some sort of package manager like apt/apk, remember 238 | that you need to install the "development" files as well. The name of the 239 | package differs in each environment, but it's usually something like "libxml2-dev". 240 | 241 | The second is more subtle, and tends to happen when you install your libxml2 242 | in a non-standard location. This causes problems for other tools such as 243 | your C compiler or pkg-config. See more below. 244 | 245 | 2. **Your header files are not in the search path** 246 | 247 | If you don't understand what header files are or how they work, this is where 248 | you should either look for your local C-guru, or study how these things work 249 | before filing an issue on this repository. 250 | 251 | Your C compiler, which is invoked via Go, needs to be able to find the libxml2 252 | header files. If you installed them in a non-standard location, 253 | such as outside of /usr/include and /usr/local/include, you _may_ have to 254 | configure them yourself. 
255 | 256 | How to configure them depends greatly on your environment, and again, if you 257 | don't understand how you can fix it, you should consult your local C-guru 258 | about it, not this repository. 259 | 260 | 3. **Your pkg-config files are not in the search path** 261 | 262 | If you don't understand what pkg-config does, this is where you should either 263 | look for your local sysadmin friend, or study how these things work 264 | before filing an issue on this repository. 265 | 266 | pkg-config provides metadata about installed components, such as build flags 267 | that are required. Go uses it to figure out how to build and link Go programs 268 | that need to interact with things written in C. 269 | 270 | However, pkg-config is merely a thin frontend to extract information from 271 | file(s) that each component provided upon installation. 272 | pkg-config itself needs to know where to find these files. 273 | 274 | Make sure that the output of the following command contains `libxml-2.0`. 275 | If not, and you don't understand how to fix this yourself, you should consult 276 | your local sysadmin friend about it, not this repository. 277 | 278 | ``` 279 | pkg-config --list-all 280 | ``` 281 | 282 | ### "Fatal error: 'libxml/HTMLparser.h' file not found" 283 | 284 | See the first FAQ entry. 
285 | 286 | ### I can't statically link this module to libxml2 287 | 288 | Use the `static_build` tag when building this module, for example: 289 | 290 | ```sh 291 | go build -tags static_build 292 | ``` 293 | 294 | ## See Also 295 | 296 | * https://github.com/lestrrat-go/xmlsec 297 | 298 | ## Credits 299 | 300 | * Work on this library was generously sponsored by HDE Inc (https://www.hde.co.jp) 301 | -------------------------------------------------------------------------------- /clib/interface.go: -------------------------------------------------------------------------------- 1 | package clib 2 | 3 | import "errors" 4 | 5 | const ( 6 | MaxEncodingLength = 256 7 | MaxAttributeNameLength = 1024 8 | MaxElementNameLength = 1024 9 | MaxNamespaceURILength = 4096 10 | MaxValueBufferSize = 4096 11 | MaxXPathExpressionLength = 4096 12 | ) 13 | 14 | // C14NMode represents the C14N mode supported by libxml2 15 | type C14NMode int 16 | 17 | // PtrSource is the single interface that connects the rest of 18 | // libxml2 package with this package. The clib package does not 19 | // really care what sort of object you pass to these low-level 20 | // functions, as long as the arguments fulfill this interface. 21 | // 22 | // Obviously this causes problems if you pass an Element node 23 | // where a Document node is expected, but it is the caller's 24 | // responsibility to align the argument list. 
25 | type PtrSource interface { 26 | Pointer() uintptr 27 | } 28 | 29 | // XMLNodeType identifies the type of the underlying C struct 30 | type XMLNodeType int 31 | 32 | const ( 33 | ElementNode XMLNodeType = iota + 1 34 | AttributeNode 35 | TextNode 36 | CDataSectionNode 37 | EntityRefNode 38 | EntityNode 39 | PiNode 40 | CommentNode 41 | DocumentNode 42 | DocumentTypeNode 43 | DocumentFragNode 44 | NotationNode 45 | HTMLDocumentNode 46 | DTDNode 47 | ElementDecl 48 | AttributeDecl 49 | EntityDecl 50 | NamespaceDecl 51 | XIncludeStart 52 | XIncludeEnd 53 | DocbDocumentNode 54 | ) 55 | 56 | var ( 57 | ErrAttributeNotFound = errors.New("attribute not found") 58 | ErrAttributeNameTooLong = errors.New("attribute name too long") 59 | ErrElementNameTooLong = errors.New("element name too long") 60 | ErrNamespaceURITooLong = errors.New("namespace uri too long") 61 | ErrValueTooLong = errors.New("value too long") 62 | ErrXPathExpressionTooLong = errors.New("xpath expression too long") 63 | // ErrInvalidAttribute is returned when the Attribute struct (probably 64 | // the pointer to the underlying C struct is not valid) 65 | ErrInvalidAttribute = errors.New("invalid attribute") 66 | ErrInvalidArgument = errors.New("invalid argument") 67 | // ErrInvalidDocument is returned when the Document struct (probably 68 | // the pointer to the underlying C struct is not valid) 69 | ErrInvalidDocument = errors.New("invalid document") 70 | // ErrInvalidParser is returned when the Parser struct (probably 71 | // the pointer to the underlying C struct is not valid) 72 | ErrInvalidParser = errors.New("invalid parser") 73 | // ErrInvalidNamespace is returned when the Namespace struct (probably 74 | // the pointer to the underlying C struct is not valid) 75 | ErrInvalidNamespace = errors.New("invalid namespace") 76 | // ErrInvalidNode is returned when the Node struct (probably 77 | // the pointer to the underlying C struct is not valid) 78 | ErrInvalidNode = errors.New("invalid node") 79 | 
ErrInvalidNodeName = errors.New("invalid node name") 80 | // ErrInvalidXPathContext is returned when the XPathContext struct (probably 81 | // the pointer to the underlying C struct is not valid) 82 | ErrInvalidXPathContext = errors.New("invalid xpath context") 83 | // ErrInvalidXPathExpression is returned when the XPathExpression struct (probably 84 | // the pointer to the underlying C struct is not valid) 85 | ErrInvalidXPathExpression = errors.New("invalid xpath expression") 86 | // ErrInvalidXPathObject is returned when the XPathObject struct (probably 87 | // the pointer to the underlying C struct is not valid) 88 | ErrInvalidXPathObject = errors.New("invalid xpath object") 89 | // ErrInvalidSchema is returned when the Schema struct (probably 90 | // the pointer to the underlying C struct is not valid) 91 | ErrInvalidSchema = errors.New("invalid schema") 92 | ErrNodeNotFound = errors.New("node not found") 93 | ErrXPathEmptyResult = errors.New("empty xpath result") 94 | ErrXPathCompileFailure = errors.New("xpath compilation failed") 95 | ErrXPathNamespaceRegisterFailure = errors.New("cannot register namespace") 96 | ) 97 | 98 | //nolint:errname 99 | type ErrNamespaceNotFound struct { 100 | Target string 101 | } 102 | 103 | func (e ErrNamespaceNotFound) Error() string { 104 | return "namespace not found: " + e.Target 105 | } 106 | 107 | type XPathObjectType int 108 | 109 | const ( 110 | XPathUndefinedType XPathObjectType = iota 111 | XPathNodeSetType 112 | XPathBooleanType 113 | XPathNumberType 114 | XPathStringType 115 | XPathPointType 116 | XPathRangeType 117 | XPathLocationSetType 118 | XPathUsersType 119 | XPathXSLTTreeType 120 | ) 121 | -------------------------------------------------------------------------------- /clib/link_dynamic.go: -------------------------------------------------------------------------------- 1 | //go:build !static_build 2 | // +build !static_build 3 | 4 | package clib 5 | 6 | // #cgo pkg-config: libxml-2.0 7 | import "C" 8 | 
-------------------------------------------------------------------------------- /clib/link_static.go: -------------------------------------------------------------------------------- 1 | //go:build static_build 2 | // +build static_build 3 | 4 | package clib 5 | 6 | // #cgo pkg-config: --static libxml-2.0 7 | // #cgo LDFLAGS: -static 8 | import "C" 9 | -------------------------------------------------------------------------------- /dom/c14n_test.go: -------------------------------------------------------------------------------- 1 | package dom_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/lestrrat-go/libxml2" 7 | "github.com/lestrrat-go/libxml2/dom" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestC14N(t *testing.T) { 12 | expected := ` 14 | Hello, world! 15 | 16 | 17 | ` 18 | 19 | doc, err := libxml2.ParseString(` 20 | 22 | 23 | Hello, world! 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | `) 33 | 34 | if !assert.NoError(t, err, "Parse document should succeed") { 35 | return 36 | } 37 | 38 | s, err := dom.C14NSerialize{Mode: dom.C14NExclusive1_0, WithComments: true}.Serialize(doc) 39 | if !assert.NoError(t, err, "C14N should succeed") { 40 | return 41 | } 42 | t.Logf("C14N -> %s", s) 43 | t.Logf("expected -> %s", expected) 44 | 45 | if !assert.Equal(t, expected, s, "C14N content matches") { 46 | return 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /dom/document_test.go: -------------------------------------------------------------------------------- 1 | package dom_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/lestrrat-go/libxml2/clib" 8 | "github.com/lestrrat-go/libxml2/dom" 9 | "github.com/lestrrat-go/libxml2/types" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | // Tests for DOM Level 3 14 | 15 | func TestDocumentAttributes(t *testing.T) { 16 | doc := dom.CreateDocument() 17 | defer doc.Free() 18 | if doc.Encoding() != "" { 19 | t.Errorf("Encoding 
should be empty string at first, got '%s'", doc.Encoding()) 20 | } 21 | 22 | if doc.Version() != "1.0" { 23 | t.Errorf("Version should be 1.0 by default, got '%s'", doc.Version()) 24 | } 25 | 26 | if doc.Standalone() != -1 { 27 | t.Errorf("Standalone should be -1 by default, got '%d'", doc.Standalone()) 28 | } 29 | 30 | for _, enc := range []string{"utf-8", "euc-jp", "sjis", "iso-8859-1"} { 31 | doc.SetEncoding(enc) 32 | if doc.Encoding() != enc { 33 | t.Errorf("Expected encoding '%s', got '%s'", enc, doc.Encoding()) 34 | } 35 | } 36 | 37 | for _, v := range []string{"1.5", "4.12", "12.5"} { 38 | doc.SetVersion(v) 39 | if doc.Version() != v { 40 | t.Errorf("Expected version '%s', got '%s'", v, doc.Version()) 41 | } 42 | } 43 | 44 | doc.SetStandalone(1) 45 | if doc.Standalone() != 1 { 46 | t.Errorf("Expected standalone 1, got '%d'", doc.Standalone()) 47 | } 48 | 49 | doc.SetBaseURI("localhost/here.xml") 50 | if doc.URI() != "localhost/here.xml" { 51 | t.Errorf("Expected URI 'localhost/here.xml', got '%s'", doc.URI()) 52 | } 53 | } 54 | 55 | func checkElement(t *testing.T, e types.Element, assertName, testCase string) { 56 | if e == nil { 57 | t.Errorf("%s: Element is nil", testCase) 58 | return 59 | } 60 | 61 | if e.NodeType() != clib.ElementNode { 62 | t.Errorf("%s: Expected node type 'ElementNode', got '%s'", testCase, e.NodeType()) 63 | return 64 | } 65 | 66 | if e.NodeName() != assertName { 67 | t.Errorf("%s: Expected NodeName '%s', got '%s'", testCase, assertName, e.NodeName()) 68 | return 69 | } 70 | } 71 | 72 | func createElementAndCheck(t *testing.T, doc *dom.Document, name, assertName, testCase string) { 73 | node, err := doc.CreateElement(name) 74 | if err != nil { 75 | t.Errorf("Failed to create new element '%s': %s", name, err) 76 | return 77 | } 78 | checkElement(t, node, assertName, testCase) 79 | } 80 | 81 | func withDocument(cb func(*dom.Document)) { 82 | doc := dom.CreateDocument() 83 | defer doc.Free() 84 | 85 | cb(doc) 86 | } 87 | 88 | func 
TestDocumentCreateElements(t *testing.T) { 89 | withDocument(func(d *dom.Document) { 90 | createElementAndCheck(t, d, "foo", "foo", "Simple Element") 91 | }) 92 | 93 | withDocument(func(d *dom.Document) { 94 | d.SetEncoding("iso-8859-1") 95 | createElementAndCheck(t, d, "foo", "foo", "Create element with document with encoding") 96 | }) 97 | 98 | withDocument(func(d *dom.Document) { 99 | caseName := "Create element with namespace" 100 | const prefix = "foo" 101 | const localName = "bar" 102 | e, err := d.CreateElementNS("http://kungfoo", fmt.Sprintf("%s:%s", prefix, localName)) 103 | if err != nil { 104 | t.Errorf("failed to create namespaced element: %s", err) 105 | return 106 | } 107 | 108 | checkElement(t, e, "foo:bar", caseName) 109 | 110 | if e.Prefix() != prefix { 111 | t.Errorf("%s: Expected prefix '%s', got '%s'", caseName, prefix, e.Prefix()) 112 | } 113 | if e.LocalName() != localName { 114 | t.Errorf("%s: Expected local name '%s', got '%s'", caseName, localName, e.LocalName()) 115 | } 116 | if e.NamespaceURI() != "http://kungfoo" { 117 | t.Errorf("%s: Expected namespace uri '%s', got '%s'", caseName, "http://kungfoo", e.NamespaceURI()) 118 | } 119 | }) 120 | 121 | // Bad elements 122 | withDocument(func(d *dom.Document) { 123 | badnames := []string{";", "&", "<><", "/", "1A"} 124 | for _, name := range badnames { 125 | if _, err := d.CreateElement(name); err == nil { 126 | t.Errorf("Creation of element name '%s' should fail", name) 127 | } 128 | } 129 | }) 130 | } 131 | 132 | func TestDocumentCreateText(t *testing.T) { 133 | withDocument(func(d *dom.Document) { 134 | const nodeName = "foo" 135 | node, err := d.CreateTextNode(nodeName) 136 | if err != nil { 137 | t.Errorf("Failed to create text node: %s", err) 138 | return 139 | } 140 | 141 | if node.NodeType() != clib.TextNode { 142 | t.Errorf("Expected NodeType '%s', got '%s'", clib.TextNode, node.NodeType()) 143 | return 144 | } 145 | 146 | if node.NodeValue() != nodeName { 147 | t.Errorf("Expeted 
NodeValue '%s', got '%s'", nodeName, node.NodeValue()) 148 | return 149 | } 150 | }) 151 | } 152 | 153 | func TestDocumentCreateComment(t *testing.T) { 154 | withDocument(func(d *dom.Document) { 155 | const nodeName = "foo" 156 | node, err := d.CreateCommentNode(nodeName) 157 | if err != nil { 158 | t.Errorf("Failed to create Comment node: %s", err) 159 | return 160 | } 161 | 162 | if node.NodeType() != clib.CommentNode { 163 | t.Errorf("Expected NodeType '%s', got '%s'", clib.CommentNode, node.NodeType()) 164 | return 165 | } 166 | 167 | if node.NodeValue() != nodeName { 168 | t.Errorf("Expeted NodeValue '%s', got '%s'", nodeName, node.NodeValue()) 169 | return 170 | } 171 | 172 | if node.String() != "" { 173 | t.Errorf("Expeted String() to return 'foo', got '%s'", node.String()) 174 | return 175 | } 176 | }) 177 | } 178 | 179 | func TestDocumentCreateCDataSection(t *testing.T) { 180 | withDocument(func(d *dom.Document) { 181 | const name = "foo" 182 | node, err := d.CreateCDataSection(name) 183 | if err != nil { 184 | t.Errorf("Failed to create CDataSection node: %s", err) 185 | return 186 | } 187 | 188 | if node.NodeType() != clib.CDataSectionNode { 189 | t.Errorf("Expected NodeType '%s', got '%s'", clib.CDataSectionNode, node.NodeType()) 190 | return 191 | } 192 | 193 | if node.NodeValue() != name { 194 | t.Errorf("Expeted NodeValue '%s', got '%s'", name, node.NodeValue()) 195 | return 196 | } 197 | 198 | if node.String() != "" { 199 | t.Errorf("Expeted String() to return 'foo', got '%s'", node.String()) 200 | return 201 | } 202 | }) 203 | } 204 | 205 | func TestDocumentCreateAttribute(t *testing.T) { 206 | withDocument(func(d *dom.Document) { 207 | node, err := d.CreateAttribute("foo", "bar") 208 | if err != nil { 209 | t.Errorf("Failed to create Attribute node: %s", err) 210 | return 211 | } 212 | 213 | if node.NodeType() != clib.AttributeNode { 214 | t.Errorf("Expected NodeType '%s', got '%s'", clib.AttributeNode, node.NodeType()) 215 | return 216 | } 217 | 
218 | if node.NodeName() != "foo" { 219 | t.Errorf("Expeted NodeName 'foo', got '%s'", node.NodeName()) 220 | return 221 | } 222 | 223 | if node.NodeValue() != "bar" { 224 | t.Errorf("Expeted NodeValue 'foo', got '%s'", node.NodeValue()) 225 | return 226 | } 227 | 228 | if node.String() != ` foo="bar"` { 229 | t.Errorf(`Expeted String() to return ' foo="bar"', got '%s'`, node.String()) 230 | return 231 | } 232 | 233 | if node.HasChildNodes() { 234 | t.Errorf("Expected HashChildNodes to return false") 235 | return 236 | } 237 | 238 | // Attribute nodes claim to not have any child nodes, but they do?! 239 | content, err := node.FirstChild() 240 | if !assert.NoError(t, err, "Expected FirstChild to return a node") { 241 | return 242 | } 243 | 244 | if content.NodeType() != clib.TextNode { 245 | t.Errorf("Expected content node NodeType '%s', got '%s'", clib.TextNode, content.NodeType()) 246 | return 247 | } 248 | }) 249 | 250 | // Bad elements 251 | withDocument(func(d *dom.Document) { 252 | badnames := []string{";", "&", "<><", "/", "1A"} 253 | for _, name := range badnames { 254 | if _, err := d.CreateAttribute(name, "bar"); err == nil { 255 | t.Errorf("Creation of attribute name '%s' should fail", name) 256 | } 257 | } 258 | }) 259 | } 260 | 261 | func TestDocumentCreateAttributeNS(t *testing.T) { 262 | withDocument(func(d *dom.Document) { 263 | elem, err := d.CreateElement("foo") 264 | if err != nil { 265 | t.Errorf("Failed to create Element node: %s", err) 266 | return 267 | } 268 | d.SetDocumentElement(elem) 269 | 270 | attr, err := d.CreateAttribute("attr", "e & f") 271 | if err != nil { 272 | t.Errorf("Failed to create Attribute node: %s", err) 273 | return 274 | } 275 | elem.AddChild(attr) 276 | 277 | if elem.String() != `` { 278 | t.Errorf(`Expected String '', got '%s'`, elem.String()) 279 | return 280 | } 281 | elem.RemoveAttribute("attr") 282 | 283 | attr, err = d.CreateAttributeNS("", "attr2", "a & b") 284 | if err != nil { 285 | t.Errorf("Failed to create 
Attribute node: %s", err) 286 | return 287 | } 288 | elem.AddChild(attr) 289 | 290 | if elem.String() != `` { 291 | t.Errorf(`Expected String '', got '%s'`, elem.String()) 292 | return 293 | } 294 | elem.RemoveAttribute("attr2") 295 | 296 | attr, err = d.CreateAttributeNS("http://kungfoo", "foo:attr3", "g & h") 297 | if err != nil { 298 | t.Errorf("Failed to create Attribute node: %s", err) 299 | return 300 | } 301 | elem.AddChild(attr) 302 | 303 | if elem.String() != `` { 304 | t.Errorf(`Expected String '', got '%s'`, elem.String()) 305 | return 306 | } 307 | }) 308 | 309 | withDocument(func(d *dom.Document) { 310 | _, err := d.CreateAttributeNS("http://kungfoo", "kung:foo", "bar") 311 | if err == nil { 312 | t.Errorf("Creating Attribute node w/o root node should have failed") 313 | return 314 | } 315 | 316 | elem, err := d.CreateElement("foo") 317 | if err != nil { 318 | t.Errorf("Failed to create Element node: %s", err) 319 | return 320 | } 321 | d.SetDocumentElement(elem) 322 | 323 | attr, err := d.CreateAttributeNS("http://kungfoo", "kung:foo", "bar") 324 | if err != nil { 325 | t.Errorf("Failed to create Attribute node: %s", err) 326 | return 327 | } 328 | 329 | if attr.NodeName() != "kung:foo" { 330 | t.Errorf("Expected NodeName 'kung:foo', got '%s'", attr.NodeName()) 331 | return 332 | } 333 | 334 | if attr.LocalName() != "foo" { 335 | t.Errorf("Expected LocalName 'foo', got '%s'", attr.LocalName()) 336 | return 337 | } 338 | 339 | if attr.NodeValue() != "bar" { 340 | t.Errorf("Expected NodeValue() 'bar', got '%s'", attr.NodeValue()) 341 | return 342 | } 343 | 344 | attr.SetNodeValue(`bar&`) 345 | if attr.NodeValue() != `bar&` { 346 | t.Errorf("Expected NodeValue() 'bar&', got '%s'", attr.NodeValue()) 347 | return 348 | } 349 | }) 350 | 351 | // Bad elements 352 | withDocument(func(d *dom.Document) { 353 | elem, err := d.CreateElement("foo") 354 | if err != nil { 355 | t.Errorf("Failed to create Element node: %s", err) 356 | return 357 | } 358 | 
d.SetDocumentElement(elem) 359 | 360 | badnames := []string{";", "&", "<><", "/", "1A"} 361 | for _, name := range badnames { 362 | if _, err := d.CreateAttributeNS("http://kungfoo", name, "bar"); err == nil { 363 | t.Errorf("Creation of attribute name '%s' should fail", name) 364 | } 365 | } 366 | }) 367 | } 368 | -------------------------------------------------------------------------------- /dom/dom.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/lestrrat-go/libxml2/xpath" 7 | ) 8 | 9 | var docPool sync.Pool 10 | 11 | func init() { 12 | SetupXPathCallback() 13 | docPool = sync.Pool{} 14 | docPool.New = func() interface{} { 15 | return &Document{} 16 | } 17 | } 18 | 19 | func SetupXPathCallback() { 20 | xpath.WrapNodeFunc = WrapNode 21 | } 22 | 23 | func WrapDocument(n uintptr) *Document { 24 | //nolint:forcetypeassert 25 | doc := docPool.Get().(*Document) 26 | doc.mortal = false 27 | doc.ptr = n 28 | return doc 29 | } 30 | -------------------------------------------------------------------------------- /dom/interface.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/lestrrat-go/libxml2/clib" 7 | ) 8 | 9 | var ( 10 | ErrAttributeNotFound = clib.ErrAttributeNotFound 11 | ErrInvalidNodeType = errors.New("invalid node type") 12 | ) 13 | 14 | // XMLNodeType identifies the type of the underlying C struct 15 | type XMLNodeType clib.XMLNodeType 16 | 17 | const ( 18 | ElementNode = clib.ElementNode 19 | AttributeNode = clib.AttributeNode 20 | TextNode = clib.TextNode 21 | CDataSectionNode = clib.CDataSectionNode 22 | EntityRefNode = clib.EntityRefNode 23 | EntityNode = clib.EntityNode 24 | PiNode = clib.PiNode 25 | CommentNode = clib.CommentNode 26 | DocumentNode = clib.DocumentNode 27 | DocumentTypeNode = clib.DocumentTypeNode 28 | DocumentFragNode = 
clib.DocumentFragNode 29 | NotationNode = clib.NotationNode 30 | HTMLDocumentNode = clib.HTMLDocumentNode 31 | DTDNode = clib.DTDNode 32 | ElementDecl = clib.ElementDecl 33 | AttributeDecl = clib.AttributeDecl 34 | EntityDecl = clib.EntityDecl 35 | NamespaceDecl = clib.NamespaceDecl 36 | XIncludeStart = clib.XIncludeStart 37 | XIncludeEnd = clib.XIncludeEnd 38 | DocbDocumentNode = clib.DocbDocumentNode 39 | ) 40 | 41 | type XMLNode struct { 42 | ptr uintptr // *C.xmlNode 43 | mortal bool 44 | } 45 | 46 | type Attribute struct { 47 | XMLNode 48 | } 49 | 50 | type CDataSection struct { 51 | XMLNode 52 | } 53 | 54 | type Pi struct { 55 | XMLNode 56 | } 57 | 58 | type Comment struct { 59 | XMLNode 60 | } 61 | 62 | type Element struct { 63 | XMLNode 64 | } 65 | 66 | type Document struct { 67 | ptr uintptr // *C.xmlDoc 68 | mortal bool 69 | } 70 | 71 | type Text struct { 72 | XMLNode 73 | } 74 | 75 | type Namespace struct { 76 | XMLNode 77 | } 78 | 79 | type Serializer interface { 80 | Serialize(interface{}) (string, error) 81 | } 82 | 83 | // note: Serialize takes an interface because some serializers only allow 84 | // Document, whereas others might allow Nodes 85 | 86 | // C14NMode represents the C14N mode supported by libxml2 87 | type C14NMode int 88 | 89 | //nolint:revive,stylecheck 90 | const ( 91 | C14N1_0 C14NMode = iota 92 | C14NExclusive1_0 93 | C14N1_1 94 | ) 95 | 96 | // C14NSerialize implements the Serializer interface, and generates 97 | // XML in C14N format. 
98 | type C14NSerialize struct { 99 | Mode C14NMode 100 | WithComments bool 101 | } 102 | -------------------------------------------------------------------------------- /dom/node.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "github.com/lestrrat-go/libxml2/clib" 5 | "github.com/lestrrat-go/libxml2/types" 6 | "github.com/lestrrat-go/libxml2/xpath" 7 | "github.com/pkg/errors" 8 | ) 9 | 10 | // ChildNodes returns the child nodes 11 | func (n *XMLNode) ChildNodes() (types.NodeList, error) { 12 | list, err := clib.XMLChildNodes(n) 13 | if err != nil { 14 | return nil, errors.Wrap(err, "failed to get child node pointers") 15 | } 16 | 17 | ret := make(types.NodeList, len(list)) 18 | for i, x := range list { 19 | ret[i], err = WrapNode(x) 20 | if err != nil { 21 | return nil, errors.Wrap(err, "failed to wrap node pointer") 22 | } 23 | } 24 | return ret, nil 25 | } 26 | 27 | func (n *XMLNode) RemoveChild(t types.Node) error { 28 | return clib.XMLRemoveChild(n, t) 29 | } 30 | 31 | // Pointer returns the pointer to the underlying C struct 32 | func (n *XMLNode) Pointer() uintptr { 33 | return n.ptr 34 | } 35 | 36 | // String returns the string representation 37 | func (n *XMLNode) String() string { 38 | return n.ToString(0, false) 39 | } 40 | 41 | // OwnerDocument returns the Document that this node belongs to 42 | func (n *XMLNode) OwnerDocument() (types.Document, error) { 43 | ptr, err := clib.XMLOwnerDocument(n) 44 | if err != nil { 45 | return nil, errors.Wrap(err, "failed to get valid owner document") 46 | } 47 | 48 | if ptr == 0 { 49 | return nil, errors.Wrap(clib.ErrInvalidDocument, "failed to get valid owner document") 50 | } 51 | return WrapDocument(ptr), nil 52 | } 53 | 54 | // NodeName returns the node name 55 | func (n *XMLNode) NodeName() string { 56 | s, err := clib.XMLNodeName(n) 57 | if err != nil { 58 | return "" 59 | } 60 | return s 61 | } 62 | 63 | // NodeValue returns the node 
value 64 | func (n *XMLNode) NodeValue() string { 65 | s, err := clib.XMLNodeValue(n) 66 | if err != nil { 67 | return "" 68 | } 69 | return s 70 | } 71 | 72 | // Literal returns the literal string value 73 | func (n XMLNode) Literal() (string, error) { 74 | return n.String(), nil 75 | } 76 | 77 | // IsSameNode returns true if two nodes point to the same node 78 | func (n *XMLNode) IsSameNode(other types.Node) bool { 79 | return n.Pointer() == other.Pointer() 80 | } 81 | 82 | // Copy creates a copy of the node 83 | func (n *XMLNode) Copy() (types.Node, error) { 84 | doc, err := n.OwnerDocument() 85 | if err != nil { 86 | return nil, errors.Wrap(err, "failed to get owner document") 87 | } 88 | nptr, err := clib.XMLDocCopyNode(n, doc, 1) 89 | if err != nil { 90 | return nil, errors.Wrap(err, "failed to copy document nodes") 91 | } 92 | return WrapNode(nptr) 93 | } 94 | 95 | // SetDocument sets the document of this node and its descendants 96 | func (n *XMLNode) SetDocument(d types.Document) error { 97 | return clib.XMLSetTreeDoc(n, d) 98 | } 99 | 100 | // ParseInContext parses a chunk of XML in the context of the current 101 | // node. This makes it safe to append the resulting node to the current 102 | // node or other nodes in the same document. 
103 | func (n *XMLNode) ParseInContext(s string, o int) (types.Node, error) { 104 | nptr, err := clib.XMLParseInNodeContext(n, s, o) 105 | if err != nil { 106 | return nil, errors.Wrap(err, "failed to parse input") 107 | } 108 | return WrapNode(nptr) 109 | } 110 | 111 | // Find evaluates the xpath expression and returns the matching nodes 112 | func (n *XMLNode) Find(expr string) (types.XPathResult, error) { 113 | ctx, err := xpath.NewContext(n) 114 | if err != nil { 115 | return nil, errors.Wrap(err, "failed to create new XPath context") 116 | } 117 | defer ctx.Free() 118 | 119 | return ctx.Find(expr) 120 | } 121 | 122 | // FindExpr evaluates the pre-compiled xpath expression and returns the matching nodes 123 | func (n *XMLNode) FindExpr(expr *xpath.Expression) (types.XPathResult, error) { 124 | ctx, err := xpath.NewContext(n) 125 | if err != nil { 126 | return nil, errors.Wrap(err, "failed to create new XPath context") 127 | } 128 | defer ctx.Free() 129 | 130 | return ctx.FindExpr(expr) 131 | } 132 | 133 | // HasChildNodes returns true if the node contains children 134 | func (n *XMLNode) HasChildNodes() bool { 135 | return clib.XMLHasChildNodes(n) 136 | } 137 | 138 | // FirstChild returns the first child node 139 | func (n *XMLNode) FirstChild() (types.Node, error) { 140 | ptr, err := clib.XMLFirstChild(n) 141 | if err != nil { 142 | return nil, errors.Wrap(err, "failed to get valid pointer to first child") 143 | } 144 | return WrapNode(ptr) 145 | } 146 | 147 | // LastChild returns the last child node 148 | func (n *XMLNode) LastChild() (types.Node, error) { 149 | ptr, err := clib.XMLLastChild(n) 150 | if err != nil { 151 | return nil, errors.Wrap(err, "failed to get valid pointer to last child") 152 | } 153 | return WrapNode(ptr) 154 | } 155 | 156 | // LocalName returns the local name 157 | func (n *XMLNode) LocalName() string { 158 | return clib.XMLLocalName(n) 159 | } 160 | 161 | // NamespaceURI returns the namespace URI associated with this node 162 | func 
(n *XMLNode) NamespaceURI() string { 163 | return clib.XMLNamespaceURI(n) 164 | } 165 | 166 | // NextSibling returns the next sibling 167 | func (n *XMLNode) NextSibling() (types.Node, error) { 168 | ptr, err := clib.XMLNextSibling(n) 169 | if err != nil { 170 | return nil, errors.Wrap(err, "failed to get valid pointer to next child") 171 | } 172 | if ptr == 0 { 173 | return nil, nil 174 | } 175 | return WrapNode(ptr) 176 | } 177 | 178 | // ParentNode returns the parent node 179 | func (n *XMLNode) ParentNode() (types.Node, error) { 180 | ptr, err := clib.XMLParentNode(n) 181 | if err != nil { 182 | return nil, errors.Wrap(err, "failed to get valid pointer to parent node") 183 | } 184 | 185 | return WrapNode(ptr) 186 | } 187 | 188 | // Prefix returns the prefix from the node name, if any 189 | func (n *XMLNode) Prefix() string { 190 | return clib.XMLPrefix(n) 191 | } 192 | 193 | // PreviousSibling returns the previous sibling 194 | func (n *XMLNode) PreviousSibling() (types.Node, error) { 195 | ptr, err := clib.XMLPreviousSibling(n) 196 | if err != nil { 197 | return nil, errors.Wrap(err, "failed to get valid pointer to previous child") 198 | } 199 | 200 | return WrapNode(ptr) 201 | } 202 | 203 | // SetNodeName sets the node name 204 | func (n *XMLNode) SetNodeName(name string) { 205 | _ = clib.XMLSetNodeName(n, name) 206 | } 207 | 208 | // SetNodeValue sets the node value 209 | func (n *XMLNode) SetNodeValue(value string) { 210 | _ = clib.XMLSetNodeValue(n, value) 211 | } 212 | 213 | // AddChild appends the node 214 | func (n *XMLNode) AddChild(child types.Node) error { 215 | return clib.XMLAddChild(n, child) 216 | } 217 | 218 | // TextContent returns the text content 219 | func (n *XMLNode) TextContent() string { 220 | return clib.XMLTextContent(n) 221 | } 222 | 223 | // ToString returns the string representation. 
(But it should probably 224 | // be deprecated) 225 | func (n *XMLNode) ToString(format int, docencoding bool) string { 226 | return clib.XMLToString(n, format, docencoding) 227 | } 228 | 229 | // LookupNamespacePrefix returns the prefix associated with the given URL 230 | func (n *XMLNode) LookupNamespacePrefix(href string) (string, error) { 231 | return clib.XMLLookupNamespacePrefix(n, href) 232 | } 233 | 234 | // LookupNamespaceURI returns the URI associated with the given prefix 235 | func (n *XMLNode) LookupNamespaceURI(prefix string) (string, error) { 236 | return clib.XMLLookupNamespaceURI(n, prefix) 237 | } 238 | 239 | // NodeType returns the XMLNodeType 240 | func (n *XMLNode) NodeType() clib.XMLNodeType { 241 | return clib.XMLGetNodeType(n) 242 | } 243 | 244 | // MakeMortal flags the node so that `AutoFree` calls Free() 245 | // to release the underlying C resources. 246 | func (n *XMLNode) MakeMortal() { 247 | n.mortal = true 248 | } 249 | 250 | // MakePersistent flags the node so that `AutoFree` becomes a no-op. 251 | // Make sure to call this if you used `MakeMortal` and `AutoFree`, 252 | // but you then decided to keep the node around. 
253 | func (n *XMLNode) MakePersistent() { 254 | n.mortal = false 255 | } 256 | 257 | // Free releases the underlying C struct 258 | func (n *XMLNode) Free() { 259 | _ = clib.XMLFreeNode(n) 260 | n.ptr = 0 261 | } 262 | 263 | func walk(n types.Node, fn func(types.Node) error) error { 264 | if err := fn(n); err != nil { 265 | return errors.Wrap(err, "failed to call callback") 266 | } 267 | children, err := n.ChildNodes() 268 | if err != nil { 269 | return errors.Wrap(err, "failed to fetch child nodes") 270 | } 271 | for _, c := range children { 272 | if err := walk(c, fn); err != nil { 273 | return errors.Wrap(err, "failed to walk to child nodes") 274 | } 275 | } 276 | return nil 277 | } 278 | 279 | // Walk traverses through all of the nodes 280 | func (n *XMLNode) Walk(fn func(types.Node) error) error { 281 | return walk(n, fn) 282 | } 283 | 284 | // AutoFree allows you to free the underlying C resources. It is 285 | // meant to be called from defer. If you don't call `MakeMortal()` or 286 | // do call `MakePersistent()`, AutoFree is a no-op. 287 | func (n *XMLNode) AutoFree() { 288 | if !n.mortal { 289 | return 290 | } 291 | n.Free() 292 | } 293 | -------------------------------------------------------------------------------- /dom/node_attr.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "github.com/lestrrat-go/libxml2/clib" 5 | ) 6 | 7 | // Free releases the underlying C struct 8 | func (n *Attribute) Free() { 9 | _ = clib.XMLFreeProp(n) 10 | } 11 | 12 | // HasChildNodes returns true if the node contains any child nodes. 13 | // By definition attributes cannot have children, so this always 14 | // returns false 15 | func (n *Attribute) HasChildNodes() bool { 16 | return false 17 | } 18 | 19 | // Value returns the value of the attribute. 
20 | func (n *Attribute) Value() string { 21 | v, err := clib.XMLNodeValue(n) 22 | if err != nil { 23 | return "" 24 | } 25 | return v 26 | } 27 | -------------------------------------------------------------------------------- /dom/node_document.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "github.com/lestrrat-go/libxml2/clib" 5 | "github.com/lestrrat-go/libxml2/types" 6 | "github.com/pkg/errors" 7 | ) 8 | 9 | // CreateDocument creates a new document with version="1.0", and no encoding 10 | func CreateDocument() *Document { 11 | return NewDocument("1.0", "") 12 | } 13 | 14 | // NewDocument creates a new document 15 | func NewDocument(version, encoding string) *Document { 16 | ptr := clib.XMLCreateDocument(version, encoding) 17 | return WrapDocument(ptr) 18 | } 19 | 20 | // Pointer returns the pointer to the underlying C struct 21 | func (d *Document) Pointer() uintptr { 22 | return d.ptr 23 | } 24 | 25 | // AutoFree calls Free() if the document is mortal.
26 | func (d *Document) AutoFree() { 27 | if !d.mortal { 28 | return 29 | } 30 | d.Free() 31 | } 32 | 33 | // MakeMortal sets the flag 34 | func (d *Document) MakeMortal() { 35 | d.mortal = true 36 | } 37 | 38 | // MakePersistent unsets the flag 39 | func (d *Document) MakePersistent() { 40 | d.mortal = false 41 | } 42 | 43 | // IsSameNode checks if the underlying C pointer points to the same C struct 44 | func (d *Document) IsSameNode(n types.Node) bool { 45 | return d.ptr == n.Pointer() 46 | } 47 | 48 | // HasChildNodes returns true if the document node is available 49 | func (d *Document) HasChildNodes() bool { 50 | _, err := d.DocumentElement() 51 | return err != nil 52 | } 53 | 54 | // FirstChild returns the document element 55 | func (d *Document) FirstChild() (types.Node, error) { 56 | root, err := d.DocumentElement() 57 | if err != nil { 58 | return nil, errors.Wrap(err, "failed to get document element") 59 | } 60 | 61 | return root, nil 62 | } 63 | 64 | // LastChild returns the document element 65 | func (d *Document) LastChild() (types.Node, error) { 66 | root, err := d.DocumentElement() 67 | if err != nil { 68 | return nil, errors.Wrap(err, "failed to get document element") 69 | } 70 | 71 | return root, nil 72 | } 73 | 74 | // NextSibling always returns nil for Document 75 | func (d *Document) NextSibling() (types.Node, error) { 76 | return nil, errors.New("document has no siblings") 77 | } 78 | 79 | // PreviousSibling always returns nil for Document 80 | func (d *Document) PreviousSibling() (types.Node, error) { 81 | return nil, errors.New("document has no siblings") 82 | } 83 | 84 | // NodeName always returns an empty string for Document 85 | func (d *Document) NodeName() string { 86 | return "" 87 | } 88 | 89 | // SetNodeName is a no op for document 90 | func (d *Document) SetNodeName(_ string) { 91 | // return errors.New("cannot set node name on a document") 92 | } 93 | 94 | // NodeValue always returns an empty string for Document 95 | func (d 
*Document) NodeValue() string { 96 | return "" 97 | } 98 | 99 | // SetNodeValue is a no op for document 100 | func (d *Document) SetNodeValue(_ string) { 101 | // return errors.New("cannot set node value on a document") 102 | } 103 | 104 | // OwnerDocument always returns the document itself 105 | func (d *Document) OwnerDocument() (types.Document, error) { 106 | return d, nil 107 | } 108 | 109 | // SetDocument always returns an error for a document 110 | func (d *Document) SetDocument(_ types.Document) error { 111 | return errors.New("cannot set document on a document") 112 | } 113 | 114 | // ParentNode always returns an error for a document 115 | func (d *Document) ParentNode() (types.Node, error) { 116 | return nil, errors.New("document has no parent node") 117 | } 118 | 119 | // ParseInContext is currently unimplemented 120 | func (d *Document) ParseInContext(_ string, _ int) (types.Node, error) { 121 | return nil, errors.New("unimplemented") 122 | } 123 | 124 | // Literal is currently just an alias to Dump(false) 125 | func (d *Document) Literal() (string, error) { 126 | return d.Dump(false), nil 127 | } 128 | 129 | // TextContent returns the text content 130 | func (d *Document) TextContent() string { 131 | return clib.XMLTextContent(d) 132 | } 133 | 134 | // ToString is currently just an alias to Dump(false) 135 | func (d *Document) ToString(_ int, b bool) string { 136 | return d.Dump(b) 137 | } 138 | 139 | // ChildNodes returns the document element 140 | func (d *Document) ChildNodes() (types.NodeList, error) { 141 | root, err := d.DocumentElement() 142 | if err != nil { 143 | return nil, errors.Wrap(err, "failed to get document element") 144 | } 145 | 146 | return []types.Node{root}, nil 147 | } 148 | 149 | // Copy is currently unimplemented 150 | func (d *Document) Copy() (types.Node, error) { 151 | // Unimplemented 152 | return nil, errors.New("unimplemented") 153 | } 154 | 155 | // AddChild is a no op for Document 156 | func (d *Document) AddChild(_ 
types.Node) error { 157 | return errors.New("method AddChild is not available for Document node") 158 | } 159 | 160 | // CreateAttribute creates a new attribute 161 | func (d *Document) CreateAttribute(k, v string) (*Attribute, error) { 162 | attr, err := clib.XMLNewDocProp(d, k, v) 163 | if err != nil { 164 | return nil, errors.Wrap(err, "failed to get document property") 165 | } 166 | return wrapAttributeNode(attr), nil 167 | } 168 | 169 | // CreateAttributeNS creates a new attribute with the given XML namespace 170 | func (d *Document) CreateAttributeNS(nsuri, k, v string) (*Attribute, error) { 171 | if nsuri == "" { 172 | return d.CreateAttribute(k, v) 173 | } 174 | 175 | ptr, err := clib.XMLCreateAttributeNS(d, nsuri, k, v) 176 | if err != nil { 177 | return nil, errors.Wrap(err, "failed to create attribute") 178 | } 179 | return wrapAttributeNode(ptr), nil 180 | } 181 | 182 | // CreateCDataSection creates a new CDATA section node 183 | func (d *Document) CreateCDataSection(txt string) (*CDataSection, error) { 184 | cdata, err := clib.XMLNewCDataBlock(d, txt) 185 | if err != nil { 186 | return nil, errors.Wrap(err, "failed to create CDATA block") 187 | } 188 | return wrapCDataSectionNode(cdata), nil 189 | } 190 | 191 | // CreateCommentNode creates a new comment node 192 | func (d *Document) CreateCommentNode(txt string) (*Comment, error) { 193 | ptr, err := clib.XMLNewComment(txt) 194 | if err != nil { 195 | return nil, errors.Wrap(err, "failed to create comment") 196 | } 197 | return wrapCommentNode(ptr), nil 198 | } 199 | 200 | // CreateElement creates a new element node 201 | func (d *Document) CreateElement(name string) (types.Element, error) { 202 | ptr, err := clib.XMLCreateElement(d, name) 203 | if err != nil { 204 | return nil, errors.Wrap(err, "failed to create element") 205 | } 206 | return wrapElementNode(ptr), nil 207 | } 208 | 209 | // CreateElementNS creates a new element node in the given XML namespace 210 | func (d *Document) 
CreateElementNS(nsuri, name string) (types.Element, error) { 211 | ptr, err := clib.XMLCreateElementNS(d, nsuri, name) 212 | if err != nil { 213 | return nil, errors.Wrap(err, "failed to create element") 214 | } 215 | return wrapElementNode(ptr), nil 216 | } 217 | 218 | // CreateTextNode creates a new text node 219 | func (d *Document) CreateTextNode(txt string) (*Text, error) { 220 | ptr, err := clib.XMLNewText(txt) 221 | if err != nil { 222 | return nil, errors.Wrap(err, "failed to create text node") 223 | } 224 | return wrapTextNode(ptr), nil 225 | } 226 | 227 | // DocumentElement returns the root node of the document 228 | func (d *Document) DocumentElement() (types.Node, error) { 229 | n, err := clib.XMLDocumentElement(d) 230 | if err != nil { 231 | return nil, errors.Wrap(err, "failed to get document element") 232 | } 233 | return WrapNode(n) 234 | } 235 | 236 | // Find returns the nodes that can be selected with the 237 | // given xpath string 238 | func (d *Document) Find(xpath string) (types.XPathResult, error) { 239 | root, err := d.DocumentElement() 240 | if err != nil { 241 | return nil, errors.Wrap(err, "failed to get document element") 242 | } 243 | return root.Find(xpath) 244 | } 245 | 246 | // Encoding returns the d 247 | func (d *Document) Encoding() string { 248 | return clib.XMLDocumentEncoding(d) 249 | } 250 | 251 | // Free releases the underlying C struct 252 | func (d *Document) Free() { 253 | _ = clib.XMLFreeDoc(d) 254 | d.ptr = 0 255 | docPool.Put(d) 256 | } 257 | 258 | // String formats the document, always without formatting. 259 | func (d *Document) String() string { 260 | return clib.XMLDocumentString(d, d.Encoding(), false) 261 | } 262 | 263 | // Dump formats the document with or withour formatting. 
264 | func (d *Document) Dump(format bool) string { 265 | return clib.XMLDocumentString(d, d.Encoding(), format) 266 | } 267 | 268 | // NodeType returns the XMLNodeType 269 | func (d *Document) NodeType() clib.XMLNodeType { 270 | return DocumentNode 271 | } 272 | 273 | // SetBaseURI sets the base URI 274 | func (d *Document) SetBaseURI(s string) { 275 | clib.XMLNodeSetBase(d, s) 276 | } 277 | 278 | // SetDocumentElement sets the document element 279 | func (d *Document) SetDocumentElement(n types.Node) error { 280 | return clib.XMLSetDocumentElement(d, n) 281 | } 282 | 283 | // SetEncoding sets the encoding of the document 284 | func (d *Document) SetEncoding(e string) { 285 | clib.XMLSetDocumentEncoding(d, e) 286 | } 287 | 288 | // SetStandalone sets the standalone flag 289 | func (d *Document) SetStandalone(v int) { 290 | clib.XMLSetDocumentStandalone(d, v) 291 | } 292 | 293 | // SetVersion sets the version of the document 294 | func (d *Document) SetVersion(v string) { 295 | clib.XMLSetDocumentVersion(d, v) 296 | } 297 | 298 | // Standalone returns the value of the standalone flag 299 | func (d *Document) Standalone() int { 300 | return clib.XMLDocumentStandalone(d) 301 | } 302 | 303 | // URI returns the document URI 304 | func (d *Document) URI() string { 305 | return clib.XMLDocumentURI(d) 306 | } 307 | 308 | // Version returns the version of the document 309 | func (d *Document) Version() string { 310 | return clib.XMLDocumentVersion(d) 311 | } 312 | 313 | // Walk traverses the nodes in the document 314 | func (d *Document) Walk(fn func(types.Node) error) error { 315 | root, err := d.DocumentElement() 316 | if err != nil { 317 | return errors.Wrap(err, "failed to get document element") 318 | } 319 | return walk(root, fn) 320 | } 321 | 322 | // LookupNamespacePrefix looks for a namespace prefix that matches 323 | // the given namespace URI 324 | func (d *Document) LookupNamespacePrefix(href string) (string, error) { 325 | root, err := d.DocumentElement() 326 | 
if err != nil { 327 | return "", errors.Wrap(err, "failed to get document element") 328 | } 329 | 330 | return root.LookupNamespacePrefix(href) 331 | } 332 | 333 | // LookupNamespaceURI looks for a namespace uri that matches 334 | // the given namespace prefix 335 | func (d *Document) LookupNamespaceURI(prefix string) (string, error) { 336 | root, err := d.DocumentElement() 337 | if err != nil { 338 | return "", errors.Wrap(err, "failed to get document element") 339 | } 340 | 341 | return root.LookupNamespaceURI(prefix) 342 | } 343 | 344 | func (d *Document) RemoveChild(n types.Node) error { 345 | root, err := d.DocumentElement() 346 | if err != nil { 347 | return errors.Wrap(err, "failed to get document element") 348 | } 349 | return root.RemoveChild(n) 350 | } 351 | -------------------------------------------------------------------------------- /dom/node_element.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "strings" 7 | 8 | "github.com/lestrrat-go/libxml2/clib" 9 | "github.com/lestrrat-go/libxml2/types" 10 | ) 11 | 12 | // SetNamespace sets up a new namespace on the given node. 13 | // An XML namespace declaration is explicitly created only if 14 | // the activate flag is enabled, and the namespace is not 15 | // declared in a previous tree hierarchy. 
16 | func (n *Element) SetNamespace(uri, prefix string, activate ...bool) error { 17 | var activateflag bool 18 | if len(activate) < 1 { 19 | activateflag = true 20 | } else { 21 | activateflag = activate[0] 22 | } 23 | 24 | if uri == "" && prefix == "" { 25 | // Empty namespace 26 | doc, err := n.OwnerDocument() 27 | if err != nil { 28 | return err 29 | } 30 | nsptr, err := clib.XMLSearchNs(doc, n, "") 31 | if err != nil { 32 | return err 33 | } 34 | 35 | ns := wrapNamespaceNode(nsptr) 36 | if ns.URI() != "" { 37 | if activateflag { 38 | _ = clib.XMLSetNs(n, nil) 39 | } 40 | } 41 | return nil 42 | } 43 | 44 | if uri == "" { 45 | return errors.New("missing uri for SetNamespace") 46 | } 47 | 48 | ns, err := clib.XMLNewNs(n, uri, prefix) 49 | if err != nil { 50 | return err 51 | } 52 | 53 | if activateflag { 54 | if err := clib.XMLSetNs(n, wrapNamespaceNode(ns)); err != nil { 55 | return err 56 | } 57 | } 58 | return nil 59 | } 60 | 61 | // AppendText adds a new text node 62 | func (n *Element) AppendText(s string) error { 63 | return clib.XMLAppendText(n, s) 64 | } 65 | 66 | // SetAttribute sets an attribute 67 | func (n *Element) SetAttribute(name, value string) error { 68 | return clib.XMLSetProp(n, name, value) 69 | } 70 | 71 | // GetAttribute retrieves the value of an attribute 72 | func (n *Element) GetAttribute(name string) (types.Attribute, error) { 73 | attrNode, err := clib.XMLElementGetAttributeNode(n, name) 74 | if err != nil { 75 | return nil, err 76 | } 77 | return wrapAttributeNode(attrNode), nil 78 | } 79 | 80 | // Attributes returns a list of attributes on a node 81 | func (n *Element) Attributes() ([]types.Attribute, error) { 82 | attrs, err := clib.XMLElementAttributes(n) 83 | if err != nil { 84 | return nil, err 85 | } 86 | ret := make([]types.Attribute, len(attrs)) 87 | for i, attr := range attrs { 88 | ret[i] = wrapAttributeNode(attr) 89 | } 90 | return ret, nil 91 | } 92 | 93 | // RemoveAttribute completely removes an attribute from the node 94 
| func (n *Element) RemoveAttribute(name string) error { 95 | i := strings.IndexByte(name, ':') 96 | if i == -1 { 97 | return clib.XMLUnsetProp(n, name) 98 | } 99 | 100 | // look for the prefix 101 | doc, err := n.OwnerDocument() 102 | if err != nil { 103 | return err 104 | } 105 | ns, err := clib.XMLSearchNs(doc, n, name[:i]) 106 | if err != nil { 107 | return ErrAttributeNotFound 108 | } 109 | 110 | return clib.XMLUnsetNsProp(n, wrapNamespaceNode(ns), name) 111 | } 112 | 113 | // GetNamespaces returns Namespace objects associated with this 114 | // element. WARNING: This method currently returns namespace 115 | // objects which allocates C structures for each namespace. 116 | // Therefore you MUST free the structures, or otherwise you 117 | // WILL leak memory. 118 | func (n *Element) GetNamespaces() ([]types.Namespace, error) { 119 | list, err := clib.XMLElementNamespaces(n) 120 | if err != nil { 121 | return nil, err 122 | } 123 | ret := make([]types.Namespace, len(list)) 124 | for i, nsptr := range list { 125 | ret[i] = wrapNamespaceNode(nsptr) 126 | } 127 | return ret, nil 128 | } 129 | 130 | // Literal returns a stringified version of this node and its 131 | // children, inclusive. 
132 | func (n Element) Literal() (string, error) { 133 | buf := bytes.Buffer{} 134 | children, err := n.ChildNodes() 135 | if err != nil { 136 | return "", err 137 | } 138 | for _, c := range children { 139 | l, err := c.Literal() 140 | if err != nil { 141 | return "", err 142 | } 143 | buf.WriteString(l) 144 | } 145 | return buf.String(), nil 146 | } 147 | -------------------------------------------------------------------------------- /dom/node_namespace.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "github.com/lestrrat-go/libxml2/clib" 5 | ) 6 | 7 | // URI returns the namespace URL 8 | func (n *Namespace) URI() string { 9 | return clib.XMLNamespaceHref(n) 10 | } 11 | 12 | // Prefix returns the prefix for this namespace 13 | func (n *Namespace) Prefix() string { 14 | return clib.XMLNamespacePrefix(n) 15 | } 16 | 17 | // Free releases the underlying C struct 18 | func (n *Namespace) Free() { 19 | clib.XMLNamespaceFree(n) 20 | n.ptr = 0 21 | } 22 | 23 | // String returns the stringified Namespace 24 | func (n *Namespace) String() string { 25 | prefix := n.Prefix() 26 | if prefix != "" { 27 | prefix = ":" + prefix 28 | } 29 | return "xmlns" + prefix + `="` + n.URI() + `"` 30 | } 31 | -------------------------------------------------------------------------------- /dom/node_test.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/lestrrat-go/libxml2/clib" 8 | "github.com/lestrrat-go/libxml2/types" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func init() { 13 | clib.ReportErrors(false) 14 | } 15 | 16 | type XMLNodeTypeToString struct { 17 | v clib.XMLNodeType 18 | e string 19 | } 20 | 21 | func TestXMLNodeTypeStringer(t *testing.T) { 22 | values := []XMLNodeTypeToString{ 23 | { 24 | v: ElementNode, 25 | e: "ElementNode", 26 | }, 27 | { 28 | v: AttributeNode, 29 | e: 
"AttributeNode", 30 | }, 31 | { 32 | v: TextNode, 33 | e: "TextNode", 34 | }, 35 | { 36 | v: CDataSectionNode, 37 | e: "CDataSectionNode", 38 | }, 39 | { 40 | v: EntityRefNode, 41 | e: "EntityRefNode", 42 | }, 43 | { 44 | v: EntityNode, 45 | e: "EntityNode", 46 | }, 47 | { 48 | v: PiNode, 49 | e: "PiNode", 50 | }, 51 | { 52 | v: CommentNode, 53 | e: "CommentNode", 54 | }, 55 | { 56 | v: DocumentNode, 57 | e: "DocumentNode", 58 | }, 59 | { 60 | v: DocumentTypeNode, 61 | e: "DocumentTypeNode", 62 | }, 63 | { 64 | v: DocumentFragNode, 65 | e: "DocumentFragNode", 66 | }, 67 | { 68 | v: NotationNode, 69 | e: "NotationNode", 70 | }, 71 | { 72 | v: HTMLDocumentNode, 73 | e: "HTMLDocumentNode", 74 | }, 75 | { 76 | v: DTDNode, 77 | e: "DTDNode", 78 | }, 79 | { 80 | v: ElementDecl, 81 | e: "ElementDecl", 82 | }, 83 | { 84 | v: AttributeDecl, 85 | e: "AttributeDecl", 86 | }, 87 | { 88 | v: EntityDecl, 89 | e: "EntityDecl", 90 | }, 91 | { 92 | v: NamespaceDecl, 93 | e: "NamespaceDecl", 94 | }, 95 | { 96 | v: XIncludeStart, 97 | e: "XIncludeStart", 98 | }, 99 | { 100 | v: XIncludeEnd, 101 | e: "XIncludeEnd", 102 | }, 103 | { 104 | v: DocbDocumentNode, 105 | e: "DocbDocumentNode", 106 | }, 107 | } 108 | 109 | for _, d := range values { 110 | if d.v.String() != d.e { 111 | t.Errorf("e '%s', got '%s'", d.e, d.v.String()) 112 | } 113 | } 114 | } 115 | 116 | func TestDOM(t *testing.T) { 117 | doc := CreateDocument() 118 | defer doc.Free() 119 | 120 | root, err := doc.CreateElement("root") 121 | if err != nil { 122 | t.Errorf("Failed to create root element: %s", err) 123 | return 124 | } 125 | 126 | doc.SetDocumentElement(root) 127 | var toRemove types.Node 128 | for i := 1; i <= 3; i++ { 129 | child, err := doc.CreateElement(fmt.Sprintf("child%d", i)) 130 | if !assert.NoError(t, err, "dom.CreateElement(child%d) should succeed", i) { 131 | return 132 | } 133 | child.AppendText(fmt.Sprintf("text%d", i)) 134 | root.AddChild(child) 135 | 136 | if i == 2 { 137 | toRemove = child 138 | } 
139 | } 140 | 141 | // Temporary test 142 | expected := ` 143 | text1text2text3 144 | ` 145 | if !assert.Equal(t, expected, doc.String(), "Failed to create XML document") { 146 | return 147 | } 148 | 149 | if !assert.NoError(t, root.RemoveChild(toRemove), "RemoveChild should succeed") { 150 | return 151 | } 152 | expected = ` 153 | text1text3 154 | ` 155 | if !assert.Equal(t, expected, doc.String(), "XML should match") { 156 | return 157 | } 158 | } 159 | 160 | func TestNode_StandaloneWithNamespaces(t *testing.T) { 161 | uri := "http://kungfoo" 162 | prefix := "foo" 163 | name := "bar" 164 | 165 | doc := CreateDocument() 166 | elem, err := doc.CreateElementNS(uri, prefix+":"+name) 167 | if !assert.NoError(t, err, "CreateElementNS snould succeed") { 168 | return 169 | } 170 | 171 | lookedup, err := elem.LookupNamespaceURI(prefix) 172 | if !assert.NoError(t, err, "LookupNamespaceURI should succeed") { 173 | return 174 | } 175 | if !assert.Equal(t, uri, lookedup, "LookupNamespaceURI succeeds") { 176 | return 177 | } 178 | 179 | lookedup, err = elem.LookupNamespacePrefix(uri) 180 | if !assert.NoError(t, err, "LookupNamespacePrefix should succeed") { 181 | return 182 | } 183 | if !assert.Equal(t, prefix, lookedup, "LookupNamespacePrefix succeeds") { 184 | return 185 | } 186 | 187 | nslist, err := elem.GetNamespaces() 188 | if !assert.NoError(t, err, "GetNamespaces succeeds") { 189 | return 190 | } 191 | 192 | defer func() { 193 | for _, ns := range nslist { 194 | ns.Free() 195 | } 196 | }() 197 | 198 | if !assert.Len(t, nslist, 1, "GetNamespaces returns 1 namespace") { 199 | return 200 | } 201 | } 202 | 203 | func TestAttribute(t *testing.T) { 204 | doc := CreateDocument() 205 | attr, err := doc.CreateAttribute("foo", "bar") 206 | if !assert.NoError(t, err, "attribute created") { 207 | return 208 | } 209 | 210 | if !assert.NotPanics(t, func() { attr.Free() }, "free should not panic") { 211 | return 212 | } 213 | } 214 | 215 | func TestCreateElementNS(t *testing.T) { 216 
| doc := CreateDocument() 217 | root, err := doc.CreateElementNS("http://foo.bar.baz", "foo:root") 218 | if !assert.NoError(t, err, "CreateElementNS should succeed") { 219 | return 220 | } 221 | doc.SetDocumentElement(root) 222 | 223 | n1, err := doc.CreateElementNS("http://foo.bar.baz", "foo:n1") 224 | if !assert.NoError(t, err, "CreateElementNS should succeed") { 225 | return 226 | } 227 | root.AddChild(n1) 228 | 229 | n2, err := doc.CreateElementNS("http://foo.bar.baz", "bar:n2") 230 | if !assert.NoError(t, err, "CreateElementNS should succeed") { 231 | return 232 | } 233 | root.AddChild(n2) 234 | 235 | _, err = doc.CreateElementNS("http://foo.bar.baz.quux", "foo:n3") 236 | if !assert.Error(t, err, "CreateElementNS should fail") { 237 | return 238 | } 239 | 240 | t.Logf("%s", doc.Dump(false)) 241 | } 242 | -------------------------------------------------------------------------------- /dom/node_text.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "github.com/lestrrat-go/libxml2/clib" 5 | ) 6 | 7 | func (n *CDataSection) Literal() (string, error) { 8 | return clib.XMLNodeValue(n) 9 | } 10 | 11 | // Data returns the content associated with this node 12 | func (n *Text) Data() string { 13 | return clib.XMLTextData(n) 14 | } 15 | -------------------------------------------------------------------------------- /dom/node_wrap.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | // Auto-generated by internal/cmd/genwrapnode/genwrapnode.go. DO NOT EDIT! 
4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/lestrrat-go/libxml2/clib" 9 | "github.com/lestrrat-go/libxml2/types" 10 | ) 11 | 12 | func wrapNamespaceNode(ptr uintptr) *Namespace { 13 | var n Namespace 14 | n.ptr = ptr 15 | return &n 16 | } 17 | 18 | func wrapAttributeNode(ptr uintptr) *Attribute { 19 | var n Attribute 20 | n.ptr = ptr 21 | return &n 22 | } 23 | 24 | func wrapCDataSectionNode(ptr uintptr) *CDataSection { 25 | var n CDataSection 26 | n.ptr = ptr 27 | return &n 28 | } 29 | 30 | func wrapCommentNode(ptr uintptr) *Comment { 31 | var n Comment 32 | n.ptr = ptr 33 | return &n 34 | } 35 | 36 | func wrapElementNode(ptr uintptr) *Element { 37 | var n Element 38 | n.ptr = ptr 39 | return &n 40 | } 41 | 42 | func wrapTextNode(ptr uintptr) *Text { 43 | var n Text 44 | n.ptr = ptr 45 | return &n 46 | } 47 | 48 | func wrapPiNode(ptr uintptr) *Pi { 49 | var n Pi 50 | n.ptr = ptr 51 | return &n 52 | } 53 | 54 | // WrapNode is a function created with the sole purpose of allowing 55 | // go-libxml2 consumers that can generate a C.xmlNode pointer to 56 | // create libxml2.Node types, e.g. go-xmlsec. 
57 | func WrapNode(n uintptr) (types.Node, error) { 58 | switch typ := clib.XMLGetNodeTypeRaw(n); typ { 59 | case clib.AttributeNode: 60 | return wrapAttributeNode(n), nil 61 | case clib.CDataSectionNode: 62 | return wrapCDataSectionNode(n), nil 63 | case clib.CommentNode: 64 | return wrapCommentNode(n), nil 65 | case clib.ElementNode: 66 | return wrapElementNode(n), nil 67 | case clib.TextNode: 68 | return wrapTextNode(n), nil 69 | case clib.PiNode: 70 | return wrapPiNode(n), nil 71 | default: 72 | return nil, fmt.Errorf("unknown node: %d", typ) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /dom/serialize.go: -------------------------------------------------------------------------------- 1 | package dom 2 | 3 | import ( 4 | "github.com/lestrrat-go/libxml2/clib" 5 | "github.com/lestrrat-go/libxml2/types" 6 | ) 7 | 8 | // Serialize produces serialization of the document, canonicalized. 9 | func (s C14NSerialize) Serialize(n types.Node) (string, error) { 10 | /* 11 | * Below document is taken from libxml2 directly. Pay special attention 12 | * to the required settings when parsing the document to be canonicalized. 13 | * 14 | * --- 15 | * Canonical form of an XML document could be created if and only if 16 | * a) default attributes (if any) are added to all nodes 17 | * b) all character and parsed entity references are resolved 18 | * In order to achieve this in libxml2 the document MUST be loaded with 19 | * following global setings: 20 | * 21 | * xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS; 22 | * xmlSubstituteEntitiesDefault(1); 23 | * 24 | * or corresponding parser context setting: 25 | * xmlParserCtxtPtr ctxt; 26 | * 27 | * ... 28 | * ctxt->loadsubset = XML_DETECT_IDS | XML_COMPLETE_ATTRS; 29 | * ctxt->replaceEntities = 1; 30 | * ... 
31 | * --- 32 | * 33 | * In go-libxml2, this translates to: 34 | * 35 | * options = XMLParserDTDLoad | XMLParserDTDAttr | XMLParserNoEnt 36 | * 37 | */ 38 | switch n.(type) { 39 | case *Document: 40 | default: 41 | return "", ErrInvalidNodeType 42 | } 43 | 44 | return clib.XMLC14NDocDumpMemory(n, int(s.Mode), s.WithComments) 45 | } 46 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/lestrrat-go/libxml2 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/pkg/errors v0.9.1 7 | github.com/stretchr/testify v1.8.4 8 | gopkg.in/xmlpath.v1 v1.0.0-20140413065638-a146725ea6e7 9 | ) 10 | 11 | require ( 12 | github.com/davecgh/go-spew v1.1.1 // indirect 13 | github.com/pmezard/go-difflib v1.0.0 // indirect 14 | gopkg.in/yaml.v3 v3.0.1 // indirect 15 | launchpad.net/gocheck v0.0.0-20140225173054-000000000087 // indirect 16 | launchpad.net/xmlpath v0.0.0-20130614043138-000000000004 // indirect 17 | ) 18 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 4 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 8 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 
h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 10 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 11 | gopkg.in/xmlpath.v1 v1.0.0-20140413065638-a146725ea6e7 h1:zibSPXbkfB1Dwl76rJgLa68xcdHu42qmFTe6vAnU4wA= 12 | gopkg.in/xmlpath.v1 v1.0.0-20140413065638-a146725ea6e7/go.mod h1:wo0SW5T6XqIKCCAge330Cd5sm+7VI6v85OrQHIk50KM= 13 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 14 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 15 | launchpad.net/gocheck v0.0.0-20140225173054-000000000087 h1:Izowp2XBH6Ya6rv+hqbceQyw/gSGoXfH/UPoTGduL54= 16 | launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80Vse0e+BUHsHMTEhd0O4cpUHr/e/BUM= 17 | launchpad.net/xmlpath v0.0.0-20130614043138-000000000004 h1:B8nNZBUrx8YufDCAJjvO/lVs4GxXMQHyrjwJdJzXMFg= 18 | launchpad.net/xmlpath v0.0.0-20130614043138-000000000004/go.mod h1:vqyExLOM3qBx7mvYRkoxjSCF945s0mbe7YynlKYXtsA= 19 | -------------------------------------------------------------------------------- /html.go: -------------------------------------------------------------------------------- 1 | package libxml2 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | 7 | "github.com/lestrrat-go/libxml2/clib" 8 | "github.com/lestrrat-go/libxml2/dom" 9 | "github.com/lestrrat-go/libxml2/parser" 10 | "github.com/lestrrat-go/libxml2/types" 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | // ParseHTML parses an HTML document. You can omit the options 15 | // argument, or you can provide one bitwise-or'ed option 16 | func ParseHTML(content []byte, options ...parser.HTMLOption) (types.Document, error) { 17 | return ParseHTMLString(string(content), options...) 18 | } 19 | 20 | // ParseHTMLString parses an HTML document. 
You can omit the options 21 | // argument, or you can provide one bitwise-or'ed option 22 | func ParseHTMLString(content string, options ...parser.HTMLOption) (types.Document, error) { 23 | var option parser.HTMLOption 24 | if len(options) > 0 { 25 | option = options[0] 26 | } else { 27 | option = parser.DefaultHTMLOptions 28 | } 29 | docptr, err := clib.HTMLReadDoc(content, "", "", int(option)) 30 | if err != nil { 31 | return nil, errors.Wrap(err, "failed to read document") 32 | } 33 | 34 | if docptr == 0 { 35 | return nil, errors.Wrap(clib.ErrInvalidDocument, "failed to get valid document pointer") 36 | } 37 | return dom.WrapDocument(docptr), nil 38 | } 39 | 40 | // ParseHTMLReader parses an HTML document. You can omit the options 41 | // argument, or you can provide one bitwise-or'ed option 42 | func ParseHTMLReader(in io.Reader, options ...parser.HTMLOption) (types.Document, error) { 43 | buf := &bytes.Buffer{} 44 | if _, err := buf.ReadFrom(in); err != nil { 45 | return nil, errors.Wrap(err, "failed to rea from io.Reader") 46 | } 47 | 48 | return ParseHTMLString(buf.String(), options...) 49 | } 50 | -------------------------------------------------------------------------------- /html_test.go: -------------------------------------------------------------------------------- 1 | package libxml2_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/lestrrat-go/libxml2" 7 | "github.com/lestrrat-go/libxml2/xpath" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestParseHTML(t *testing.T) { 12 | doc, err := libxml2.ParseHTMLString(`

Hello, World!

Lorem Ipsum

`) 13 | if err != nil { 14 | t.Errorf("Failed to parse: %s", err) 15 | return 16 | } 17 | defer doc.Free() 18 | 19 | root, err := doc.DocumentElement() 20 | if !assert.NoError(t, err, "DocumentElement() should succeed") { 21 | return 22 | } 23 | if !assert.True(t, root.IsSameNode(root), "root == root") { 24 | return 25 | } 26 | 27 | nodes := xpath.NodeList(doc.Find("/html/body/h1")) 28 | if len(nodes) != 1 { 29 | t.Errorf("Could not find matching nodes") 30 | return 31 | } 32 | 33 | if nodes[0].TextContent() != "Hello, World!" { 34 | t.Errorf("h1 content is not 'Hello, World!', got %s", nodes[0].TextContent()) 35 | return 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /internal/cmd/genwrapnode/genwrapnode.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "go/format" 7 | "io" 8 | "log" 9 | "os" 10 | "strconv" 11 | 12 | "github.com/pkg/errors" 13 | ) 14 | 15 | func main() { 16 | if err := _main(); err != nil { 17 | log.Printf("%s", err) 18 | os.Exit(1) 19 | } 20 | } 21 | 22 | func _main() error { 23 | var buf bytes.Buffer 24 | 25 | buf.WriteString("package dom") 26 | buf.WriteString("\n\n// Auto-generated by internal/cmd/genwrapnode/genwrapnode.go. 
DO NOT EDIT!") 27 | buf.WriteString("\n\nimport (") 28 | buf.WriteString("\n\"fmt\"\n") 29 | for _, lib := range []string{"github.com/lestrrat-go/libxml2/clib", "github.com/lestrrat-go/libxml2/types"} { 30 | fmt.Fprintf(&buf, "\n%s", strconv.Quote(lib)) 31 | } 32 | buf.WriteString("\n)") 33 | 34 | nodeTypes := []string{ 35 | `Namespace`, 36 | `Attribute`, 37 | `CDataSection`, 38 | `Comment`, 39 | `Element`, 40 | `Text`, 41 | `Pi`, 42 | } 43 | 44 | for _, typ := range nodeTypes { 45 | fmt.Fprintf(&buf, "\n\nfunc wrap%sNode(ptr uintptr) *%s {", typ, typ) 46 | fmt.Fprintf(&buf, "\nvar n %s", typ) 47 | buf.WriteString("\nn.ptr = ptr") 48 | buf.WriteString("\nreturn &n") 49 | buf.WriteString("\n}") 50 | } 51 | 52 | buf.WriteString("\n\n// WrapNode is a function created with the sole purpose of allowing") 53 | buf.WriteString("\n// go-libxml2 consumers that can generate a C.xmlNode pointer to") 54 | buf.WriteString("\n// create libxml2.Node types, e.g. go-xmlsec.") 55 | buf.WriteString("\nfunc WrapNode(n uintptr) (types.Node, error) {") 56 | buf.WriteString("\nswitch typ := clib.XMLGetNodeTypeRaw(n); typ {") 57 | 58 | for _, typ := range nodeTypes { 59 | // XXX hmm, this never existed. don't have time to debug right now. 60 | // possibly an omission bug? 61 | if typ == "Namespace" { 62 | continue 63 | } 64 | fmt.Fprintf(&buf, "\ncase clib.%sNode:", typ) 65 | fmt.Fprintf(&buf, "\nreturn wrap%sNode(n), nil", typ) 66 | } 67 | 68 | buf.WriteString("\ndefault:") 69 | buf.WriteString("\nreturn nil, fmt.Errorf(\"unknown node: %%d\", typ)") 70 | buf.WriteString("\n}") 71 | buf.WriteString("\n}") 72 | 73 | src, err := format.Source(buf.Bytes()) 74 | if err != nil { 75 | log.Printf("%s", buf.Bytes()) 76 | return err 77 | } 78 | 79 | var out io.Writer = os.Stdout 80 | args := os.Args 81 | if len(args) > 2 && args[1] == "--" { 82 | args = append(append([]string(nil), args[1:]...), args[2:]...) 
83 | } 84 | 85 | if len(args) > 1 { 86 | f, err := os.OpenFile(args[1], os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 87 | if err != nil { 88 | return errors.Wrapf(err, `failed to open %s`, args[1]) 89 | } 90 | defer f.Close() 91 | out = f 92 | } 93 | 94 | _, _ = out.Write(src) 95 | return nil 96 | } 97 | -------------------------------------------------------------------------------- /internal/debug/debug_off.go: -------------------------------------------------------------------------------- 1 | //go:build !debug 2 | // +build !debug 3 | 4 | package debug 5 | 6 | const Enabled = false 7 | 8 | // Printf is no op unless you compile with the `debug` tag 9 | func Printf(_ string, _ ...interface{}) {} 10 | -------------------------------------------------------------------------------- /internal/debug/debug_on.go: -------------------------------------------------------------------------------- 1 | //+build debug 2 | 3 | package debug 4 | 5 | import ( 6 | "log" 7 | "os" 8 | ) 9 | 10 | const Enabled = true 11 | 12 | var logger = log.New(os.Stdout, "|DEBUG| ", 0) 13 | 14 | // Printf prints debug messages. Only available if compiled with "debug" tag 15 | func Printf(f string, args ...interface{}) { 16 | logger.Printf(f, args...) 
17 | } 18 | -------------------------------------------------------------------------------- /internal/option/interface.go: -------------------------------------------------------------------------------- 1 | package option 2 | 3 | const ( 4 | OptKeyWithURI = `with-uri` 5 | ) 6 | -------------------------------------------------------------------------------- /internal/option/option.go: -------------------------------------------------------------------------------- 1 | package option 2 | 3 | type Interface interface { 4 | Name() string 5 | Value() interface{} 6 | } 7 | 8 | type Option struct { 9 | name string 10 | value interface{} 11 | } 12 | 13 | func New(name string, value interface{}) *Option { 14 | return &Option{ 15 | name: name, 16 | value: value, 17 | } 18 | } 19 | 20 | func (o *Option) Name() string { 21 | return o.name 22 | } 23 | func (o *Option) Value() interface{} { 24 | return o.value 25 | } 26 | -------------------------------------------------------------------------------- /libxml2.go: -------------------------------------------------------------------------------- 1 | //go:generate go run internal/cmd/genwrapnode/genwrapnode.go -- dom/node_wrap.go 2 | 3 | /* 4 | Package libxml2 is an interface to libxml2 library, providing XML and HTML parsers 5 | with DOM interface. The inspiration is Perl5's XML::LibXML module. 6 | 7 | This library is still in very early stages of development. API may still change 8 | without notice. 9 | 10 | For the time being, the API is being written so that they are as close as we 11 | can get to DOM Layer 3, but some methods will, for the time being, be punted 12 | and aliases for simpler methods that don't necessarily check for the DOM's 13 | correctness will be used. 14 | 15 | Also, the return values are still shaky -- I'm still debating how to handle error cases gracefully. 
16 | */ 17 | package libxml2 18 | -------------------------------------------------------------------------------- /libxml2_bench_test.go: -------------------------------------------------------------------------------- 1 | // This file is build-tag protected because it involves loading an external 2 | // library (xmlpath) 3 | package libxml2_test 4 | 5 | import ( 6 | "bytes" 7 | "encoding/xml" 8 | "os" 9 | "path/filepath" 10 | "testing" 11 | 12 | "github.com/lestrrat-go/libxml2" 13 | "github.com/lestrrat-go/libxml2/dom" 14 | "github.com/lestrrat-go/libxml2/xpath" 15 | "github.com/stretchr/testify/assert" 16 | "gopkg.in/xmlpath.v1" 17 | ) 18 | 19 | var xmlfile = filepath.Join("test", "feed.atom") 20 | 21 | func BenchmarkXmlpathXmlpath(b *testing.B) { 22 | f, err := os.Open(xmlfile) 23 | if err != nil { 24 | b.Fatalf("%s", err) 25 | } 26 | 27 | root, err := xmlpath.Parse(f) 28 | if err != nil { 29 | b.Fatalf("%s", err) 30 | } 31 | for i := 0; i < b.N; i++ { 32 | p, err := xmlpath.Compile(`//entry`) 33 | if err != nil { 34 | b.Fatalf("%s", err) 35 | } 36 | it := p.Iter(root) 37 | for it.Next() { 38 | n := it.Node() 39 | _ = n 40 | } 41 | } 42 | } 43 | 44 | func TestBenchmarkLibxml2Xmlpath(t *testing.T) { 45 | f, err := os.Open(xmlfile) 46 | if !assert.NoError(t, err, "os.Open succeeds") { 47 | return 48 | } 49 | 50 | doc, err := libxml2.ParseReader(f) 51 | if !assert.NoError(t, err, "ParseReader succeeds") { 52 | return 53 | } 54 | 55 | xpc, err := xpath.NewContext(doc) 56 | if !assert.NoError(t, err, "xpath.NewContext succeeds") { 57 | return 58 | } 59 | xpc.RegisterNS("atom", "http://www.w3.org/2005/Atom") 60 | 61 | res, err := xpc.Find(`//atom:entry`) 62 | if !assert.NoError(t, err, "xpc.Find succeeds") { 63 | return 64 | } 65 | defer res.Free() 66 | 67 | iter := res.NodeIter() 68 | if !assert.NotEmpty(t, iter, "res.NodeIter succeeds") { 69 | return 70 | } 71 | 72 | count := 0 73 | for iter.Next() { 74 | n := iter.Node() 75 | if !assert.NotEmpty(t, n, "iter.Node 
returns something") { 76 | return 77 | } 78 | count++ 79 | } 80 | if !assert.True(t, count > 0, "there's at least 1 node") { 81 | return 82 | } 83 | } 84 | 85 | func BenchmarkLibxml2Xmlpath(b *testing.B) { 86 | f, err := os.Open(xmlfile) 87 | if err != nil { 88 | b.Fatalf("%s", err) 89 | } 90 | 91 | doc, err := libxml2.ParseReader(f) 92 | if err != nil { 93 | b.Fatalf("%s", err) 94 | } 95 | 96 | xpc, err := xpath.NewContext(doc) 97 | if err != nil { 98 | b.Fatalf("%s", err) 99 | } 100 | xpc.RegisterNS("atom", "http://www.w3.org/2005/Atom") 101 | for i := 0; i < b.N; i++ { 102 | iter := xpath.NodeIter(xpc.Find(`//atom:entry`)) 103 | for iter.Next() { 104 | n := iter.Node() 105 | _ = n 106 | } 107 | } 108 | } 109 | 110 | //nolint:musttag 111 | type Foo struct { 112 | XMLName xml.Name `xml:"https://github.com/lestrrat-go/libxml2/foo foo:foo"` 113 | Field1 string 114 | Field2 string `xml:",attr"` 115 | } 116 | 117 | func BenchmarkEncodingXMLDOM(b *testing.B) { 118 | var buf bytes.Buffer 119 | f := Foo{ 120 | Field1: "Hello, World!", 121 | Field2: "Hello, Attribute!", 122 | } 123 | for i := 0; i < b.N; i++ { 124 | buf.Reset() 125 | enc := xml.NewEncoder(&buf) 126 | enc.Encode(f) 127 | } 128 | } 129 | 130 | func BenchmarkLibxml2DOM(b *testing.B) { 131 | var buf bytes.Buffer 132 | const nsuri = `https://github.com/lestrrat-go/libxml2/foo` 133 | f := Foo{ 134 | Field1: "Hello, World!", 135 | Field2: "Hello, Attribute!", 136 | } 137 | for i := 0; i < b.N; i++ { 138 | d := dom.CreateDocument() 139 | 140 | root, err := d.CreateElementNS(nsuri, "foo:foo") 141 | if err != nil { 142 | d.Free() 143 | panic(err) 144 | } 145 | d.SetDocumentElement(root) 146 | 147 | f1xml, err := d.CreateElement("Field1") 148 | if err != nil { 149 | d.Free() 150 | panic(err) 151 | } 152 | root.AddChild(f1xml) 153 | 154 | f1xml.SetAttribute("Field2", f.Field2) 155 | 156 | f1xml.AppendText(f.Field1) 157 | buf.Reset() 158 | buf.WriteString(d.Dump(false)) 159 | d.Free() 160 | } 161 | } 162 | 
-------------------------------------------------------------------------------- /libxml2_example_test.go: -------------------------------------------------------------------------------- 1 | package libxml2_test 2 | 3 | import ( 4 | "log" 5 | "net/http" 6 | 7 | "github.com/lestrrat-go/libxml2" 8 | "github.com/lestrrat-go/libxml2/parser" 9 | "github.com/lestrrat-go/libxml2/types" 10 | "github.com/lestrrat-go/libxml2/xpath" 11 | ) 12 | 13 | //nolint:testableexamples 14 | func ExampleXML() { 15 | //nolint:noctx 16 | res, err := http.Get("http://blog.golang.org/feed.atom") 17 | if err != nil { 18 | panic("failed to get blog.golang.org: " + err.Error()) 19 | } 20 | 21 | p := parser.New() 22 | doc, err := p.ParseReader(res.Body) 23 | defer res.Body.Close() 24 | 25 | if err != nil { 26 | panic("failed to parse XML: " + err.Error()) 27 | } 28 | defer doc.Free() 29 | 30 | doc.Walk(func(n types.Node) error { 31 | log.Println(n.NodeName()) 32 | return nil 33 | }) 34 | 35 | root, err := doc.DocumentElement() 36 | if err != nil { 37 | log.Printf("Failed to fetch document element: %s", err) 38 | return 39 | } 40 | 41 | ctx, err := xpath.NewContext(root) 42 | if err != nil { 43 | log.Printf("Failed to create xpath context: %s", err) 44 | return 45 | } 46 | defer ctx.Free() 47 | 48 | ctx.RegisterNS("atom", "http://www.w3.org/2005/Atom") 49 | title := xpath.String(ctx.Find("/atom:feed/atom:title/text()")) 50 | log.Printf("feed title = %s", title) 51 | } 52 | 53 | //nolint:testableexamples 54 | func ExampleHTML() { 55 | //nolint:noctx 56 | res, err := http.Get("http://golang.org") 57 | if err != nil { 58 | panic("failed to get golang.org: " + err.Error()) 59 | } 60 | defer res.Body.Close() 61 | 62 | doc, err := libxml2.ParseHTMLReader(res.Body) 63 | if err != nil { 64 | panic("failed to parse HTML: " + err.Error()) 65 | } 66 | defer doc.Free() 67 | 68 | doc.Walk(func(n types.Node) error { 69 | log.Println(n.NodeName()) 70 | return nil 71 | }) 72 | 73 | nodes := 
xpath.NodeList(doc.Find(`//div[@id="menu"]/a`)) 74 | for i := 0; i < len(nodes); i++ { 75 | log.Printf("Found node: %s", nodes[i].NodeName()) 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | package libxml2 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/lestrrat-go/libxml2/parser" 7 | "github.com/lestrrat-go/libxml2/types" 8 | ) 9 | 10 | // Parse parses the given buffer and returns a Document. 11 | func Parse(buf []byte, o ...parser.Option) (types.Document, error) { 12 | p := parser.New(o...) 13 | return p.Parse(buf) 14 | } 15 | 16 | // ParseString parses the given string and returns a Document. 17 | func ParseString(s string, o ...parser.Option) (types.Document, error) { 18 | p := parser.New(o...) 19 | return p.ParseString(s) 20 | } 21 | 22 | // ParseReader parses XML from the given io.Reader and returns a Document. 23 | func ParseReader(rdr io.Reader, o ...parser.Option) (types.Document, error) { 24 | p := parser.New(o...) 
var (
	// ErrMalformedXML is returned when the XML source is malformed
	ErrMalformedXML = errors.New("malformed XML")
)

// HTMLOption represents the HTML parser options that
// can be used when parsing HTML. Values are bit flags and may be combined
// with bitwise OR.
type HTMLOption int

// Every flag carries the HTMLOption type explicitly. A constant with its own
// initializer does not inherit the type of the previous line, so leaving the
// type off would silently declare an untyped integer constant.
const (
	// HTMLParseRecover enables relaxed parsing
	HTMLParseRecover HTMLOption = 1 << 0
	// HTMLParseNoDefDTD disables using a default doctype when absent
	HTMLParseNoDefDTD HTMLOption = 1 << 2
	// HTMLParseNoError suppresses error reports
	HTMLParseNoError HTMLOption = 1 << 5
	// HTMLParseNoWarning suppresses warning reports
	HTMLParseNoWarning HTMLOption = 1 << 6
	// HTMLParsePedantic enables pedantic error reporting
	HTMLParsePedantic HTMLOption = 1 << 7
	// HTMLParseNoBlanks removes blank nodes
	HTMLParseNoBlanks HTMLOption = 1 << 8
	// HTMLParseNoNet forbids network access during parsing
	HTMLParseNoNet HTMLOption = 1 << 11
	// HTMLParseNoImplied disables implied html/body elements
	HTMLParseNoImplied HTMLOption = 1 << 13
	// HTMLParseCompact enables compaction of small text nodes
	HTMLParseCompact HTMLOption = 1 << 16
	// HTMLParseIgnoreEnc ignores internal document encoding hints
	HTMLParseIgnoreEnc HTMLOption = 1 << 21
)

// DefaultHTMLOptions represents the default set of options
// used in the ParseHTML* functions
const DefaultHTMLOptions HTMLOption = HTMLParseCompact | HTMLParseNoBlanks | HTMLParseNoError | HTMLParseNoWarning

// Option represents the parser option bit
type Option int

// XML parser flags. Each named flag occupies one bit, assigned in
// declaration order starting from bit 0.
const (
	XMLParseRecover    Option = 1 << iota /* recover on errors */
	XMLParseNoEnt                         /* substitute entities */
	XMLParseDTDLoad                       /* load the external subset */
	XMLParseDTDAttr                       /* default DTD attributes */
	XMLParseDTDValid                      /* validate with the DTD */
	XMLParseNoError                       /* suppress error reports */
	XMLParseNoWarning                     /* suppress warning reports */
	XMLParsePedantic                      /* pedantic error reporting */
	XMLParseNoBlanks                      /* remove blank nodes */
	XMLParseSAX1                          /* use the SAX1 interface internally */
	XMLParseXInclude                      /* Implement XInclude substitution */
	XMLParseNoNet                         /* Forbid network access */
	XMLParseNoDict                        /* Do not reuse the context dictionary */
	XMLParseNsclean                       /* remove redundant namespaces declarations */
	XMLParseNoCDATA                       /* merge CDATA as text nodes */
	XMLParseNoXIncNode                    /* do not generate XINCLUDE START/END nodes */
	XMLParseCompact                       /* compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree) */
	XMLParseOld10                         /* parse using XML-1.0 before update 5 */
	XMLParseNoBaseFix                     /* do not fixup XINCLUDE xml:base uris */
	XMLParseHuge                          /* relax any hardcoded limit from the parser */
	XMLParseOldSAX                        /* parse using SAX2 interface before 2.7.0 */
	XMLParseIgnoreEnc                     /* ignore internal document encoding hint */
	XMLParseBigLines                      /* Store big lines numbers in text PSVI field */
	XMLParseMax                           /* one bit past the last named flag; not a parser flag itself */
	XMLParseEmptyOption Option = 0        /* no flag set */
)

// Ctxt represents the Parser context. You normally should be using
// Parser, but if you for some reason need to do more low-level
// magic you will have to tinker with this struct
type Ctxt struct {
	ptr uintptr // *C.xmlParserCtxt
}
80 | type Parser struct { 81 | Options Option 82 | } 83 | -------------------------------------------------------------------------------- /parser/parser.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | 7 | "github.com/lestrrat-go/libxml2/clib" 8 | "github.com/lestrrat-go/libxml2/dom" 9 | "github.com/lestrrat-go/libxml2/types" 10 | "github.com/pkg/errors" 11 | ) 12 | 13 | const _OptionName = "RecoverNoEntDTDLoadDTDAttrDTDValidNoErrorNoWarningPedanticNoBlanksSAX1XIncludeNoNetNoDictNscleanNoCDATANoXIncNodeCompactOld10NoBaseFixHugeOldSAXIgnoreEncBigLines" 14 | 15 | var _OptionMap = map[int]string{ 16 | 1: _OptionName[0:7], 17 | 2: _OptionName[7:12], 18 | 4: _OptionName[12:19], 19 | 8: _OptionName[19:26], 20 | 16: _OptionName[26:34], 21 | 32: _OptionName[34:41], 22 | 64: _OptionName[41:50], 23 | 128: _OptionName[50:58], 24 | 256: _OptionName[58:66], 25 | 512: _OptionName[66:70], 26 | 1024: _OptionName[70:78], 27 | 2048: _OptionName[78:83], 28 | 4096: _OptionName[83:89], 29 | 8192: _OptionName[89:96], 30 | 16384: _OptionName[96:103], 31 | 32768: _OptionName[103:113], 32 | 65536: _OptionName[113:120], 33 | 131072: _OptionName[120:125], 34 | 262144: _OptionName[125:134], 35 | 524288: _OptionName[134:138], 36 | 1048576: _OptionName[138:144], 37 | 2097152: _OptionName[144:153], 38 | 4194304: _OptionName[153:161], 39 | } 40 | 41 | // Set flips the option bit in the given Option 42 | func (o *Option) Set(options ...Option) { 43 | v := int(*o) // current value 44 | for _, i := range options { 45 | v = v | int(i) 46 | } 47 | *o = Option(v) 48 | } 49 | 50 | // String creates a string representation of the Option 51 | func (o Option) String() string { 52 | if o == XMLParseEmptyOption { 53 | return "[]" 54 | } 55 | 56 | i := int(o) 57 | b := bytes.Buffer{} 58 | b.Write([]byte{'['}) 59 | for x := 1; x < int(XMLParseMax); x = x << 1 { 60 | if (i & x) == x { 61 | v, ok := _OptionMap[x] 62 
| if !ok { 63 | v = "Option(Unknown)" 64 | } 65 | b.WriteString(v) 66 | b.Write([]byte{'|'}) 67 | } 68 | } 69 | x := b.Bytes() 70 | if x[len(x)-1] == '|' { 71 | x[len(x)-1] = ']' 72 | } else { 73 | x = append(x, ']') 74 | } 75 | return string(x) 76 | } 77 | 78 | // New creates a new Parser with the given options. 79 | func New(opts ...Option) *Parser { 80 | var o Option 81 | 82 | for _, opt := range opts { 83 | o = o | opt 84 | } 85 | 86 | return &Parser{ 87 | Options: o, 88 | } 89 | } 90 | 91 | // Parse parses XML from the given byte buffer 92 | func (p *Parser) Parse(buf []byte) (types.Document, error) { 93 | return p.ParseString(string(buf)) 94 | } 95 | 96 | // ParseString parses XML from the given string 97 | func (p *Parser) ParseString(s string) (types.Document, error) { 98 | ctx, err := NewCtxt(s, p.Options) 99 | if err != nil { 100 | return nil, errors.Wrap(err, "failed to create parse context") 101 | } 102 | defer func() { _ = ctx.Free() }() 103 | 104 | docptr, err := clib.XMLCtxtReadMemory(ctx, s, "", "", int(p.Options)) 105 | if err != nil { 106 | return nil, errors.Wrap(err, "failed to create parse input") 107 | } 108 | 109 | if docptr != 0 { 110 | return dom.WrapDocument(docptr), nil 111 | } 112 | return nil, errors.New("failed to generate document pointer") 113 | } 114 | 115 | // ParseReader parses XML from the given io.Reader 116 | func (p *Parser) ParseReader(in io.Reader) (types.Document, error) { 117 | buf := &bytes.Buffer{} 118 | if _, err := buf.ReadFrom(in); err != nil { 119 | return nil, errors.Wrap(err, "failed to read from reader") 120 | } 121 | 122 | return p.ParseString(buf.String()) 123 | } 124 | 125 | // NewCtxt creates a new Parser context 126 | func NewCtxt(s string, o Option) (*Ctxt, error) { 127 | ctxptr, err := clib.XMLCreateMemoryParserCtxt(s, int(o)) 128 | if err != nil { 129 | return nil, errors.Wrap(err, "failed to execute XMLCreateMemoryParserCtxt") 130 | } 131 | return &Ctxt{ptr: ctxptr}, nil 132 | } 133 | 134 | // Pointer 
returns the underlying C struct 135 | func (ctx Ctxt) Pointer() uintptr { 136 | return ctx.ptr 137 | } 138 | 139 | // Parse starts the parsing on the Ctxt 140 | func (ctx Ctxt) Parse() error { 141 | return clib.XMLParseDocument(ctx) 142 | } 143 | 144 | // Free releases the underlying C struct 145 | func (ctx *Ctxt) Free() error { 146 | if err := clib.XMLFreeParserCtxt(ctx); err != nil { 147 | return errors.Wrap(err, "failed to free parser context") 148 | } 149 | 150 | ctx.ptr = 0 151 | return nil 152 | } 153 | -------------------------------------------------------------------------------- /parser_test.go: -------------------------------------------------------------------------------- 1 | package libxml2 2 | 3 | import ( 4 | "regexp" 5 | "testing" 6 | 7 | "github.com/lestrrat-go/libxml2/dom" 8 | "github.com/lestrrat-go/libxml2/types" 9 | 10 | "github.com/lestrrat-go/libxml2/clib" 11 | "github.com/lestrrat-go/libxml2/parser" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | const stdXMLDecl = `` + "\n" 16 | 17 | var ( 18 | goodWFNSStrings = []string{ 19 | stdXMLDecl + `` + "\n", 20 | stdXMLDecl + `` + "\n", 21 | stdXMLDecl + `` + "\n", 22 | stdXMLDecl + `` + "\n", 23 | stdXMLDecl + `` + "\n", 24 | } 25 | goodWFStrings = []string{ 26 | ``, 27 | ``, 28 | ``, 29 | `` + "\n" + ``, 30 | `` + "\n" + ``, 31 | stdXMLDecl + ` ` + "\n", 32 | stdXMLDecl + ` `, 33 | stdXMLDecl + ` `, 34 | stdXMLDecl + `&"\` + "`" + `]]>`, 35 | stdXMLDecl + `<>&"'`, 36 | stdXMLDecl + `  `, 37 | stdXMLDecl + `foo`, 38 | stdXMLDecl + `foo`, 39 | stdXMLDecl + `foo`, 40 | stdXMLDecl + ``, 41 | stdXMLDecl + `"/>`, 42 | } 43 | goodWFDTDStrings = []string{ 44 | stdXMLDecl + `` + "\n" + `]>` + "\n" + `&foo;`, 45 | stdXMLDecl + `]>&foo;`, 46 | stdXMLDecl + `]>&foo;>`, 47 | stdXMLDecl + `]>&foo;>`, 48 | stdXMLDecl + `]>&foo;>`, 49 | stdXMLDecl + `]>`, 50 | stdXMLDecl + `]>`, 51 | } 52 | badWFStrings = []string{ 53 | "", // totally empty document 54 | stdXMLDecl, // only XML Declaration 55 | "", 
// comment only is like an empty document 56 | `]>`, // no good either ... 57 | "", // single tag (tag mismatch) 58 | "foo", // trailing junk 59 | "foo", // leading junk 60 | "", // bad attribute 61 | `&", // bad char 63 | `&//0x20;`, // bad chart 64 | "r/>", // bad encoding 65 | "&foo;", // undefind entity 66 | ">", // unterminated entity 67 | stdXMLDecl + `]>`, // bad placed entity 68 | stdXMLDecl + `]>`, // even worse 69 | "", // bad comment 70 | "", // bad either... (is this conform with the spec????) 71 | } 72 | ) 73 | 74 | func parseShouldSucceed(t *testing.T, opts parser.Option, inputs []string) { 75 | t.Logf("Test parsing with parser %v", opts) 76 | for _, s := range inputs { 77 | d, err := ParseString(s, opts) 78 | if !assert.NoError(t, err, "Parse should succeed") { 79 | return 80 | } 81 | d.Free() 82 | } 83 | } 84 | 85 | func parseShouldFail(t *testing.T, opts parser.Option, inputs []string) { 86 | for _, s := range inputs { 87 | d, err := ParseString(s, opts) 88 | if err == nil { 89 | d.Free() 90 | t.Errorf("Expected failure to parse '%s'", s) 91 | } 92 | } 93 | } 94 | 95 | type ParseOptionToString struct { 96 | v parser.Option 97 | e string 98 | } 99 | 100 | func TestParseOptionStringer(t *testing.T) { 101 | values := []ParseOptionToString{ 102 | { 103 | v: parser.XMLParseRecover, 104 | e: "Recover", 105 | }, 106 | { 107 | v: parser.XMLParseNoEnt, 108 | e: "NoEnt", 109 | }, 110 | { 111 | v: parser.XMLParseDTDLoad, 112 | e: "DTDLoad", 113 | }, 114 | { 115 | v: parser.XMLParseDTDAttr, 116 | e: "DTDAttr", 117 | }, 118 | { 119 | v: parser.XMLParseDTDValid, 120 | e: "DTDValid", 121 | }, 122 | { 123 | v: parser.XMLParseNoError, 124 | e: "NoError", 125 | }, 126 | { 127 | v: parser.XMLParseNoWarning, 128 | e: "NoWarning", 129 | }, 130 | { 131 | v: parser.XMLParsePedantic, 132 | e: "Pedantic", 133 | }, 134 | { 135 | v: parser.XMLParseNoBlanks, 136 | e: "NoBlanks", 137 | }, 138 | { 139 | v: parser.XMLParseSAX1, 140 | e: "SAX1", 141 | }, 142 | { 143 | v: 
parser.XMLParseXInclude, 144 | e: "XInclude", 145 | }, 146 | { 147 | v: parser.XMLParseNoNet, 148 | e: "NoNet", 149 | }, 150 | { 151 | v: parser.XMLParseNoDict, 152 | e: "NoDict", 153 | }, 154 | { 155 | v: parser.XMLParseNsclean, 156 | e: "Nsclean", 157 | }, 158 | { 159 | v: parser.XMLParseNoCDATA, 160 | e: "NoCDATA", 161 | }, 162 | { 163 | v: parser.XMLParseNoXIncNode, 164 | e: "NoXIncNode", 165 | }, 166 | { 167 | v: parser.XMLParseCompact, 168 | e: "Compact", 169 | }, 170 | { 171 | v: parser.XMLParseOld10, 172 | e: "Old10", 173 | }, 174 | { 175 | v: parser.XMLParseNoBaseFix, 176 | e: "NoBaseFix", 177 | }, 178 | { 179 | v: parser.XMLParseHuge, 180 | e: "Huge", 181 | }, 182 | { 183 | v: parser.XMLParseOldSAX, 184 | e: "OldSAX", 185 | }, 186 | { 187 | v: parser.XMLParseIgnoreEnc, 188 | e: "IgnoreEnc", 189 | }, 190 | { 191 | v: parser.XMLParseBigLines, 192 | e: "BigLines", 193 | }, 194 | } 195 | 196 | for _, d := range values { 197 | if d.v.String() != "["+d.e+"]" { 198 | t.Errorf("e '%s', got '%s'", d.e, d.v.String()) 199 | } 200 | } 201 | } 202 | 203 | func TestParseEmpty(t *testing.T) { 204 | doc, err := ParseString(``) 205 | if err == nil { 206 | t.Errorf("Parse of empty string should fail") 207 | defer doc.Free() 208 | } 209 | } 210 | 211 | func TestParse(t *testing.T) { 212 | inputs := [][]string{ 213 | goodWFStrings, 214 | goodWFNSStrings, 215 | goodWFDTDStrings, 216 | } 217 | 218 | for _, input := range inputs { 219 | parseShouldSucceed(t, 0, input) 220 | } 221 | } 222 | 223 | func TestParseBad(t *testing.T) { 224 | clib.ReportErrors(false) 225 | defer clib.ReportErrors(true) 226 | 227 | inputs := [][]string{ 228 | badWFStrings, 229 | } 230 | 231 | for _, input := range inputs { 232 | parseShouldFail(t, 0, input) 233 | } 234 | } 235 | 236 | func TestParseNoBlanks(t *testing.T) { 237 | inputs := [][]string{ 238 | goodWFStrings, 239 | goodWFNSStrings, 240 | goodWFDTDStrings, 241 | } 242 | for _, input := range inputs { 243 | parseShouldSucceed(t, 
parser.XMLParseNoBlanks, input) 244 | } 245 | } 246 | 247 | func TestRoundtripNoBlanks(t *testing.T) { 248 | doc, err := ParseString(` `, parser.XMLParseNoBlanks) 249 | if err != nil { 250 | t.Errorf("failed to parse string: %s", err) 251 | return 252 | } 253 | 254 | if !assert.Regexp(t, regexp.MustCompile(``), doc.Dump(false), "stringified xml should have no blanks") { 255 | return 256 | } 257 | } 258 | 259 | func TestOptionsShouldCombine(t *testing.T) { 260 | opts := map[parser.Option][]parser.Option{ 261 | parser.Option(64): {parser.XMLParseNoWarning}, 262 | parser.Option(96): {parser.XMLParseNoWarning, parser.XMLParseNoError}, 263 | } 264 | 265 | for expected, options := range opts { 266 | p := parser.New(options...) 267 | assert.Equal(t, expected, p.Options) 268 | } 269 | } 270 | 271 | func TestGHIssue23(t *testing.T) { 272 | const src = ` 273 | 274 | Hello 275 | Goodbye! 276 | ` 277 | 278 | doc, err := ParseString(src, parser.XMLParseRecover, parser.XMLParseNoWarning, parser.XMLParseNoError) 279 | if !assert.NoError(t, err, "should pass") { 280 | return 281 | } 282 | doc.Free() 283 | } 284 | 285 | func TestCommentWrapNodeIssue(t *testing.T) { 286 | // should wrap comment node 287 | const testHTML = "

" 288 | 289 | doc, err := ParseHTMLString(testHTML, parser.HTMLParseRecover) 290 | if err != nil { 291 | t.Fatalf("Got error when parsing HTML: %v", err) 292 | } 293 | 294 | bodyRes, err := doc.Find("//body") 295 | if err != nil { 296 | t.Fatalf("Got error when grabbing body: %v", err) 297 | } 298 | 299 | bodyChildren, err := bodyRes.NodeList().First().ChildNodes() 300 | if err != nil { 301 | t.Fatalf("Got error when grabbing body's children: %v", err) 302 | } 303 | 304 | if str := bodyChildren.String(); str != testHTML { 305 | t.Fatalf("HTML did not convert back correctly, expected: %v, got: %v.", testHTML, str) 306 | } 307 | } 308 | 309 | func TestPiWrapNodeIssue(t *testing.T) { 310 | // should wrap Pi node 311 | const textXML = "\ntest \n" 312 | doc, err := ParseString(textXML) 313 | if err != nil { 314 | t.Fatalf("Got error when parsing xml: %v", err) 315 | } 316 | 317 | nodes, err := doc.ChildNodes() 318 | if err != nil { 319 | t.Fatalf("Got error when getting childnodes: %v", err) 320 | } 321 | 322 | for _, node := range nodes { 323 | if node.HasChildNodes() { 324 | if _, err := node.ChildNodes(); err != nil { 325 | t.Fatalf("Got error when getting childnodes of childnodes: %v", err) 326 | } 327 | } 328 | } 329 | 330 | if str := doc.String(); str != textXML { 331 | t.Fatalf("XML did not convert back correctly, expected: %v, got: %v", textXML, str) 332 | } 333 | } 334 | 335 | func TestGetNonexistentAttributeReturnsRecoverableError(t *testing.T) { 336 | const src = `` 337 | doc, err := ParseString(src) 338 | if !assert.NoError(t, err, "Should parse") { 339 | return 340 | } 341 | defer doc.Free() 342 | 343 | rootNode, err := doc.DocumentElement() 344 | if !assert.NoError(t, err, "Should find root element") { 345 | return 346 | } 347 | 348 | el, ok := rootNode.(types.Element) 349 | if !ok { 350 | t.Fatalf("Root node was not an element") 351 | } 352 | 353 | _, err = el.GetAttribute("non-existent") 354 | if err != dom.ErrAttributeNotFound { 355 | 
t.Fatalf("GetAttribute() error not comparable to existing library") 356 | } 357 | } 358 | -------------------------------------------------------------------------------- /test/euc-jp.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lestrrat-go/libxml2/c934e3fcb9d356b6842830761f72b4802d40c60a/test/euc-jp.xml -------------------------------------------------------------------------------- /test/go_libxml2_local.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 4cfcf843c66979eb1df2bd0c52817edb753a52ba 8 | 9 | 10 | this is a test string only. 11 | be218408a748759fb98363593b8f544eb054171bced856ca98bd972823dec0b07b205453fc3c46f23c934d0959f1e05b609c011b6ada84a7050ad7c910b24bf1 12 | 13 | 14 | foobar 15 | f7b34871a562283ee92bbda00485eb45 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /test/go_libxml2_remote.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 4cfcf843c66979eb1df2bd0c52817edb753a52ba 8 | 9 | 10 | this is a test string only. 11 | be218408a748759fb98363593b8f544eb054171bced856ca98bd972823dec0b07b205453fc3c46f23c934d0959f1e05b609c011b6ada84a7050ad7c910b24bf1 12 | 13 | 14 | foobar 15 | f7b34871a562283ee92bbda00485eb45 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /test/link/test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/lestrrat-go/libxml2" 5 | ) 6 | 7 | func main() { 8 | doc, err := libxml2.ParseHTMLString(`

Hello, World!

Lorem Ipsum

`) 9 | if err != nil { 10 | panic(err) 11 | } 12 | doc.Free() 13 | } 14 | -------------------------------------------------------------------------------- /test/schema/lib/types/cksum.xsd: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /test/schema/lib/types/net.xsd: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /test/schema/lib/types/std.xsd: 
-------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /test/schema/lib/types/unix.xsd: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | -------------------------------------------------------------------------------- /test/schema/projects/go_libxml2_local.xsd: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /test/schema/projects/go_libxml2_remote.xsd: 
-------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /test/sjis.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lestrrat-go/libxml2/c934e3fcb9d356b6842830761f72b4802d40c60a/test/sjis.xml -------------------------------------------------------------------------------- /test/utf-8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | はろー、わーるど! 4 | 5 | -------------------------------------------------------------------------------- /test/xmldsig-core-schema.xsd: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | 10 | ]> 11 | 12 | 27 | 28 | 29 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 216 | 217 | 218 | 
// PtrSource defines the interface for things that are backed by
// a C backend.
type PtrSource interface {
	// Pointer returns the underlying C pointer. This is an exported
	// method to allow various internal go-libxml2 packages to interoperate
	// with each other. End users are STRONGLY advised not to touch this
	// method or its return values.
	Pointer() uintptr

	// Free releases the underlying resources.
	Free()
}

// XPathExpression defines the interface for an XPath expression.
// It currently adds nothing on top of PtrSource.
type XPathExpression interface {
	PtrSource
}

// XPathResult defines the interface for the result of calling Find().
// The accessors expose the result as a boolean, number, string, node list
// or node iterator; Type reports the clib.XPathObjectType of the result.
type XPathResult interface {
	Bool() bool
	Free()
	NodeList() NodeList
	NodeIter() NodeIter
	Number() float64
	String() string
	Type() clib.XPathObjectType
}

// Document defines the interface for an XML document.
type Document interface {
	Node
	CreateElement(string) (Element, error)
	CreateElementNS(string, string) (Element, error)
	DocumentElement() (Node, error)
	Dump(bool) string
	Encoding() string
}

// Attribute defines the interface for an XML attribute.
type Attribute interface {
	Node
	Value() string
}

// Element defines the interface for an XML element.
//
//nolint:interfacebloat
type Element interface {
	Node
	AppendText(string) error
	Attributes() ([]Attribute, error)
	GetAttribute(string) (Attribute, error)
	GetNamespaces() ([]Namespace, error)
	LocalName() string
	NamespaceURI() string
	Prefix() string
	RemoveAttribute(string) error
	SetAttribute(string, string) error
	SetNamespace(string, string, ...bool) error
}

// Namespace defines the interface for an XML namespace.
type Namespace interface {
	Node
	Prefix() string
	URI() string
}

// Node defines the basic DOM interface.
//
//nolint:interfacebloat
type Node interface {
	PtrSource

	ParseInContext(string, int) (Node, error)

	AddChild(Node) error
	ChildNodes() (NodeList, error)
	Copy() (Node, error)
	OwnerDocument() (Document, error)
	Find(string) (XPathResult, error)
	FirstChild() (Node, error)
	HasChildNodes() bool
	IsSameNode(Node) bool
	LastChild() (Node, error)
	// Literal is almost the same as String(), except for things like Element
	// and Attribute nodes. String() will return the XML stringification of
	// these, but Literal() will return the "value" associated with them.
	Literal() (string, error)
	LookupNamespacePrefix(string) (string, error)
	LookupNamespaceURI(string) (string, error)
	NextSibling() (Node, error)
	NodeName() string
	NodeType() clib.XMLNodeType
	NodeValue() string
	ParentNode() (Node, error)
	PreviousSibling() (Node, error)
	RemoveChild(Node) error
	SetDocument(d Document) error
	SetNodeName(string)
	SetNodeValue(string)
	String() string
	TextContent() string
	ToString(int, bool) string
	Walk(func(Node) error) error

	MakeMortal()
	MakePersistent()
	AutoFree()
}

// NodeIter defines the interface for iterating over nodes one at a time.
// Callers loop with `for iter.Next() { n := iter.Node() }`: Next reports
// whether another node is available, and Node returns the current one.
type NodeIter interface {
	Next() bool
	Node() Node
}

// NodeList is a set of Nodes.
type NodeList []Node
It is in its own package 5 | so that any package can refer to these interfaces without introducing 6 | circular dependecy 7 | */ 8 | package types 9 | 10 | import "bytes" 11 | 12 | // String returns the string representation of the NodeList 13 | func (n NodeList) String() string { 14 | buf := bytes.Buffer{} 15 | for _, x := range n { 16 | buf.WriteString(x.String()) 17 | } 18 | return buf.String() 19 | } 20 | 21 | // NodeValue returns the concatenation of NodeValue within the nodes in NodeList 22 | func (n NodeList) NodeValue() string { 23 | buf := bytes.Buffer{} 24 | for _, x := range n { 25 | buf.WriteString(x.NodeValue()) 26 | } 27 | return buf.String() 28 | } 29 | 30 | // Literal returns the string representation of the NodeList (using Literal()) 31 | func (n NodeList) Literal() (string, error) { 32 | buf := bytes.Buffer{} 33 | for _, x := range n { 34 | l, err := x.Literal() 35 | if err != nil { 36 | return "", err 37 | } 38 | buf.WriteString(l) 39 | } 40 | return buf.String(), nil 41 | } 42 | 43 | // First returns the first node in the list, or nil otherwise. 
44 | func (n NodeList) First() Node { 45 | if n == nil { 46 | return nil 47 | } 48 | 49 | if len(n) > 0 { 50 | return n[0] 51 | } 52 | 53 | return nil 54 | } 55 | -------------------------------------------------------------------------------- /xml_test.go: -------------------------------------------------------------------------------- 1 | package libxml2 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "testing" 7 | 8 | "github.com/lestrrat-go/libxml2/dom" 9 | "github.com/lestrrat-go/libxml2/parser" 10 | "github.com/lestrrat-go/libxml2/types" 11 | "github.com/lestrrat-go/libxml2/xpath" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestEncoding(t *testing.T) { 16 | for _, enc := range []string{`utf-8`, `sjis`, `euc-jp`} { 17 | fn := fmt.Sprintf(`test/%s.xml`, enc) 18 | f, err := os.Open(fn) 19 | if err != nil { 20 | t.Errorf("Failed to open %s: %s", fn, err) 21 | return 22 | } 23 | defer f.Close() 24 | 25 | p := parser.New() 26 | doc, err := p.ParseReader(f) 27 | if err != nil { 28 | t.Errorf("Failed to parse %s: %s", fn, err) 29 | return 30 | } 31 | 32 | if doc.Encoding() != enc { 33 | t.Errorf("Expected encoding %s, got %s", enc, doc.Encoding()) 34 | return 35 | } 36 | } 37 | } 38 | 39 | func TestNamespacedReconciliation(t *testing.T) { 40 | d := dom.CreateDocument() 41 | root, err := d.CreateElement("foo") 42 | if !assert.NoError(t, err, "failed to create document") { 43 | return 44 | } 45 | d.SetDocumentElement(root) 46 | if !assert.NoError(t, root.SetNamespace("http://default", "root"), "SetNamespace should succeed") { 47 | return 48 | } 49 | 50 | if !assert.NoError(t, root.SetNamespace("http://children", "child", false), "SetNamespace (no-activate) should succeed") { 51 | return 52 | } 53 | 54 | n, err := d.CreateElementNS("http://default", "branch") 55 | if !assert.NoError(t, err, "CreateElementNS should succeed") { 56 | return 57 | } 58 | root.AddChild(n) 59 | 60 | _, err = n.GetAttribute("xmlns") 61 | if !assert.Error(t, err, "GetAttribute should 
fail with not found") || 62 | !assert.Equal(t, "attribute not found", err.Error(), "error matches") { 63 | return 64 | } 65 | 66 | var c types.Element 67 | for _, name := range []string{"a", "b", "c"} { 68 | child, err := d.CreateElementNS("http://children", "child:"+name) 69 | if !assert.NoError(t, err, "CreateElementNS should succeed") { 70 | return 71 | } 72 | if name == "c" { 73 | c = child 74 | } 75 | n.AddChild(child) 76 | _, err = n.GetAttribute("xmlns:child") 77 | if !assert.Error(t, err, "GetAttribute should fail with not found") || 78 | !assert.Equal(t, "attribute not found", err.Error(), "error matches") { 79 | return 80 | } 81 | } 82 | 83 | if !assert.NoError(t, c.SetAttribute("xmlns:foo", "http://children"), "SetAttribute should succeed") { 84 | return 85 | } 86 | 87 | attr, err := c.GetAttribute("xmlns:foo") 88 | if !assert.NoError(t, err, "xmlns:foo should exist") { 89 | return 90 | } 91 | if !assert.Equal(t, "http://children", attr.Value(), "attribute matches") { 92 | return 93 | } 94 | 95 | child, err := d.CreateElementNS("http://other", "branch") 96 | if !assert.NoError(t, err, "creating element with default namespace") { 97 | return 98 | } 99 | n.AddChild(child) 100 | 101 | // XXX This still fails 102 | /* 103 | attr, err = child.GetAttribute("xmlns") 104 | if !assert.NoError(t, err, "GetAttribute should succeed") { 105 | return 106 | } 107 | if !assert.Equal(t, "http://other", attr.Value(), "attribute matches") { 108 | return 109 | } 110 | */ 111 | 112 | t.Logf("%s", d.String()) 113 | } 114 | 115 | func TestRegressionGH7(t *testing.T) { 116 | doc, err := ParseHTMLString(` 117 | 118 | 119 |
120 | 122 | 1234 123 |
124 | 125 | `) 126 | 127 | if !assert.NoError(t, err, "ParseHTMLString should succeed") { 128 | return 129 | } 130 | 131 | nodes := xpath.NodeList(doc.Find(`./body/div`)) 132 | if !assert.NotEmpty(t, nodes, "Find should succeed") { 133 | return 134 | } 135 | 136 | v, err := nodes.Literal() 137 | if !assert.NoError(t, err, "Literal() should succeed") { 138 | return 139 | } 140 | if !assert.NotEmpty(t, v, "Literal() should return some string") { 141 | return 142 | } 143 | t.Logf("v = '%s'", v) 144 | } 145 | 146 | func TestGHIssue43(t *testing.T) { 147 | d := dom.CreateDocument() 148 | r, _ := d.CreateElement("root") 149 | r.SetNamespace("http://some.uri", "pfx", true) 150 | d.SetDocumentElement(r) 151 | e, _ := d.CreateElement("elem") 152 | e.SetNamespace("http://other.uri", "", true) 153 | r.AddChild(e) 154 | s := d.ToString(1, true) 155 | 156 | if !assert.Contains(t, s, `= n.nlen { 28 | return false 29 | } 30 | 31 | n.cur++ 32 | node, err := WrapNodeFunc(n.nodes[n.cur]) 33 | if err != nil { 34 | n.nlen = 0 35 | return false 36 | } 37 | n.curnode = node 38 | return true 39 | } 40 | 41 | func (n *NodeIterator) Node() types.Node { 42 | return n.curnode 43 | } 44 | -------------------------------------------------------------------------------- /xpath/util.go: -------------------------------------------------------------------------------- 1 | package xpath 2 | 3 | import "github.com/lestrrat-go/libxml2/types" 4 | 5 | // String returns the string component of the result, and as a side effect 6 | // releases the Result by calling Free() on it. Use this if you do not 7 | // really care about the error value from Find() 8 | func String(r types.XPathResult, err error) string { 9 | if err != nil { 10 | return "" 11 | } 12 | 13 | defer r.Free() 14 | return r.String() 15 | } 16 | 17 | // Bool returns the boolean component of the result, and as a side effect 18 | // releases the Result by calling Free() on it. 
Use this if you do not 19 | // really care about the error value from Find() 20 | func Bool(r types.XPathResult, err error) bool { 21 | if err != nil { 22 | return false 23 | } 24 | 25 | defer r.Free() 26 | return r.Bool() 27 | } 28 | 29 | // Number returns the numeric component of the result, and as a side effect 30 | // releases the Result by calling Free() on it. Use this if you do not 31 | // really care about the error value from Find() 32 | func Number(r types.XPathResult, err error) float64 { 33 | if err != nil { 34 | return 0 35 | } 36 | 37 | defer r.Free() 38 | return r.Number() 39 | } 40 | 41 | // NodeList returns the nodes associated with this result, and as a side effect 42 | // releases the result by calling Free() on it. Use this if you do not 43 | // really care about the error value from Find(). 44 | func NodeList(r types.XPathResult, err error) types.NodeList { 45 | if err != nil { 46 | return nil 47 | } 48 | 49 | defer r.Free() 50 | return r.NodeList() 51 | } 52 | 53 | // NodeIter returns an iterator that will return the nodes assocaied with 54 | // this reult, and as a side effect releases the result by calling Free() 55 | // on it. Use this if you do not really care about the error value from Find(). 56 | func NodeIter(r types.XPathResult, err error) types.NodeIter { 57 | if err != nil { 58 | return NewNodeIterator(nil) 59 | } 60 | 61 | defer r.Free() 62 | return r.NodeIter() 63 | } 64 | -------------------------------------------------------------------------------- /xpath/xpath.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package xpath contains tools to handle XPath evaluation. 3 | 4 | Because of a very quirky dependency between this package and the 5 | github.com/lestrrat/libxml2/dom package, you MUST import both 6 | packages to properly use it. 
7 | 8 | import ( 9 | "github.com/lestrrat-go/libxml2/dom" 10 | "github.com/lestrrat-go/libxml2/xpath" 11 | ) 12 | 13 | Or, if you have no use for dom package in your program, and you 14 | don't want to use the magical "_" import, you can do the initialization 15 | yourself just to appease the compiler: 16 | 17 | func init() { 18 | dom.SetupXPathCallback() 19 | } 20 | */ 21 | package xpath 22 | 23 | import ( 24 | "fmt" 25 | 26 | "github.com/lestrrat-go/libxml2/clib" 27 | "github.com/lestrrat-go/libxml2/types" 28 | "github.com/pkg/errors" 29 | ) 30 | 31 | // Pointer returns the underlying C struct 32 | func (x Object) Pointer() uintptr { 33 | return x.ptr 34 | } 35 | 36 | // Type returns the clib.XPathObjectType 37 | func (x Object) Type() clib.XPathObjectType { 38 | return clib.XMLXPathObjectType(x) 39 | } 40 | 41 | // Number returns the floatval component of the Object as float64 42 | func (x Object) Number() float64 { 43 | return clib.XMLXPathObjectFloat64(x) 44 | } 45 | 46 | // Bool returns the boolval component of the Object 47 | func (x Object) Bool() bool { 48 | return clib.XMLXPathObjectBool(x) 49 | } 50 | 51 | // WrapNodeFunc is a function that gets called when Object.NodeList() 52 | // is called. This is necessary because during the call to NodeList(), 53 | // the underlying C pointers are materialized to objects in a different 54 | // package ("github.com/lestrrat-go/libxml2/dom"), and said package 55 | // uses this package... Yes, a circular dependency. 56 | // 57 | // Normally this means that both pacckages should live under the same 58 | // unified package, but in this case they are independent enough that 59 | // we have decided they warrant to be separated. 60 | // 61 | // So this WrapNodeFunc is our workaround for this problem: when 62 | // github.com/lestrrat-go/libxml2/dom is loaded, it automatically 63 | // initializes this function to an appropriate function on the fly. 
64 | var WrapNodeFunc func(uintptr) (types.Node, error) 65 | 66 | // NodeList returns the list of nodes included in this Object 67 | func (x Object) NodeList() types.NodeList { 68 | if WrapNodeFunc == nil { 69 | panic("WarapNodeFunc not initialized. read XXX for details") 70 | } 71 | 72 | nl, err := clib.XMLXPathObjectNodeList(x) 73 | if err != nil { 74 | return nil 75 | } 76 | 77 | ret := make([]types.Node, len(nl)) 78 | for i, p := range nl { 79 | n, err := WrapNodeFunc(p) 80 | if err != nil { 81 | return nil 82 | } 83 | ret[i] = n 84 | } 85 | return ret 86 | } 87 | 88 | func (x Object) NodeIter() types.NodeIter { 89 | nl, err := clib.XMLXPathObjectNodeList(x) 90 | if err != nil { 91 | return NewNodeIterator(nil) 92 | } 93 | return NewNodeIterator(nl) 94 | } 95 | 96 | // String returns the stringified value of the nodes included in 97 | // this Object. If the Object is anything other than a 98 | // NodeSet, then we fallback to using fmt.Sprintf to generate 99 | // some sort of readable output 100 | func (x Object) String() string { 101 | switch x.Type() { 102 | case NodeSetType: 103 | nl := x.NodeList() 104 | if nl == nil { 105 | return "" 106 | } 107 | if x.ForceLiteral { 108 | s, err := nl.Literal() 109 | if err == nil { 110 | return s 111 | } 112 | return "" 113 | } 114 | return nl.NodeValue() 115 | default: 116 | return fmt.Sprintf("%T", x) 117 | } 118 | } 119 | 120 | // Free releases the underlying C structs 121 | func (x *Object) Free() { 122 | clib.XMLXPathFreeObject(x) 123 | } 124 | 125 | // NewExpression compiles the given XPath expression string 126 | func NewExpression(s string) (*Expression, error) { 127 | ptr, err := clib.XMLXPathCompile(s) 128 | if err != nil { 129 | return nil, errors.Wrap(err, "failed to compile expression") 130 | } 131 | 132 | return &Expression{ptr: ptr, expr: s}, nil 133 | } 134 | 135 | // Pointer returns the underlying C struct 136 | func (x *Expression) Pointer() uintptr { 137 | return x.ptr 138 | } 139 | 140 | // String 
returns the expression as it was given to NewExpression 141 | func (x Expression) String() string { 142 | return x.expr 143 | } 144 | 145 | // Free releases the underlying C structs in the Expression 146 | func (x *Expression) Free() { 147 | _ = clib.XMLXPathFreeCompExpr(x) 148 | } 149 | 150 | // NewContext creates a new Context, optionally providing 151 | // with a context node. 152 | // 153 | // Note that although we are specifying `n... Node` for the argument, 154 | // only the first, node is considered for the context node 155 | func NewContext(n ...types.Node) (*Context, error) { 156 | var node types.Node 157 | if len(n) > 0 { 158 | node = n[0] 159 | } 160 | 161 | ctxptr, err := clib.XMLXPathNewContext(node) 162 | if err != nil { 163 | return nil, errors.Wrap(err, "failed to get valid xpath context") 164 | } 165 | 166 | return &Context{ptr: ctxptr}, nil 167 | } 168 | 169 | // Pointer returns a pointer to the underlying C struct 170 | func (x *Context) Pointer() uintptr { 171 | return x.ptr 172 | } 173 | 174 | // SetContextNode sets or resets the context node which 175 | // XPath expressions will be evaluated against. 176 | func (x *Context) SetContextNode(n types.Node) error { 177 | return clib.XMLXPathContextSetContextNode(x, n) 178 | } 179 | 180 | // Exists compiles and evaluates the xpath expression, and returns 181 | // true if a corresponding node exists 182 | func (x *Context) Exists(xpath string) bool { 183 | list := NodeList(x.Find(xpath)) 184 | if list == nil { 185 | return false 186 | } 187 | 188 | return len(list) > 0 189 | } 190 | 191 | // Free releases the underlying C structs in the XPath 192 | func (x *Context) Free() { 193 | _ = clib.XMLXPathFreeContext(x) 194 | } 195 | 196 | // Find evaluates the expression s against the nodes registered 197 | // in x. It returns the resulting data evaluated to an Result. 
198 | // 199 | // You MUST call Free() on the Result, or you will leak memory 200 | // If you don't really care for errors and just want to grab the 201 | // value of Result, checkout xpath.String(), xpath.Number(), xpath.Bool() 202 | // et al. 203 | func (x *Context) Find(s string) (types.XPathResult, error) { 204 | expr, err := NewExpression(s) 205 | if err != nil { 206 | return nil, errors.Wrap(err, "failed to compile expression") 207 | } 208 | defer expr.Free() 209 | 210 | return x.FindExpr(expr) 211 | } 212 | 213 | // FindExpr evaluates the given XPath expression and returns an Object. 214 | // You must call `Free()` on this returned object 215 | // 216 | // You MUST call Free() on the Result, or you will leak memory 217 | func (x *Context) FindExpr(expr types.XPathExpression) (types.XPathResult, error) { 218 | res, err := clib.XMLEvalXPath(x, expr) 219 | if err != nil { 220 | return nil, errors.Wrap(err, "failed to evaluate expression") 221 | } 222 | 223 | return &Object{ptr: res}, nil 224 | } 225 | 226 | // LookupNamespaceURI looksup the namespace URI associated with prefix 227 | func (x *Context) LookupNamespaceURI(prefix string) (string, error) { 228 | return clib.XMLXPathNSLookup(x, prefix) 229 | } 230 | 231 | // RegisterNS registers a namespace so it can be used in an Expression 232 | func (x *Context) RegisterNS(name, nsuri string) error { 233 | return clib.XMLXPathRegisterNS(x, name, nsuri) 234 | } 235 | -------------------------------------------------------------------------------- /xpath/xpath_test.go: -------------------------------------------------------------------------------- 1 | package xpath_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/lestrrat-go/libxml2" 7 | "github.com/lestrrat-go/libxml2/xpath" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestXPathContext(t *testing.T) { 12 | doc, err := libxml2.ParseString(``) 13 | if err != nil { 14 | t.Errorf("Failed to parse string: %s", err) 15 | } 16 | defer doc.Free() 
17 | 18 | root, err := doc.DocumentElement() 19 | if !assert.NoError(t, err, "DocumentElement should succeed") { 20 | return 21 | } 22 | 23 | ctx, err := xpath.NewContext(root) 24 | if err != nil { 25 | t.Errorf("Failed to initialize XPathContext: %s", err) 26 | return 27 | } 28 | defer ctx.Free() 29 | 30 | // Use a string 31 | exprString := `/*` 32 | nodes := xpath.NodeList(ctx.Find(exprString)) 33 | if len(nodes) != 1 { 34 | t.Errorf("Expected 1 nodes, got %d", len(nodes)) 35 | return 36 | } 37 | 38 | iter := xpath.NodeIter(ctx.Find(exprString)) 39 | { 40 | count := 0 41 | for iter.Next() { 42 | iter.Node() 43 | count++ 44 | } 45 | if !assert.Equal(t, 1, count, "got 1 nodes from iterator") { 46 | return 47 | } 48 | } 49 | 50 | // Use an explicitly compiled expression 51 | expr, err := xpath.NewExpression(exprString) 52 | if err != nil { 53 | t.Errorf("Failed to compile xpath: %s", err) 54 | return 55 | } 56 | defer expr.Free() 57 | 58 | nodes = xpath.NodeList(ctx.FindExpr(expr)) 59 | if len(nodes) != 1 { 60 | t.Errorf("Expected 1 nodes, got %d", len(nodes)) 61 | return 62 | } 63 | } 64 | 65 | func TestXPathContextExpression_Number(t *testing.T) { 66 | ctx, err := xpath.NewContext() 67 | if err != nil { 68 | t.Errorf("Failed to initialize XPathContext: %s", err) 69 | return 70 | } 71 | defer ctx.Free() 72 | 73 | if !assert.Equal(t, float64(2), xpath.Number(ctx.Find("1+1")), "XPath evaluates to 2") { 74 | return 75 | } 76 | if !assert.Equal(t, float64(0), xpath.Number(ctx.Find("1<>1")), "XPath evaluates to 0") { 77 | return 78 | } 79 | } 80 | 81 | func TestXPathContextExpression_Boolean(t *testing.T) { 82 | ctx, err := xpath.NewContext() 83 | if err != nil { 84 | t.Errorf("Failed to initialize XPathContext: %s", err) 85 | return 86 | } 87 | defer ctx.Free() 88 | 89 | if !assert.True(t, xpath.Bool(ctx.Find("1=1")), "XPath evaluates to true") { 90 | return 91 | } 92 | if !assert.False(t, xpath.Bool(ctx.Find("1<>1")), "XPath evaluates to false") { 93 | return 94 | } 
95 | } 96 | 97 | func TestXPathContextExpression_NodeList(t *testing.T) { 98 | doc, err := libxml2.ParseString(`bazquux`) 99 | if err != nil { 100 | t.Errorf("Failed to parse string: %s", err) 101 | } 102 | defer doc.Free() 103 | 104 | root, err := doc.DocumentElement() 105 | if !assert.NoError(t, err, "DocumentElement should succeed") { 106 | return 107 | } 108 | 109 | ctx, err := xpath.NewContext(root) 110 | if err != nil { 111 | t.Errorf("Failed to initialize XPathContext: %s", err) 112 | return 113 | } 114 | defer ctx.Free() 115 | 116 | if !assert.Len(t, xpath.NodeList(ctx.Find("/foo/bar")), 2, "XPath evaluates to 2 nodes") { 117 | return 118 | } 119 | 120 | if !assert.Len(t, xpath.NodeList(ctx.Find("/foo/bar[bogus")), 0, "XPath evaluates to 0 nodes") { 121 | return 122 | } 123 | 124 | if !assert.Equal(t, "bazquux", xpath.String(ctx.Find("/foo/bar")), "XPath evaluates to 'bazquux'") { 125 | return 126 | } 127 | 128 | if !assert.Equal(t, "", xpath.String(ctx.Find("/[bogus")), "XPath evaluates to ''") { 129 | return 130 | } 131 | } 132 | 133 | func TestXPathContextExpression_Namespaces(t *testing.T) { 134 | doc, err := libxml2.ParseString(``) 135 | if err != nil { 136 | t.Errorf("Failed to parse string: %s", err) 137 | } 138 | defer doc.Free() 139 | 140 | root, err := doc.DocumentElement() 141 | if !assert.NoError(t, err, "DocumentElement() should succeed") { 142 | return 143 | } 144 | 145 | ctx, err := xpath.NewContext(root) 146 | if err != nil { 147 | t.Errorf("Failed to initialize XPathContext: %s", err) 148 | return 149 | } 150 | defer ctx.Free() 151 | 152 | prefix := `xxx` 153 | nsuri := `http://example.com/foobar` 154 | if err := ctx.RegisterNS(prefix, nsuri); err != nil { 155 | t.Errorf("Failed to register namespace: %s", err) 156 | return 157 | } 158 | 159 | nodes := xpath.NodeList(ctx.Find(`/xxx:foo`)) 160 | if len(nodes) != 1 { 161 | t.Errorf(`Expected 1 node, got %d`, len(nodes)) 162 | return 163 | } 164 | if nodes[0].NodeName() != "foo" { 165 | 
// Schema represents an XML schema.
type Schema struct {
	ptr uintptr // *C.xmlSchema
}

// SchemaValidationError is returned when the Validate() function
// finds errors. When there are multiple errors, you may access
// them using the Errors() method.
type SchemaValidationError struct {
	errors []error
}

// Option is an alias of option.Interface from the internal option package.
// Values of this type are produced by WithPath and WithURI and accepted
// by Parse.
type Option = option.Interface
In case where you are parsing using `ParseFromFile()` 18 | // this is handled automatically by the `ParseFromFile` method, 19 | // but if you are using `Parse` method this is required 20 | // 21 | // If the path is provided as a relative path, the current directory 22 | // should be obtainable via `os.Getwd` when this call is made, otherwise 23 | // path resolution may fail in weird ways. 24 | func WithPath(path string) Option { 25 | if !filepath.IsAbs(path) { 26 | if curdir, err := os.Getwd(); err == nil { 27 | path = filepath.Join(curdir, path) 28 | } 29 | } 30 | 31 | return WithURI( 32 | (&url.URL{ 33 | Scheme: `file`, 34 | Path: path, 35 | }).String(), 36 | ) 37 | } 38 | 39 | func WithURI(v string) Option { 40 | return option.New(option.OptKeyWithURI, v) 41 | } 42 | -------------------------------------------------------------------------------- /xsd/xsd.go: -------------------------------------------------------------------------------- 1 | // Package xsd contains some of the tools available from libxml2 2 | // that allows you to validate your XML against an XSD 3 | // 4 | // This is basically all you need to do: 5 | // 6 | // schema, err := xsd.Parse(xsdsrc) 7 | // if err != nil { 8 | // panic(err) 9 | // } 10 | // defer schema.Free() 11 | // if err := schema.Validate(doc); err != nil{ 12 | // for _, e := range err.(SchemaValidationErr).Error() { 13 | // println(e.Error()) 14 | // } 15 | // } 16 | package xsd 17 | 18 | import ( 19 | "github.com/lestrrat-go/libxml2/clib" 20 | "github.com/lestrrat-go/libxml2/types" 21 | "github.com/pkg/errors" 22 | ) 23 | 24 | const ValueVCCreate = 1 25 | 26 | // Parse is used to parse an XML Schema Document to produce a 27 | // Schema instance. Make sure to call Free() on the instance 28 | // when you are done with it. 29 | 30 | func Parse(buf []byte, options ...Option) (*Schema, error) { 31 | // xsd.WithURI(...) 32 | sptr, err := clib.XMLSchemaParse(buf, options...) 
33 | if err != nil { 34 | return nil, errors.Wrap(err, "failed to parse input") 35 | } 36 | 37 | return &Schema{ptr: sptr}, nil 38 | } 39 | 40 | // ParseFromFile is used to parse an XML schema using only the file path. 41 | // Make sure to call Free() on the instance when you are done with it. 42 | func ParseFromFile(path string) (*Schema, error) { 43 | sptr, err := clib.XMLSchemaParseFromFile(path) 44 | if err != nil { 45 | return nil, errors.Wrap(err, "failed to parse input from file") 46 | } 47 | 48 | return &Schema{ptr: sptr}, nil 49 | } 50 | 51 | // Pointer returns the underlying C struct 52 | func (s *Schema) Pointer() uintptr { 53 | return s.ptr 54 | } 55 | 56 | // Free frees the underlying C struct 57 | func (s *Schema) Free() { 58 | if err := clib.XMLSchemaFree(s); err != nil { 59 | return 60 | } 61 | s.ptr = 0 62 | } 63 | 64 | // Validate takes in a XML document and validates it against 65 | // the schema. If there are any problems, and error is 66 | // returned. 67 | func (s *Schema) Validate(d types.Document, options ...int) error { 68 | errs := clib.XMLSchemaValidateDocument(s, d, options...) 
69 | if errs == nil { 70 | return nil 71 | } 72 | 73 | return SchemaValidationError{errors: errs} 74 | } 75 | 76 | // Error method fulfils the error interface 77 | func (sve SchemaValidationError) Error() string { 78 | return "schema validation failed" 79 | } 80 | 81 | // Errors returns the list of errors found 82 | func (sve SchemaValidationError) Errors() []error { 83 | return sve.errors 84 | } 85 | -------------------------------------------------------------------------------- /xsd_test.go: -------------------------------------------------------------------------------- 1 | package libxml2_test 2 | 3 | import ( 4 | "io" 5 | "net/http" 6 | "net/http/httptest" 7 | "os" 8 | "path/filepath" 9 | "testing" 10 | 11 | "github.com/lestrrat-go/libxml2" 12 | "github.com/lestrrat-go/libxml2/xsd" 13 | "github.com/stretchr/testify/assert" 14 | ) 15 | 16 | func TestXSD(t *testing.T) { 17 | xsdfile := filepath.Join("test", "xmldsig-core-schema.xsd") 18 | f, err := os.Open(xsdfile) 19 | if !assert.NoError(t, err, "open schema") { 20 | return 21 | } 22 | defer f.Close() 23 | 24 | buf, err := io.ReadAll(f) 25 | if !assert.NoError(t, err, "reading from schema") { 26 | return 27 | } 28 | 29 | s, err := xsd.Parse(buf) 30 | if !assert.NoError(t, err, "parsing schema") { 31 | return 32 | } 33 | defer s.Free() 34 | 35 | func() { 36 | d, err := libxml2.ParseString(``) 37 | if !assert.NoError(t, err, "parsing XML") { 38 | return 39 | } 40 | defer d.Free() 41 | 42 | err = s.Validate(d) 43 | if !assert.Error(t, err, "s.Validate should fail") { 44 | return 45 | } 46 | 47 | serr, ok := err.(xsd.SchemaValidationError) 48 | if !assert.True(t, ok, "error is xsd.SchemaValidationErr") { 49 | return 50 | } 51 | 52 | if !assert.Len(t, serr.Errors(), 1, "there's one error") { 53 | return 54 | } 55 | for _, e := range serr.Errors() { 56 | t.Logf("err (OK): '%s'", e) 57 | } 58 | }() 59 | 60 | func() { 61 | const src = ` 62 | 63 | 64 | 67 | 69 | 70 | 71 | 73 | 74 | 76 | uooqbWYa5VCqcJCbuymBKqm17vY= 77 
| 78 | 79 | 80 | KedJuTob5gtvYx9qM3k3gm7kbLBwVbEQRl26S2tmXjqNND7MRGtoew== 81 | 82 | 83 | 84 | 85 |

86 | /KaCzo4Syrom78z3EQ5SbbB4sF7ey80etKII864WF64B81uRpH5t9jQTxe 87 | Eu0ImbzRMqzVDZkVG9xD7nN1kuFw== 88 |

89 | li7dzDacuo67Jg7mtqEm2TRuOMU= 90 | Z4Rxsnqc9E7pGknFFH2xqaryRPBaQ01khpMdLRQnG541Awtx/ 91 | XPaF5Bpsy4pNWMOHCBiNU0NogpsQW5QvnlMpA== 92 | 93 | qV38IqrWJG0V/ 94 | mZQvRVi1OHw9Zj84nDC4jO8P0axi1gb6d+475yhMjSc/ 95 | BrIVC58W3ydbkK+Ri4OKbaRZlYeRA== 96 | 97 |
98 |
99 |
100 |
101 | ` 102 | d, err := libxml2.ParseString(src) 103 | if !assert.NoError(t, err, "parsing XML") { 104 | return 105 | } 106 | defer d.Free() 107 | 108 | err = s.Validate(d) 109 | if !assert.NoError(t, err, "s.Validate should pass") { 110 | if serr, ok := err.(xsd.SchemaValidationError); ok { 111 | for _, e := range serr.Errors() { 112 | t.Logf("err: %s", e) 113 | } 114 | } 115 | return 116 | } 117 | }() 118 | } 119 | 120 | func TestXSDDefaultValue(t *testing.T) { 121 | const schemasrc = ` 122 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | ` 143 | const docsrc = ` 144 | 145 | 146 | 147 | ` 148 | 149 | schema, err := xsd.Parse([]byte(schemasrc)) 150 | if !assert.NoError(t, err, `xsd.Parse should succeed`) { 151 | return 152 | } 153 | defer schema.Free() 154 | 155 | doc, err := libxml2.ParseString(docsrc) 156 | if !assert.NoError(t, err, "parsing XML") { 157 | return 158 | } 159 | defer doc.Free() 160 | if !assert.NoError(t, schema.Validate(doc, xsd.ValueVCCreate), `schema.Validate should succeed`) { 161 | return 162 | } 163 | 164 | t.Logf("%s", doc.String()) 165 | } 166 | 167 | func TestGHIssue67(t *testing.T) { 168 | t.Run("Local validation", func(t *testing.T) { 169 | const schemafile = "test/schema/projects/go_libxml2_local.xsd" 170 | const docfile = "test/go_libxml2_local.xml" 171 | 172 | schemasrc, err := os.ReadFile(schemafile) 173 | if !assert.NoError(t, err, `failed to read xsd file`) { 174 | return 175 | } 176 | 177 | docsrc, err := os.ReadFile(docfile) 178 | if !assert.NoError(t, err, `failed to read xml file`) { 179 | return 180 | } 181 | 182 | schema, err := xsd.Parse(schemasrc, xsd.WithPath(schemafile)) 183 | if !assert.NoError(t, err, `xsd.Parse should succeed`) { 184 | return 185 | } 186 | defer schema.Free() 187 | 188 | doc, err := libxml2.Parse(docsrc) 189 | if !assert.NoError(t, err, "parsing XML") { 190 | return 191 | } 192 | defer doc.Free() 193 | if !assert.NoError(t, 
schema.Validate(doc, xsd.ValueVCCreate), `schema.Validate should succeed`) { 194 | return 195 | } 196 | 197 | t.Logf("%s", doc.String()) 198 | }) 199 | t.Run("Remote validation", func(t *testing.T) { 200 | curdir, err := os.Getwd() 201 | if !assert.NoError(t, err, `os.Getwd failed`) { 202 | return 203 | } 204 | 205 | srv := httptest.NewServer(http.FileServer(http.Dir(curdir))) 206 | defer srv.Close() 207 | 208 | var schemafile = srv.URL + "/test/schema/projects/go_libxml2_remote.xsd" 209 | const docfile = "test/go_libxml2_remote.xml" 210 | 211 | //nolint:noctx 212 | res, err := http.Get(schemafile) 213 | if !assert.NoError(t, err, `failed to fetch xsd file`) { 214 | return 215 | } 216 | 217 | schemasrc, err := io.ReadAll(res.Body) 218 | defer res.Body.Close() 219 | if !assert.NoError(t, err, `failed to read xsd file`) { 220 | return 221 | } 222 | 223 | docsrc, err := os.ReadFile(docfile) 224 | if !assert.NoError(t, err, `failed to read xml file`) { 225 | return 226 | } 227 | 228 | schema, err := xsd.Parse(schemasrc, xsd.WithURI(schemafile)) 229 | if !assert.NoError(t, err, `xsd.Parse should succeed`) { 230 | return 231 | } 232 | defer schema.Free() 233 | 234 | doc, err := libxml2.Parse(docsrc) 235 | if !assert.NoError(t, err, "parsing XML") { 236 | return 237 | } 238 | defer doc.Free() 239 | if !assert.NoError(t, schema.Validate(doc, xsd.ValueVCCreate), `schema.Validate should succeed`) { 240 | return 241 | } 242 | 243 | t.Logf("%s", doc.String()) 244 | }) 245 | } 246 | --------------------------------------------------------------------------------