├── .github ├── dependabot.yml └── workflows │ ├── CI.yml │ ├── gh-pages.yml │ └── windows.yml ├── .gitignore ├── .rustfmt.toml ├── .travis.yml ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── appveyor.yml ├── benches └── parsing_benchmarks.rs ├── build.rs ├── examples ├── schema_example.rs ├── tree_example.rs └── xpath_example.rs ├── scripts └── doc-upload.sh ├── src ├── bindings.rs ├── c_helpers.rs ├── default_bindings.rs ├── error.rs ├── lib.rs ├── parser.rs ├── readonly.rs ├── readonly │ └── tree.rs ├── schemas │ ├── common.rs │ ├── mod.rs │ ├── parser.rs │ ├── schema.rs │ └── validation.rs ├── tree │ ├── document.rs │ ├── mod.rs │ ├── namespace.rs │ ├── node.rs │ └── nodetype.rs ├── wrapper.h └── xpath.rs └── tests ├── VALGRIND.md ├── base_tests.rs ├── codec_tests.rs ├── mutability_guards.rs ├── readonly_tests.rs ├── resources ├── as_html.xml ├── as_html_result.xml ├── empty_tags.xml ├── empty_tags_result.xml ├── example.html ├── file01.xml ├── file01_ns.xml ├── file01_utf16be.xml ├── file01_utf16be_bom.xml ├── file01_utf16le.xml ├── file01_utf16le_bom.xml ├── file01_utf8_bom.xml ├── file01_🔥🔥🔥.xml ├── file02.xml ├── ids.xml ├── schema.xml ├── schema.xsd ├── simple_namespaces.xml └── unformatted.xml ├── results └── README.md ├── schema_tests.rs ├── tree_tests.rs └── xpath_tests.rs /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: CI Linux 4 | 5 | jobs: 6 | test: 7 | name: rust-libxml CI 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | with_default_bindings: [false, true] 12 | steps: 13 | - 
name: install dependencies 14 | uses: ryankurte/action-apt@v0.2.0 15 | with: 16 | packages: "libxml2-dev" 17 | - name: Set up LIBXML2 env var if compiling with the default bindings 18 | run: echo "LIBXML2=$(pkg-config libxml-2.0 --variable=libdir)/libxml2.so" >> "$GITHUB_ENV" 19 | if: ${{ matrix.with_default_bindings }} 20 | - uses: actions/checkout@v2 21 | - uses: actions-rs/toolchain@v1 22 | with: 23 | profile: minimal 24 | toolchain: stable 25 | override: true 26 | - name: run tests 27 | uses: actions-rs/cargo@v1 28 | with: 29 | command: test 30 | 31 | test-newer-libxml2: 32 | strategy: 33 | matrix: 34 | libxml_version: ["2.12.9", "2.13.8","2.14.1"] 35 | name: With libxml ${{ matrix.libxml_version }} 36 | runs-on: ubuntu-latest 37 | steps: 38 | - name: install dependencies 39 | uses: ryankurte/action-apt@v0.2.0 40 | with: 41 | packages: "libpython3-dev" 42 | - uses: actions/checkout@v2 43 | - name: Install libxml ${{ matrix.libxml_version }} by hand 44 | run: | 45 | wget https://download.gnome.org/sources/libxml2/$(echo ${{ matrix.libxml_version }} | sed -e 's/\.[0-9]*$//')/libxml2-${{ matrix.libxml_version }}.tar.xz 46 | tar xf libxml2-${{ matrix.libxml_version }}.tar.xz 47 | cd libxml2-${{ matrix.libxml_version }} 48 | ./configure 49 | make 50 | sudo make install 51 | - uses: actions-rs/toolchain@v1 52 | with: 53 | profile: minimal 54 | toolchain: stable 55 | override: true 56 | - name: run tests 57 | uses: actions-rs/cargo@v1 58 | with: 59 | command: test 60 | env: 61 | LD_LIBRARY_PATH: /usr/local/lib -------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | 6 | name: Publish Docs 7 | jobs: 8 | publish_docs: 9 | if: github.repository == 'KWARC/rust-libxml' 10 | name: Publish Docs 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: install dependencies 14 | uses: 
ryankurte/action-apt@v0.2.0 15 | with: 16 | packages: "libxml2-dev" 17 | - name: Set up LIBXML2 env var if compiling with the default bindings 18 | run: echo "LIBXML2=$(pkg-config libxml-2.0 --variable=libdir)/libxml2.so" >> "$GITHUB_ENV" 19 | - uses: actions-rs/toolchain@v1 20 | with: 21 | profile: minimal 22 | toolchain: stable 23 | override: true 24 | - name: Checkout sources 25 | uses: actions/checkout@v4 26 | - name: Build Documentation 27 | uses: actions-rs/cargo@v1 28 | with: 29 | command: doc 30 | args: --all --no-deps 31 | 32 | - name: Deploy Documentation 33 | env: 34 | PUBLISH_BRANCH: gh-pages 35 | PUBLISH_DIR: ./target/doc 36 | run: | 37 | git config user.name "${GITHUB_ACTOR}" 38 | git config user.email "${GITHUB_ACTOR}@users.noreply.github.com" 39 | git fetch origin ${PUBLISH_BRANCH} 40 | git checkout ${PUBLISH_BRANCH} 41 | find "${GITHUB_WORKSPACE}/${PUBLISH_DIR}" -maxdepth 1 | tail -n +2 | xargs -I % cp -rf % "." 42 | git add --all 43 | git commit --allow-empty -m "Automated deployment: $(date -u) ${GITHUB_SHA}" 44 | git push origin ${PUBLISH_BRANCH} -------------------------------------------------------------------------------- /.github/workflows/windows.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: CI Windows 4 | 5 | jobs: 6 | test-default-windows: 7 | name: Windows vcpkg (default) 8 | runs-on: windows-latest 9 | env: 10 | VCPKGRS_DYNAMIC: 1 11 | VCPKG_DEFAULT_TRIPLET: x64-windows 12 | VCPKG_ROOT: C:\vcpkg 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Setup vcpkg libxml2 Cache 16 | uses: actions/cache@v4 17 | id: vcpkg-cache 18 | with: 19 | path: C:\vcpkg 20 | key: vcpkg-libxml2 21 | - name: Install libxml2 with vcpkg 22 | run: | 23 | vcpkg install libxml2:x64-windows 24 | vcpkg integrate install 25 | - name: run tests 26 | uses: actions-rs/cargo@v1 27 | with: 28 | command: test 29 | 30 | test-mingw64-windows: 31 | name: Windows (mingw64) 32 | runs-on: 
windows-latest 33 | defaults: 34 | run: 35 | shell: msys2 {0} 36 | steps: 37 | - uses: actions/checkout@v4 38 | - uses: msys2/setup-msys2@v2 39 | with: 40 | path-type: minimal 41 | release: false 42 | update: false 43 | msystem: MINGW64 44 | install: >- 45 | mingw64/mingw-w64-x86_64-pkg-config 46 | mingw64/mingw-w64-x86_64-libxml2 47 | - name: Install stable windows-gnu Rust toolchain 48 | uses: actions-rs/toolchain@v1 49 | with: 50 | toolchain: stable-x86_64-pc-windows-gnu 51 | target: x86_64-pc-windows-gnu 52 | override: true 53 | - name: Ensure mingw64 pkg-config is in path 54 | run: echo "C:\msys64\mingw64\bin" >> "$GITHUB_PATH" 55 | - name: run tests 56 | uses: actions-rs/cargo@v1 57 | with: 58 | command: test -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | Cargo.lock 7 | 8 | # Executables 9 | *.exe 10 | 11 | # Vim swap files 12 | *.swp 13 | 14 | # VSCode project folder 15 | .vscode/ 16 | 17 | # Generated by Cargo 18 | /target/ 19 | 20 | # Test results 21 | /tags 22 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | # Detailed instructions at: https://github.com/rust-lang-nursery/rustfmt/blob/master/Configurations.md 2 | tab_spaces = 2 -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | dist: trusty 3 | language: rust 4 | rust: 5 | - stable 6 | - beta 7 | - nightly 8 | addons: 9 | apt: 10 | packages: libxml2-dev 11 | after_success: ./scripts/doc-upload.sh 12 | env: 13 | global: 14 | - RUST_TEST_THREADS=1 15 | - SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt 16 | - secure: 
"kuvtFj8UpLj0NQhk3a9PDLRhXq4cDhd9UNT9Sn0S2TBFF23AsOX2ffXN+5ey/2mfYEk18d5MpUM15Ha/7PbxTebkqZxhFeZimrqAHTC6605AUICmuJv06tmLYetoqgNvyU0Tgt4MCYblja8oJCs/TkEFtbVxZX0uZ2lgsyLG8PQyROEyiaPQyeld7rd4k6s+13w4EOFO0kGh992BmOAUiICqsDqKYddaI7KL49b4AwkGrfaXAf/mtlJT7E4NlloI/5AmCYlQdYwQui3SJojvzd9lDBF7syuPesgPz3S6dlzr80uKVuI0rVx6K6Xo+vzZLWP4HZExIjF12G8DuBKAWLmoN/QfR+ipkXGrTau78+8Jp0qCQsy4ti4rY8PvhwipkdGS+pUV8a06UwTZARLnknnhfqKFoNvIUjLwdu1HVwftXtIgdFtU7RZMJfUxdq8/dNdXNerAm4c3kdwcKl4nP6Fnus4OWkeekuuCCObcBI1qlTFQT2pp5ae+3oe3kq1Srq8/HrLGlQkcWlbYsB9/mKuSxqRQxTbGli3eWBALecpeCQPdrxygFarX6q18HTwoqprvbcCp9BM4soADv5gVUeiOuYmvn2DbREMMZJKd6cpcXpXem4epK+MxpXh7jhQ60xw9GuIIeZxcgyKf+pP3lRoII3nPZKoecUoDcJct1l4=" 17 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [0.3.6] (in development) 4 | 5 | ## [0.3.5] (2025-28-04) 6 | 7 | This release stabilizes the new "bindgen during build" approach of v0.3.4 to more platforms. 8 | 9 | Support for newer libxml2 versions has been improved. 10 | - CI support for v2.12.9, v2.13.8, v2.14.1 11 | 12 | Thanks go to @wetneb and @charmitro for contributing. 13 | 14 | ### Added 15 | 16 | * cargo build: expose libxml2 version to main build script; 17 | * creating a new `Parser` now initializes via `bindings::xmlInitParser`, called at most once. 
18 | 19 | ### Changes 20 | 21 | * cargo build: mark `max_align_t` as opaque to fix i386 build failure 22 | * cfg: adapt mutability of error pointer depending on libxml version 23 | * change the return type of `xmlGetNodeType` from `u32` to the more portable `bindings::xmlElementType` 24 | * change the argument type of `NodeType::from_int` from `u32` to the more portable `bindings::xmlElementType` 25 | * protect `Schema` initialization to be closer to thread-safe (note that this wrapper is NOT thread-safe in general) 26 | 27 | ### Removed 28 | * The use of `SchemaParserContext` and `SchemaValidationContext` is currently NOT thread safe. 29 | Hence, The `schema_test` has been weakened to run in a single thread (future improvements welcome). 30 | 31 | ## [0.3.4] (2025-16-04) 32 | 33 | Thanks go to @wetneb, @anwaralameddin, @rudolphfroger, @jcamiel, @imcsk8 for contributions to this release. 34 | 35 | ### Added 36 | 37 | * Node methods: `get_property_no_ns` (alias: `get_attribute_no_ns`), `get_properties_ns` (alias: `get_attributes_ns`), `has_property_no_ns` (alias: `has_attribute_no_ns`), `remove_property_no_ns` (alias: `remove_attribute_no_ns`), `get_property_node_ns` (alias: `get_attribute_node_ns`), `get_property_node_no_ns` (alias: `get_attribute_node_no_ns`) 38 | * Added implementations of `Hash`, `PartialEq` and `Eq` traits for `Namespace` 39 | 40 | ### Changed 41 | 42 | * Call bindgen at build time on Unix platforms (thanks @wetneb) 43 | 44 | ## [0.3.3] 2023-17-07 45 | 46 | ### Changed 47 | 48 | * Update the implementation of `StructuredError` so that all validation errors are returned from the validation methods present on `SchemaValidationContext`. Previously, all returned validation errors were identical due to libxml reusing a global memory address for all reported errors. Thanks @JDSeiler ! 49 | * The `message` method of `StructuredError` has been deprecated. 
50 | 51 | ## [0.3.2] 2023-07-05 52 | 53 | ### Added 54 | 55 | * XPath: `Context::findvalues`, with optional node-bound evaluation, obtaining `String` values. 56 | 57 | * `Node::findvalues` method for direct XPath search obtaining `String` values, without first explicitly instantiating a `Context`. Reusing a `Context` remains more efficient. 58 | 59 | ## [0.3.1] 2022-26-03 60 | 61 | * Added: Thanks to @lepapareil, @hurlenko and @ballsteve for contributing installation docs for Windows and MacOS. 62 | * Added: `Node` and `RoNode` now have `has_property` (alias `has_attribute`) and `has_property_ns` (alias `has_attribute_ns`) to check attribute presence without allocating the value. 63 | * Added: `xpath::is_well_formed_xpath`, thanks @bcpeinhardt ! 64 | 65 | ## [0.3.0] 2021-27-05 66 | 67 | * Change `Parser::parse_file/string_with_encoding` to `Parser::parse_file/string_with_options`. 68 | 69 | * Introduce `ParserOptions` which encapsulates the forced encoding setting together with libxml2s HTML and XML parser options. 70 | 71 | * For systems without a pkg-config capability, we now use the `LIBXML2` environment variable to detect an installed libxml2 toolchain. (thanks @przygienda !) 72 | 73 | ## [0.2.16] 2021-31-01 74 | 75 | ### Added 76 | 77 | * More element-oriented methods: `get_next_element_sibling`, `get_prev_element_sibling`, `get_last_element_child`, added to both `Node` and `RoNode`. 78 | * `Document::ronode_to_string` for printing read-only nodes 79 | * `RoNode::node_ptr` for getting the internal libxml2 raw pointer of a read-only node 80 | 81 | ## [0.2.15] 2020-28-09 82 | 83 | Thanks to @JoshuaNitschke for contributing OS-based package detection for the native libxml2! Also thanks to @coding-yogi, @ignatenkobrain and @simoin for investigating platform-specific issues with this crate, some of which expect resolution in upcoming versions. 
84 | 85 | ### Added 86 | 87 | * Support for x64-Windows use via the vcpkg package manager (with new CI monitoring via appveyor). 88 | 89 | ### Changed 90 | 91 | * Added back an `Error` trait implementation for `XmlParseError` 92 | 93 | ### Removed 94 | 95 | * Dropped a large number of memory layout tests that were auto-generated by bindgen, until we have a more sophisticated test-generation setup that can enforce multiple architectures. Ideally this has no day-to-day impact and just makes portability easier in the short-term. 96 | 97 | ## [0.2.14] 2020-27-03 98 | 99 | ### Changed 100 | 101 | More consistently use `c_char` to successfully compile on ARM targets 102 | 103 | ## [0.2.13] 2020-16-01 104 | 105 | Thanks to @jangernert for the upgrades to `Document` serialization. 106 | Thanks to @lweberk for contributing the `Schema` featureset and to @cbarber for refining the FFI interop. 107 | 108 | ### Added 109 | 110 | * `Document::to_string_with_options` allowing to customize document serialization 111 | * `Document::SaveOptions` containing the currently supported serialization options, as provided internally by libxml 112 | * `Schema` holding and managing `xmlSchemaPtr` as created while parsing by `SchemaParserContext` 113 | * `SchemaParserContext` holding source of XSD and parsing into a `Schema` while gathering and –in case returning– errors that arise from the XSD parser across the FFI to libxml 114 | * `SchemaValidationContext` holding the `Schema` from resulting `SchemaParserContext` parse and offering validation methods for `Document`, `Node` or file path to XML, while gathering and –in case returning– validation errors from the XML validator across the FFI to libxml 115 | 116 | ### Changed 117 | 118 | * the `Document::to_string()` serialization method is now implemented through `fmt::Display` and no longer takes an optional boolean flag. 
The default behavior is now unformatted serialization - previously `to_string(false)`, while `to_string(true)` can be realized via 119 | 120 | ``` 121 | .to_string_with_options(SaveOptions { format: true, ..SaveOptions::default()}) 122 | ``` 123 | 124 | ## [0.2.12] 2019-16-06 125 | 126 | Thanks to @Alexhuszagh for contributing all enhancements for the `0.2.12` release! 127 | 128 | ### Added 129 | 130 | * BOM-aware Unicode support 131 | * New `Parser` methods for specifying an explicit encoding: `parse_file_with_encoding`, `parse_string_with_encoding`, `is_well_formed_html_with_encoding` 132 | 133 | ### Changed 134 | 135 | * Default encodings in `Parser` are now left for libxml to guess internally, rather than defaulted to `utf-8`. 136 | 137 | ## [0.2.11] 2019-15-04 138 | 139 | ### Added 140 | 141 | * `RoNode::to_hashable` and `RoNode::null` for parity with existing `Node`-leveraging applications 142 | 143 | ## [0.2.10] 2019-14-04 144 | 145 | ### Added 146 | 147 | * `RoNode` primitive for simple and efficient **read-only** parallel processing 148 | * Benchmarking a 120 MB XML document shows a twenty-five-fold speedup, when comparing `Node` to parallel rayon processing over `RoNode` with a 32 logical core desktop 149 | * While `RoNode` is added as an experiment for high performance read-only scans, any mutability requires using `Node` and incurring a bookkeeping cost of safety at runtime. 150 | * Introduced benchmarking via `criterion`, only installed during development. 151 | * `benches/parsing_benchmarks` contains examples of parallel scanning via `rayon` iterators. 152 | * added `Document::get_root_readonly` method for obtaining a `RoNode` root. 
153 | * added `Context::node_evaluate_readonly` method for searching over a `RoNode` 154 | * added `Context::get_readonly_nodes_as_vec` method for collecting xpath results as `RoNode` 155 | 156 | ## [0.2.9] 2019-28-03 157 | 158 | * Squash memory leak in creating new `Node`s from the Rust API 159 | * Safely unlink `Node`s obtained via XPath searches 160 | 161 | ## [0.2.8] 2019-25-03 162 | 163 | ### Changed 164 | 165 | Minor internal changes to make the crate compile more reliably under MacOS, and other platforms which enable the `LIBXML_THREAD_ENABLED` compile-time flag. Thank you @caldwell ! 166 | 167 | ## [0.2.7] 2019-09-03 168 | 169 | ### Added 170 | 171 | * implement and test `replace_child_node` for element nodes 172 | 173 | ## [0.2.6] 2018-07-12 174 | 175 | * Internal update to Rust 2018 Edition 176 | * fix deallocation bugs with `.import_node()` and `.get_namespaces()` 177 | 178 | ## [0.2.5] 2018-26-09 179 | 180 | ### Added 181 | 182 | * `Node::null` placeholder that avoids the tricky memory management of `Node::mock` that can lead to memory leaks. Really a poor substitute for the better `Option` type with a `None` value, which is **recommended** instead. 183 | 184 | ## [0.2.4] 2018-24-09 185 | 186 | ### Added 187 | 188 | * `Context::from_node` method for convenient XPath context initialization via a Node object. Possible as nodes keep a reference to their owner `Document` object. 189 | 190 | ### Changed 191 | 192 | * Ensured memory safety of cloning xpath `Context` objects 193 | * Switched to using `Weak` references to the owner document, in `Node`, `Context` and `Object`, to prevent memory leaks in multi-document pipelines. 194 | * Speedup to XPath node retrieval 195 | 196 | ## [0.2.3] 2018-19-09 197 | 198 | ### Added 199 | 200 | * `Node::findnodes` method for direct XPath search, without first explicitly instantiating a `Context`. Reusing a `Context` remains more efficient. 
201 | 202 | ## [0.2.2] 2018-23-07 203 | 204 | * Expose the underlying `libxml2` data structures in the public crate interface, to enable a first [libxslt](https://crates.io/crates/libxslt) crate proof of concept. 205 | 206 | ## [0.2.1] 2018-23-07 207 | 208 | ### Added 209 | 210 | * `Node::set_node_rc_guard` which allows customizing the reference-count mutability threshold for Nodes. 211 | * serialization tests for `Document` 212 | * (crate internal) full set of libxml2 bindings as produced via `bindgen` (see #39) 213 | * (crate internal) using libxml2's type language in the wrapper Rust modules 214 | * (crate internal) setup bindings for reuse in higher-level crates, such as libxslt 215 | 216 | ### Changed 217 | 218 | * `NodeType::from_c_int` renamed to `NodeType::from_int`, now accepting a `u32` argument 219 | 220 | ### Removed 221 | 222 | * Removed dependence on custom C code; also removed gcc from build dependencies 223 | 224 | ## [0.2.0] 2018-19-07 225 | 226 | This release adds fundamental breaking changes to the API. The API continues to be considered unstable until the `1.0.0` release. 227 | 228 | ### Added 229 | 230 | * `dup` and `dup_from` methods for deeply duplicating a libxml2 document 231 | * `is_unlinked` for quick check if a `Node` has been unlinked from a parent 232 | 233 | ### Changed 234 | 235 | * safe API for `Node`s and `Document`s, with automatic pointer bookkeeping and memory deallocation, by @triptec 236 | * `Node`s are now bookkept by their owning document 237 | * libxml2 low-level memory deallocation is postponed until the `Document` is dropped, with the exception of unlinked nodes, who are deallocated on drop. 
238 | * `Document::get_root_element` now has an option type, and returns `None` for an empty Document 239 | * `Node::mock` now takes owner `Document` as argument 240 | * proofed tests with `valgrind` and removed all obvious memory leaks 241 | * All node operations that modify a `Node` now both require a `&mut Node` argument and return a `Result` type. 242 | * Full list of changed signatures in Node: `remove_attribute`, `remove_property`, `set_name`, `set_content`, `set_property`, `set_property_ns`, `set_attribute`, `set_attribute_ns`, `remove_attribute`, `set_namespace`, `recursively_remove_namespaces`, `append_text` 243 | * Tree transforming operations now operate on `&mut self`, and no longer return a Node if the return value is identical to the argument. 244 | * Changed signatures: `add_child`, `add_prev_sibling`, `add_next_sibling` 245 | * `Result` types should always be checked for errors, as mutability conflicts are reported during runtime. 246 | 247 | ### Removed 248 | 249 | * `global` module, which attempted to manage global libxml state for threaded workflows. May be re-added after the API stabilizes. 250 | 251 | ## [0.1.2] 2018-12-01 252 | 253 | * We welcome Andreas (@triptec) to the core developer team! 254 | 255 | ### Added 256 | 257 | * Workaround `.free` method for freeing nodes, until the `Rc<RefCell<_>>` free-on-drop solution by Andreas is introduced in 0.2 258 | 259 | ## [0.1.1] 2017-18-12 260 | 261 | ### Added 262 | 263 | * `get_first_element_child` - similar to `get_first_child` but only returns XML Elements 264 | * `is_element_node` - check if a given `Node` is an XML Element 265 | 266 | ### Changed 267 | 268 | * Requiring owned `Node` function arguments only when consumed - `add_*` methods largely take `&Node` now. 269 | 270 | ## [0.1.0] 2017-09-11 271 | 272 | Pushing up release to a 0.1, as contributor interest is starting to pick up, and the 0. version were getting a bit silly/wrong. 
273 | 274 | ### Added 275 | 276 | * Node methods: `unbind_node`, `recursively_remove_namespaces`, `set_name`, 277 | * Document methods: `import_node` 278 | 279 | ### Changed 280 | 281 | * Updated gcc build to newer incantation, upped dependency version. 282 | 283 | ## [0.0.75] 2017-04-06 284 | 285 | ### Added 286 | 287 | * Node methods: `get_namespace_declarations`, `get_property_ns` (alias: `get_attribute_ns`), `remove_property` (alias: `remove_attribute`), `get_attribute_node`, `get_namespace`, `lookup_namespace_prefix`, `lookup_namespace_uri` 288 | 289 | * XPath methods: `findvalue` and `findnodes`, with optional node-bound evaluation. 290 | 291 | ### Changed 292 | 293 | * The Node setter for a namespaced attribute is now `set_property_ns` (alias: `set_attribute_ns`) 294 | * Node set_* methods are now consistently defined on `&mut self` 295 | * Refactored wrongly used `url` to `href` for namespace-related Node ops. 296 | * Fixed bug with Node's `get_content` method always returning empty 297 | * More stable `append_text` for node, added tests 298 | 299 | ## [0.0.74] 2016-25-12 300 | 301 | ### Changed 302 | 303 | * Namespace::new only requires a borrowed &Node now 304 | * Fixed bug with wrongly discarded namespace prefixes on Namespace::new 305 | 306 | ### Added 307 | 308 | * Namespace methods: `get_prefix`, `get_url` 309 | 310 | ## [0.0.73] 2016-25-12 311 | 312 | ### Added 313 | 314 | * Document method: `as_node` 315 | 316 | ## [0.0.72] 2016-25-12 317 | 318 | ### Added 319 | 320 | * Node methods: `get_last_child`, `get_child_nodes`, `get_child_elements`, `get_properties`, `get_attributes` 321 | 322 | ## [0.0.71] 2016-29-11 323 | 324 | ### Changed 325 | 326 | * Namespace::new takes Node argument last 327 | 328 | ### Added 329 | 330 | * Node namespace accessors - `set_namespace`, `get_namespaces`, `set_ns_attribute`, `set_ns_property` 331 | * Namespace registration for XPath 332 | 333 | ## [0.0.7] 2016-27-11 334 | 335 | ### Changed 336 | 337 | * stricter 
dependency spec in Cargo.toml 338 | * cargo clippy compliant 339 | * Document's `get_root_element` returns the document pointer as a Node for empty documents, type change from `Option<Node>` to simple `Node` 340 | 341 | ### Added 342 | 343 | * Node accessors: `set_attribute`, `get_attribute`, `set_property` (the `attribute` callers are simple aliases for `property`) 344 | * Node `to_hashable` for simple hashing of nodes 345 | * Node `mock` for simple mock nodes in testing 346 | 347 | ## [0.0.5] 2016-07-01 348 | 349 | Thanks to @grray for most of these improvements! 350 | 351 | ### Changed 352 | 353 | * Switched to using the more permissive MIT license, consistent with libxml2 licensing 354 | * Fixed segfault issues with xpath contexts 355 | 356 | ### Added 357 | 358 | * Can now evaluate ```string(/foo//@bar)``` type XPath expressions, and use their result via ```.to_string()``` 359 | 360 | ## [0.0.4] 2016-04-25 361 | 362 | ### Changed 363 | 364 | * The ```Node.add_child``` method now adds a Node, while the old behavior of creating a new node with a given namespace and name is now ```Node.new_child``` 365 | 366 | ### Added 367 | 368 | * Can add following siblings via ```Node.add_next_sibling``` 369 | * Can now add text nodes via ```Node.new_text``` 370 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "libxml" 3 | version = "0.3.5" 4 | edition = "2021" 5 | authors = ["Andreas Franzén ", "Deyan Ginev ","Jan Frederik Schaefer "] 6 | description = "A Rust wrapper for libxml2 - the XML C parser and toolkit developed for the Gnome project" 7 | repository = "https://github.com/KWARC/rust-libxml" 8 | documentation = "https://kwarc.github.io/rust-libxml/libxml/index.html" 9 | readme = "README.md" 10 | license = "MIT" 11 | keywords = ["xml", "libxml","xpath", "parser", "html"] 12 | build = "build.rs" 13 | exclude = [ 14 | "scripts/*" 
15 | ] 16 | 17 | [lib] 18 | name = "libxml" 19 | 20 | [dependencies] 21 | libc = "0.2" 22 | 23 | [target.'cfg(all(target_family = "windows", target_env = "msvc"))'.build-dependencies] 24 | vcpkg = "0.2" 25 | 26 | [target.'cfg(all(target_family = "windows", target_env = "gnu"))'.build-dependencies] 27 | pkg-config = "0.3.2" 28 | 29 | [target.'cfg(macos)'.build-dependencies] 30 | pkg-config = "0.3.2" 31 | 32 | [target.'cfg(unix)'.build-dependencies] 33 | pkg-config = "0.3.2" 34 | 35 | [build-dependencies.bindgen] 36 | version = "0.71" 37 | features = [ 38 | "runtime", 39 | "which-rustfmt", 40 | ] 41 | default-features = false 42 | 43 | [dev-dependencies] 44 | rayon = "1.0.0" 45 | criterion = "0.6.0" 46 | 47 | [[bench]] 48 | name = "parsing_benchmarks" 49 | harness = false 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2021 Andreas Franzén, Deyan Ginev, Jan Frederik Schaefer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CI Linux](https://github.com/KWARC/rust-libxml/actions/workflows/CI.yml/badge.svg?branch=master)](https://github.com/KWARC/rust-libxml/actions/workflows/CI.yml) 2 | [![CI Windows](https://github.com/KWARC/rust-libxml/actions/workflows/windows.yml/badge.svg?branch=master)](https://github.com/KWARC/rust-libxml/actions/workflows/windows.yml) 3 | [![API Documentation](https://img.shields.io/badge/docs-API-blue.svg)](http://KWARC.github.io/rust-libxml/libxml/index.html) 4 | [![License](http://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/KWARC/rust-libxml/master/LICENSE) 5 | [![crates.io](https://img.shields.io/crates/v/libxml.svg)](https://crates.io/crates/libxml) 6 | 7 | Rust wrapper for [libxml2](http://xmlsoft.org/). 8 | 9 | The main goal of this project is to benefit from libxml2's maturity and stability while the native Rust XML crates mature to be near-drop-in replacements. 10 | 11 | As of the `0.2.0` release of the crate, there are some modest safety guarantees: 12 | 13 | * Mutability, as well as ownership - we use `Rc<RefCell<T>>` wrappers to ensure runtime safety of libxml2 operations already in the Rust layer. 14 | * Memory safety guarantees - in particular `Node` and `Document` objects have automatic bookkeeping and deallocation on drop, for leak-free wrapper use. 
15 | * No thread safety - libxml2's global memory management is a challenge to adapt in a thread-safe way with minimal intervention 16 | 17 | **Coverage**: Only a subset of libxml2 is covered at the moment; contributions are welcome. We try to increase support with each release. 18 | 19 | **Welcome!** With these caveats, the contributors to the project are migrating production work towards Rust and find a continuing reliance on libxml2 a helpful relief for initial ports. As such, contributions to this crate are welcome, if your workflow is not yet fully supported. 20 | 21 | ## Installation prerequisites 22 | 23 | Before performing the usual cargo build/install steps, you need to have the relevant components for using the original libxml2 code. These may become gradually outdated with time - please do let us know by opening a new issue/PR whenever that's the case. 24 | 25 | ### Linux/Debian 26 | 27 | On Linux systems you need the development headers of libxml2 (e.g. `libxml2-dev` in Debian), as well as `pkg-config`. 28 | 29 | Starting with `0.3.4`, the build requires `bindgen` (see [`bindgen` requirements](https://rust-lang.github.io/rust-bindgen/requirements.html#clang)). 30 | So you have to install Clang 9.0 or greater: 31 | 32 | - Alpine: `$ apk add clang-dev` 33 | - Arch Linux: `$ pacman -S clang` 34 | - Debian / Ubuntu: `$ apt install libclang-dev` 35 | - Fedora: `$ dnf install clang-devel` 36 | 37 | ### MacOS 38 | [Community contributed](https://github.com/KWARC/rust-libxml/issues/88#issuecomment-890876895): 39 | 40 | ``` 41 | $ brew install libxml2 # e.g. 
version 2.9.12 42 | $ ln -s /usr/local/Cellar/libxml2/2.9.12/lib/libxml2.2.dylib /usr/local/lib/libxml-2.0.dylib 43 | $ export LIBXML2=/usr/local/Cellar/libxml2/2.9.12/lib/pkgconfig/libxml-2.0.pc 44 | ``` 45 | 46 | ### FreeBSD 47 | [Community contributed](https://github.com/KWARC/rust-libxml/issues/130#issuecomment-1976348349): 48 | 49 | ``` 50 | $ pkg install libxml2 pkgconf 51 | ``` 52 | 53 | ### Windows 54 | 55 | #### msvc 56 | 57 | [Community contributed](https://github.com/KWARC/rust-libxml/issues/81#issuecomment-760364976): 58 | 59 | * manually install the C++ build tools and the English language pack by visiting [BuildTools](https://visualstudio.microsoft.com/fr/thank-you-downloading-visual-studio/?sku=BuildTools&rel=16) 60 | * launch a cmd prompt with admin privileges and execute these commands sequentially: 61 | ``` 62 | C:\> git clone https://github.com/microsoft/vcpkg 63 | C:\> .\vcpkg\bootstrap-vcpkg.bat 64 | C:\> setx /M PATH "%PATH%;c:\vcpkg" && setx VCPKGRS_DYNAMIC "1" /M 65 | C:\> refreshenv 66 | C:\> vcpkg install libxml2:x64-windows 67 | C:\> vcpkg integrate install 68 | ``` 69 | 70 | #### gnu 71 | 72 | In a mingw64 environment you can install libxml2 with `pacman -S mingw64/mingw-w64-x86_64-libxml2`. 
-------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # Based on the "trust" template v0.1.2 2 | # https://github.com/japaric/trust/tree/v0.1.2 3 | 4 | environment: 5 | global: 6 | RUST_VERSION: stable 7 | CRATE_NAME: rust-libxml 8 | 9 | matrix: 10 | - CHANNEL: stable 11 | ARCH: x86_64 12 | TOOLCHAIN: msvc 13 | FEATURES: vcpkg 14 | VCPKG_DEFAULT_TRIPLET: x64-windows 15 | VCPKGRS_DYNAMIC: 1 16 | TARGET: x86_64-pc-windows-msvc 17 | 18 | install: 19 | - vcpkg install libxml2 20 | - curl -sSf -o rustup-init.exe https://win.rustup.rs/ 21 | - rustup-init.exe -y --default-host %TARGET% --default-toolchain %RUST_VERSION% 22 | - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin 23 | - rustc -Vv 24 | - cargo -V 25 | 26 | test_script: 27 | - if [%APPVEYOR_REPO_TAG%]==[false] ( 28 | cargo build --target %TARGET% && 29 | cargo test --target %TARGET% 30 | ) 31 | 32 | cache: 33 | - '%USERPROFILE%\.cargo\registry' 34 | - C:\tools\vcpkg\installed 35 | - target 36 | 37 | branches: 38 | only: 39 | - master 40 | 41 | notifications: 42 | - provider: Email 43 | on_build_success: false 44 | 45 | # Building is done in the test phase, so we disable Appveyor's build phase. 
46 | build: false -------------------------------------------------------------------------------- /benches/parsing_benchmarks.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use criterion::Criterion; 5 | use libxml::parser::Parser; 6 | use libxml::readonly::RoNode; 7 | use libxml::tree::{Node, NodeType}; 8 | use rayon::prelude::*; 9 | 10 | // -- workhorse functions 11 | // not *quite* classic depth-first search, since we keep all children at the current level in memory, 12 | // but certainly DFS-order for traversal 13 | 14 | fn dfs_single_classic(node: Node) -> i32 { 15 | 1 + node 16 | .get_child_nodes() 17 | .into_iter() 18 | .map(dfs_single_classic) 19 | .sum::() 20 | } 21 | 22 | fn dfs_single(node: RoNode) -> i32 { 23 | 1 + node 24 | .get_child_nodes() 25 | .into_iter() 26 | .map(dfs_single) 27 | .sum::() 28 | } 29 | 30 | fn dfs_parallel(node: RoNode) -> i32 { 31 | 1 + node 32 | .get_child_nodes() 33 | .into_par_iter() 34 | .map(dfs_parallel) 35 | .sum::() 36 | } 37 | 38 | fn dfs_single_classic_work2(node: Node) -> (i32, usize) { 39 | if node.get_type() == Some(NodeType::TextNode) { 40 | (1, node.get_content().len()) 41 | } else { 42 | node 43 | .get_child_nodes() 44 | .into_iter() 45 | .map(dfs_single_classic_work2) 46 | .fold((1, 0), |acc, x| (acc.0 + x.0, acc.1 + x.1)) 47 | } 48 | } 49 | 50 | fn dfs_single_work2(node: RoNode) -> (i32, usize) { 51 | if node.get_type() == Some(NodeType::TextNode) { 52 | (1, node.get_content().len()) 53 | } else { 54 | node 55 | .get_child_nodes() 56 | .into_iter() 57 | .map(dfs_single_work2) 58 | .fold((1, 0), |acc, x| (acc.0 + x.0, acc.1 + x.1)) 59 | } 60 | } 61 | 62 | fn dfs_parallel_work2(node: RoNode) -> (i32, usize) { 63 | if node.get_type() == Some(NodeType::TextNode) { 64 | (1, node.get_content().len()) 65 | } else { 66 | let dfs_work = node 67 | .get_child_nodes() 68 | .into_par_iter() 69 | .map(dfs_parallel_work2) 70 | .reduce(|| 
(0, 0), |acc, x| (acc.0 + x.0, acc.1 + x.1)); 71 | (dfs_work.0 + 1, dfs_work.1) 72 | } 73 | } 74 | 75 | // --- bencher functions 76 | // to get big.xml download, unpack and rename: 77 | // http://www.ins.cwi.nl/projects/xmark/Assets/standard.gz 78 | // or use your own XML sample 79 | fn bench_single_thread_classic(c: &mut Criterion) { 80 | let parser = Parser::default(); 81 | let doc = parser.parse_file("benches/big.xml").unwrap(); 82 | c.bench_function("single thread DFS count", move |b| { 83 | b.iter(|| { 84 | let root = doc.get_root_element().unwrap(); 85 | assert_eq!(dfs_single_classic(root), 4_690_647) 86 | }) 87 | }); 88 | } 89 | 90 | fn bench_single_thread_classic_work2(c: &mut Criterion) { 91 | let parser = Parser::default(); 92 | let doc = parser.parse_file("benches/big.xml").unwrap(); 93 | c.bench_function("single thread DFS count+length", move |b| { 94 | b.iter(|| { 95 | let root = doc.get_root_element().unwrap(); 96 | assert_eq!(dfs_single_classic_work2(root), (4_690_647, 81_286_567)) 97 | }) 98 | }); 99 | } 100 | 101 | fn bench_single_thread(c: &mut Criterion) { 102 | let parser = Parser::default(); 103 | let doc = parser.parse_file("benches/big.xml").unwrap(); 104 | c.bench_function("read-only single thread DFS count", move |b| { 105 | b.iter(|| { 106 | let root = doc.get_root_readonly().unwrap(); 107 | assert_eq!(dfs_single(root), 4_690_647) 108 | }) 109 | }); 110 | } 111 | 112 | fn bench_single_thread_work2(c: &mut Criterion) { 113 | let parser = Parser::default(); 114 | let doc = parser.parse_file("benches/big.xml").unwrap(); 115 | c.bench_function("read-only single thread DFS count+length", move |b| { 116 | b.iter(|| { 117 | let root = doc.get_root_readonly().unwrap(); 118 | assert_eq!(dfs_single_work2(root), (4_690_647, 81_286_567)) 119 | }) 120 | }); 121 | } 122 | 123 | fn bench_multi_thread(c: &mut Criterion) { 124 | let parser = Parser::default(); 125 | let doc = parser.parse_file("benches/big.xml").unwrap(); 126 | c.bench_function("read-only 
multi thread DFS count", move |b| { 127 | b.iter(|| { 128 | let root = doc.get_root_readonly().unwrap(); 129 | assert_eq!(dfs_parallel(root), 4_690_647); 130 | }) 131 | }); 132 | } 133 | 134 | fn bench_multi_thread_work2(c: &mut Criterion) { 135 | let parser = Parser::default(); 136 | let doc = parser.parse_file("benches/big.xml").unwrap(); 137 | c.bench_function("read-only multi thread DFS count+length", move |b| { 138 | b.iter(|| { 139 | let root = doc.get_root_readonly().unwrap(); 140 | assert_eq!(dfs_parallel_work2(root), (4_690_647, 81_286_567)) 141 | }) 142 | }); 143 | } 144 | criterion_group!( 145 | name = benches; 146 | config = Criterion::default().sample_size(10); 147 | targets = bench_single_thread_classic, bench_single_thread_classic_work2, bench_single_thread, bench_single_thread_work2, bench_multi_thread, bench_multi_thread_work2 148 | ); 149 | 150 | criterion_main!(benches); 151 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | use std::{env, fs, path::{Path, PathBuf}}; 2 | 3 | struct ProbedLib { 4 | version: String, 5 | include_paths: Vec, 6 | } 7 | 8 | /// Finds libxml2 and optionally return a list of header 9 | /// files from which the bindings can be generated. 
10 | fn find_libxml2() -> Option { 11 | #![allow(unreachable_code)] // for platform-dependent dead code 12 | 13 | if let Ok(ref s) = std::env::var("LIBXML2") { 14 | // println!("{:?}", std::env::vars()); 15 | // panic!("set libxml2."); 16 | let p = std::path::Path::new(s); 17 | let fname = std::path::Path::new( 18 | p.file_name() 19 | .unwrap_or_else(|| panic!("no file name in LIBXML2 env ({s})")), 20 | ); 21 | assert!( 22 | p.is_file(), 23 | "{}", 24 | &format!("not a file in LIBXML2 env ({s})") 25 | ); 26 | println!( 27 | "cargo:rustc-link-lib={}", 28 | fname 29 | .file_stem() 30 | .unwrap() 31 | .to_string_lossy() 32 | .strip_prefix("lib") 33 | .unwrap() 34 | ); 35 | println!( 36 | "cargo:rustc-link-search={}", 37 | p.parent() 38 | .expect("no library path in LIBXML2 env") 39 | .to_string_lossy() 40 | ); 41 | None 42 | } else { 43 | #[cfg(any(target_family = "unix", target_os = "macos", all(target_family="windows", target_env="gnu")))] 44 | { 45 | let lib = pkg_config::Config::new() 46 | .probe("libxml-2.0") 47 | .expect("Couldn't find libxml2 via pkg-config"); 48 | return Some(ProbedLib { 49 | include_paths: lib.include_paths, 50 | version: lib.version, 51 | }) 52 | } 53 | 54 | #[cfg(all(target_family = "windows", target_env = "msvc"))] 55 | { 56 | if let Some(meta) = vcpkg_dep::vcpkg_find_libxml2() { 57 | return Some(meta); 58 | } else { 59 | eprintln!("vcpkg did not succeed in finding libxml2."); 60 | } 61 | } 62 | 63 | panic!("Could not find libxml2.") 64 | } 65 | } 66 | 67 | fn generate_bindings(header_dirs: Vec, output_path: &Path) { 68 | let bindings = bindgen::Builder::default() 69 | .header("src/wrapper.h") 70 | .opaque_type("max_align_t") 71 | // invalidate build as soon as the wrapper changes 72 | .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) 73 | .layout_tests(true) 74 | .clang_args(&["-DPKG-CONFIG"]) 75 | .clang_args( 76 | header_dirs.iter() 77 | .map(|dir| format!("-I{}", dir.display())) 78 | ); 79 | bindings 80 | .generate() 81 | 
.expect("failed to generate bindings with bindgen") 82 | .write_to_file(output_path) 83 | .expect("Failed to write bindings.rs"); 84 | } 85 | 86 | fn main() { 87 | let bindings_path = PathBuf::from(env::var_os("OUT_DIR").unwrap()).join("bindings.rs"); 88 | // declare availability of config variable (without setting it) 89 | println!("cargo::rustc-check-cfg=cfg(libxml_older_than_2_12)"); 90 | 91 | if let Some(probed_lib) = find_libxml2() { 92 | // if we could find header files, generate fresh bindings from them 93 | generate_bindings(probed_lib.include_paths, &bindings_path); 94 | // and expose the libxml2 version to the code 95 | let version_parts: Vec = probed_lib.version.split('.') 96 | .map(|part| part.parse::().unwrap_or(-1)).collect(); 97 | let older_than_2_12 = version_parts.len() > 1 && (version_parts[0] < 2 || 98 | version_parts[0] == 2 && version_parts[1] < 12); 99 | println!("cargo::rustc-check-cfg=cfg(libxml_older_than_2_12)"); 100 | if older_than_2_12 { 101 | println!("cargo::rustc-cfg=libxml_older_than_2_12"); 102 | } 103 | } else { 104 | // otherwise, use the default bindings on platforms where pkg-config isn't available 105 | fs::copy(PathBuf::from("src/default_bindings.rs"), bindings_path) 106 | .expect("Failed to copy the default bindings to the build directory"); 107 | // for now, assume that the library is older than 2.12, because that's what those bindings are computed with 108 | println!("cargo::rustc-cfg=libxml_older_than_2_12"); 109 | } 110 | } 111 | 112 | #[cfg(all(target_family = "windows", target_env = "msvc"))] 113 | mod vcpkg_dep { 114 | use crate::ProbedLib; 115 | pub fn vcpkg_find_libxml2() -> Option { 116 | if let Ok(metadata) = vcpkg::Config::new() 117 | .find_package("libxml2") { 118 | Some(ProbedLib { version: vcpkg_version(), include_paths: metadata.include_paths }) 119 | } else { 120 | None 121 | } 122 | } 123 | 124 | fn vcpkg_version() -> String { 125 | // What is the best way to obtain the version on Windows *before* bindgen 
runs? 126 | // here we attempt asking the shell for "vcpkg list libxml2" 127 | let mut vcpkg_exe = vcpkg::find_vcpkg_root(&vcpkg::Config::new()).unwrap(); 128 | vcpkg_exe.push("vcpkg.exe"); 129 | let vcpkg_list_libxml2 = std::process::Command::new(vcpkg_exe) 130 | .args(["list","libxml2"]) 131 | .output() 132 | .expect("vcpkg.exe failed to execute in vcpkg_dep build step"); 133 | if vcpkg_list_libxml2.status.success() { 134 | let libxml2_list_str = String::from_utf8_lossy(&vcpkg_list_libxml2.stdout); 135 | for line in libxml2_list_str.lines() { 136 | if line.starts_with("libxml2:") { 137 | let mut version_piece = line.split("2."); 138 | version_piece.next(); 139 | if let Some(version_tail) = version_piece.next() { 140 | if let Some(version) = version_tail.split(' ').next() 141 | .unwrap().split('#').next() { 142 | return format!("2.{version}"); 143 | } 144 | } 145 | } 146 | } 147 | } 148 | // default to a recent libxml2 from Windows 10 149 | // (or should this panic?) 150 | String::from("2.13.5") 151 | } 152 | } -------------------------------------------------------------------------------- /examples/schema_example.rs: -------------------------------------------------------------------------------- 1 | //! 2 | //! Example Usage of XSD Schema Validation 3 | //! 
4 | use libxml::schemas::SchemaParserContext; 5 | use libxml::schemas::SchemaValidationContext; 6 | 7 | use libxml::parser::Parser; 8 | 9 | fn main() { 10 | let xml = Parser::default() 11 | .parse_file("tests/resources/schema.xml") 12 | .expect("Expected to be able to parse XML Document from file"); 13 | 14 | let mut xsdparser = SchemaParserContext::from_file("tests/resources/schema.xsd"); 15 | let xsd = SchemaValidationContext::from_parser(&mut xsdparser); 16 | 17 | if let Err(errors) = xsd { 18 | for err in &errors { 19 | println!("{}", err.message.as_ref().unwrap()); 20 | } 21 | 22 | panic!("Failed to parse schema"); 23 | } 24 | 25 | let mut xsd = xsd.unwrap(); 26 | 27 | if let Err(errors) = xsd.validate_document(&xml) { 28 | for err in &errors { 29 | println!("{}", err.message.as_ref().unwrap()); 30 | } 31 | 32 | panic!("Invalid XML accoding to XSD schema"); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /examples/tree_example.rs: -------------------------------------------------------------------------------- 1 | use libxml::parser::Parser; 2 | use libxml::tree::*; 3 | 4 | fn my_recurse(node: &Node) { 5 | match node.get_type().unwrap() { 6 | NodeType::ElementNode => { 7 | println!("Entering {}", node.get_name()); 8 | } 9 | NodeType::TextNode => { 10 | println!("Text: {}", node.get_content()); 11 | } 12 | _ => {} 13 | } 14 | 15 | let mut c: Option = node.get_first_child(); 16 | while let Some(child) = c { 17 | my_recurse(&child); 18 | c = child.get_next_sibling(); 19 | } 20 | 21 | if node.get_type().unwrap() == NodeType::ElementNode { 22 | println!("Leaving {}", node.get_name()); 23 | } 24 | } 25 | 26 | fn main() { 27 | let parser = Parser::default(); 28 | let doc = parser.parse_file("tests/resources/file01.xml").unwrap(); 29 | let root = doc.get_root_element().unwrap(); 30 | my_recurse(&root); 31 | } 32 | -------------------------------------------------------------------------------- 
/examples/xpath_example.rs: -------------------------------------------------------------------------------- 1 | use libxml::parser::Parser; 2 | use libxml::xpath::Context; 3 | 4 | fn main() { 5 | let parser = Parser::default(); 6 | let doc = parser.parse_file("tests/resources/file01.xml").unwrap(); 7 | let context = Context::new(&doc).unwrap(); 8 | let result = context.evaluate("//child/text()").unwrap(); 9 | 10 | for node in &result.get_nodes_as_vec() { 11 | println!("Found: {}", node.get_content()); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /scripts/doc-upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # thanks to the original at: https://github.com/shepmaster/sxd-document/blob/master/deploy_gh_pages.sh 3 | set -eux 4 | 5 | if [[ "${TRAVIS_BRANCH}" != 'master' ]] || [[ "${TRAVIS_PULL_REQUEST}" = 'true' ]] || [[ "${TRAVIS_RUST_VERSION}" != 'stable' ]]; then 6 | exit 0 7 | fi 8 | 9 | cargo doc 10 | 11 | # Add an automatic redirect 12 | repo_name=$(echo "${TRAVIS_REPO_SLUG}" | cut -d '/' -f 2 | sed 's/-/_/') 13 | echo "" > target/doc/index.html 14 | 15 | rm -rf generated-documentation 16 | mv target/doc generated-documentation 17 | 18 | cd generated-documentation 19 | 20 | git init 21 | git config user.name "Travis-CI" 22 | git config user.email "builder@travis" 23 | 24 | git add . 25 | git commit -m "Deployed to Github Pages" 26 | 27 | set +x # Never print the token! 
28 | git push --force --quiet "https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}" master:gh-pages -------------------------------------------------------------------------------- /src/bindings.rs: -------------------------------------------------------------------------------- 1 | // Issues coming from bindgen 2 | #![allow(non_upper_case_globals)] 3 | #![allow(non_camel_case_types)] 4 | #![allow(non_snake_case)] 5 | #![allow(dead_code)] 6 | #![allow(improper_ctypes)] 7 | #![allow(missing_docs)] 8 | 9 | /* 10 | * helper var until we figure out well-formedness checks 11 | */ 12 | 13 | pub static mut HACKY_WELL_FORMED: bool = false; 14 | 15 | include!(concat!(env!("OUT_DIR"), "/bindings.rs")); 16 | -------------------------------------------------------------------------------- /src/c_helpers.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_camel_case_types)] 2 | #![allow(non_snake_case)] 3 | 4 | use crate::bindings::*; 5 | use libc::{c_char, c_int, size_t}; 6 | use std::os::raw::c_void; 7 | use std::ptr; 8 | use std::slice; 9 | // error handling functions 10 | // pub fn xmlSetGenericErrorFunc(ctx: *mut c_void, handler: *mut c_void); 11 | // pub fn xmlThrDefSetGenericErrorFunc(ctx: *mut c_void, handler: *mut c_void); 12 | 13 | // Taken from Nokogiri (https://github.com/sparklemotion/nokogiri/blob/24bb843327306d2d71e4b2dc337c1e327cbf4516/ext/nokogiri/xml_document.c#L64) 14 | pub fn xmlNodeRecursivelyRemoveNs(node: xmlNodePtr) { 15 | unsafe { 16 | let mut property: xmlAttrPtr; 17 | 18 | xmlSetNs(node, ptr::null_mut()); 19 | let mut child: xmlNodePtr = (*node).children; 20 | while !child.is_null() { 21 | xmlNodeRecursivelyRemoveNs(child); 22 | child = (*child).next; 23 | } 24 | 25 | if (((*node).type_ == xmlElementType_XML_ELEMENT_NODE) 26 | || ((*node).type_ == xmlElementType_XML_XINCLUDE_START) 27 | || ((*node).type_ == xmlElementType_XML_XINCLUDE_END)) 28 | && !(*node).nsDef.is_null() 29 | { 30 | 
xmlFreeNsList((*node).nsDef); 31 | (*node).nsDef = ptr::null_mut(); 32 | } 33 | 34 | if (*node).type_ == xmlElementType_XML_ELEMENT_NODE && !(*node).properties.is_null() { 35 | property = (*node).properties; 36 | while !property.is_null() { 37 | if !(*property).ns.is_null() { 38 | (*property).ns = ptr::null_mut(); 39 | } 40 | property = (*property).next; 41 | } 42 | } 43 | } 44 | } 45 | pub fn xmlGetDoc(cur: xmlNodePtr) -> xmlDocPtr { 46 | unsafe { (*cur).doc } 47 | } 48 | pub fn xmlNextNsSibling(ns: xmlNsPtr) -> xmlNsPtr { 49 | unsafe { (*ns).next } 50 | } 51 | pub fn xmlNsPrefix(ns: xmlNsPtr) -> *const c_char { 52 | unsafe { (*ns).prefix as *const c_char } 53 | } 54 | pub fn xmlNsHref(ns: xmlNsPtr) -> *const c_char { 55 | unsafe { (*ns).href as *const c_char } 56 | } 57 | pub fn xmlNodeNsDeclarations(cur: xmlNodePtr) -> xmlNsPtr { 58 | unsafe { (*cur).nsDef } 59 | } 60 | pub fn xmlNodeNs(cur: xmlNodePtr) -> xmlNsPtr { 61 | unsafe { (*cur).ns } 62 | } 63 | 64 | pub fn xmlNextPropertySibling(attr: xmlAttrPtr) -> xmlAttrPtr { 65 | unsafe { (*attr).next } 66 | } 67 | pub fn xmlAttrName(attr: xmlAttrPtr) -> *const c_char { 68 | unsafe { (*attr).name as *const c_char } 69 | } 70 | pub fn xmlAttrNs(attr: xmlAttrPtr) -> xmlNsPtr { 71 | unsafe { (*attr).ns } 72 | } 73 | pub fn xmlGetFirstProperty(node: xmlNodePtr) -> xmlAttrPtr { 74 | unsafe { (*node).properties } 75 | } 76 | pub fn xmlGetNodeType(cur: xmlNodePtr) -> xmlElementType { 77 | unsafe { (*cur).type_ } 78 | } 79 | 80 | pub fn xmlGetParent(cur: xmlNodePtr) -> xmlNodePtr { 81 | unsafe { (*cur).parent } 82 | } 83 | pub fn xmlGetFirstChild(cur: xmlNodePtr) -> xmlNodePtr { 84 | unsafe { (*cur).children } 85 | } 86 | pub fn xmlPrevSibling(cur: xmlNodePtr) -> xmlNodePtr { 87 | unsafe { (*cur).prev } 88 | } 89 | 90 | // helper for tree 91 | pub fn xmlNextSibling(cur: xmlNodePtr) -> xmlNodePtr { 92 | unsafe { (*cur).next } 93 | } 94 | 95 | pub fn xmlNodeGetName(cur: xmlNodePtr) -> *const c_char { 96 | unsafe { 
(*cur).name as *const c_char } 97 | } 98 | 99 | // dummy function: no debug output at all 100 | #[cfg(libxml_older_than_2_12)] 101 | unsafe extern "C" fn _ignoreInvalidTagsErrorFunc(_user_data: *mut c_void, error: xmlErrorPtr) { 102 | unsafe { 103 | if !error.is_null() && (*error).code as xmlParserErrors == xmlParserErrors_XML_HTML_UNKNOWN_TAG { 104 | // do not record invalid, in fact (out of despair) claim we ARE well-formed, when a tag is invalid. 105 | HACKY_WELL_FORMED = true; 106 | } 107 | } 108 | } 109 | #[cfg(not(libxml_older_than_2_12))] 110 | unsafe extern "C" fn _ignoreInvalidTagsErrorFunc(_user_data: *mut c_void, error: *const xmlError) { 111 | unsafe { 112 | if !error.is_null() && (*error).code as xmlParserErrors == xmlParserErrors_XML_HTML_UNKNOWN_TAG { 113 | // do not record invalid, in fact (out of despair) claim we ARE well-formed, when a tag is invalid. 114 | HACKY_WELL_FORMED = true; 115 | } 116 | } 117 | } 118 | 119 | pub fn setWellFormednessHandler(ctxt: *mut xmlParserCtxt) { 120 | unsafe { 121 | HACKY_WELL_FORMED = false; 122 | xmlSetStructuredErrorFunc(ctxt as *mut c_void, Some(_ignoreInvalidTagsErrorFunc)); 123 | } 124 | } 125 | // helper for parser 126 | pub fn htmlWellFormed(ctxt: *mut xmlParserCtxt) -> bool { 127 | unsafe { (!ctxt.is_null() && (*ctxt).wellFormed > 0) || HACKY_WELL_FORMED } 128 | } 129 | 130 | // helper for xpath 131 | pub fn xmlXPathObjectNumberOfNodes(val: xmlXPathObjectPtr) -> c_int { 132 | unsafe { 133 | if val.is_null() { 134 | -1 135 | } else if (*val).nodesetval.is_null() { 136 | -2 137 | } else { 138 | (*(*val).nodesetval).nodeNr 139 | } 140 | } 141 | } 142 | 143 | pub fn xmlXPathObjectGetNodes(val: xmlXPathObjectPtr, size: size_t) -> Vec { 144 | unsafe { slice::from_raw_parts((*(*val).nodesetval).nodeTab, size).to_vec() } 145 | } 146 | 147 | #[cfg(any(target_family = "unix", target_os = "macos", all(target_family="windows", target_env="gnu")))] 148 | pub fn bindgenFree(val: *mut c_void) { 149 | unsafe { 150 | if 
let Some(xml_free_fn) = xmlFree { 151 | xml_free_fn(val); 152 | } else { 153 | libc::free(val); 154 | } 155 | } 156 | } 157 | #[cfg(all(target_family="windows", target_env="msvc"))] 158 | pub fn bindgenFree(val: *mut c_void) { 159 | unsafe { libc::free(val as *mut c_void); } 160 | } -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | //! 2 | //! Wrapper for xmlError 3 | //! 4 | use super::bindings; 5 | 6 | use std::ffi::{c_char, c_int, CStr}; 7 | 8 | /// Rust enum variant of libxml2's xmlErrorLevel 9 | #[derive(Debug)] 10 | pub enum XmlErrorLevel { 11 | /// No error 12 | None, 13 | /// A simple warning 14 | Warning, 15 | /// A recoverable error 16 | Error, 17 | /// A fatal error 18 | Fatal, 19 | } 20 | 21 | impl XmlErrorLevel { 22 | /// Convert an xmlErrorLevel provided by libxml2 (as an integer) into a Rust enum 23 | pub fn from_raw(error_level: bindings::xmlErrorLevel) -> XmlErrorLevel { 24 | match error_level { 25 | bindings::xmlErrorLevel_XML_ERR_NONE => XmlErrorLevel::None, 26 | bindings::xmlErrorLevel_XML_ERR_WARNING => XmlErrorLevel::Warning, 27 | bindings::xmlErrorLevel_XML_ERR_ERROR => XmlErrorLevel::Error, 28 | bindings::xmlErrorLevel_XML_ERR_FATAL => XmlErrorLevel::Fatal, 29 | _ => unreachable!("Should never receive an error level not in the range 0..=3"), 30 | } 31 | } 32 | } 33 | 34 | /// Wrapper around xmlErrorPtr. 35 | /// Some fields have been omitted for simplicity/safety 36 | #[derive(Debug)] 37 | pub struct StructuredError { 38 | /// Human-friendly error message, lossily converted into UTF-8 from the underlying 39 | /// C string. May be `None` if an error message is not provided by libxml2. 40 | pub message: Option, 41 | /// The error's level 42 | pub level: XmlErrorLevel, 43 | /// The filename, lossily converted into UTF-8 from the underlying C string. 
44 | /// May be `None` if a filename is not provided by libxml2, such as when validating 45 | /// an XML document stored entirely in memory. 46 | pub filename: Option, 47 | /// The linenumber, or None if not applicable. 48 | pub line: Option, 49 | /// The column where the error is present, or None if not applicable. 50 | pub col: Option, 51 | 52 | /// The module that the error came from. See libxml's xmlErrorDomain enum. 53 | pub domain: c_int, 54 | /// The variety of error. See libxml's xmlParserErrors enum. 55 | pub code: c_int, 56 | } 57 | 58 | impl StructuredError { 59 | /// Copies the error information stored at `error_ptr` into a new `StructuredError` 60 | /// 61 | /// # Safety 62 | /// This function must be given a pointer to a valid `xmlError` struct. Typically, you 63 | /// will acquire such a pointer by implementing one of a number of callbacks 64 | /// defined in libXml which are provided an `xmlError` as an argument. 65 | /// 66 | /// This function copies data from the memory `error_ptr` but does not deallocate 67 | /// the error. Depending on the context in which this function is used, you may 68 | /// need to take additional steps to avoid a memory leak. 69 | pub unsafe fn from_raw(error_ptr: *const bindings::xmlError) -> Self { 70 | let error = *error_ptr; 71 | let message = StructuredError::ptr_to_string(error.message); 72 | let level = XmlErrorLevel::from_raw(error.level); 73 | let filename = StructuredError::ptr_to_string(error.file); 74 | 75 | let line = if error.line == 0 { 76 | None 77 | } else { 78 | Some(error.line) 79 | }; 80 | let col = if error.int2 == 0 { 81 | None 82 | } else { 83 | Some(error.int2) 84 | }; 85 | 86 | StructuredError { 87 | message, 88 | level, 89 | filename, 90 | line, 91 | col, 92 | domain: error.domain, 93 | code: error.code, 94 | } 95 | } 96 | 97 | /// Human-readable informative error message. 98 | /// 99 | /// This function is a hold-over from the original bindings to libxml's error 100 | /// reporting mechanism. 
Instead of calling this method, you can access the 101 | /// StructuredError `message` field directly. 102 | #[deprecated(since="0.3.3", note="Please use the `message` field directly instead.")] 103 | pub fn message(&self) -> &str { 104 | self.message.as_deref().unwrap_or("") 105 | } 106 | 107 | /// Returns the provided c_str as Some(String), or None if the provided pointer is null. 108 | fn ptr_to_string(c_str: *mut c_char) -> Option { 109 | if c_str.is_null() { 110 | return None; 111 | } 112 | 113 | let raw_str = unsafe { CStr::from_ptr(c_str) }; 114 | Some(String::from_utf8_lossy(raw_str.to_bytes()).to_string()) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # A wrapper for libxml2 2 | //! This library provides an interface to a subset of the libxml API. 3 | //! The idea is to extend it whenever more functionality is needed. 4 | //! Providing a more or less complete wrapper would be too much work. 5 | #![deny(missing_docs)] 6 | // Our new methods return Result types 7 | #![allow(clippy::new_ret_no_self, clippy::result_unit_err)] 8 | /// Bindings to the C interface 9 | pub mod bindings; 10 | mod c_helpers; 11 | 12 | /// XML and HTML parsing 13 | pub mod parser; 14 | 15 | /// Manipulations on the DOM representation 16 | pub mod tree; 17 | 18 | /// XML Global Error Structures and Handling 19 | pub mod error; 20 | 21 | /// `XPath` module for global lookup in the DOM 22 | pub mod xpath; 23 | 24 | /// Schema Validation 25 | pub mod schemas; 26 | 27 | /// Read-only parallel primitives 28 | pub mod readonly; 29 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | //! 
The parser functionality 2 | 3 | use crate::bindings::*; 4 | use crate::c_helpers::*; 5 | use crate::tree::*; 6 | 7 | use std::convert::AsRef; 8 | use std::error::Error; 9 | use std::ffi::c_void; 10 | use std::ffi::{CStr, CString}; 11 | use std::fmt; 12 | use std::fs; 13 | use std::io; 14 | use std::os::raw::{c_char, c_int}; 15 | use std::ptr; 16 | use std::slice; 17 | use std::str; 18 | use std::sync::Once; 19 | 20 | static INIT_LIBXML_PARSER: Once = Once::new(); 21 | 22 | enum XmlParserOption { 23 | Recover = 1, 24 | Nodefdtd = 4, 25 | Noerror = 32, 26 | Nowarning = 64, 27 | Pedantic = 128, 28 | Noblanks = 256, 29 | Nonet = 2048, 30 | Noimplied = 8192, 31 | Compact = 65_536, 32 | Huge = 524_288, 33 | Ignoreenc = 2_097_152, 34 | } 35 | 36 | enum HtmlParserOption { 37 | Recover = 1, 38 | Nodefdtd = 4, 39 | Noerror = 32, 40 | Nowarning = 64, 41 | Pedantic = 128, 42 | Noblanks = 256, 43 | Nonet = 2048, 44 | Noimplied = 8192, 45 | Huge = 524_288, 46 | Compact = 65_536, 47 | Ignoreenc = 2_097_152, 48 | } 49 | 50 | /// Parser Options 51 | pub struct ParserOptions<'a> { 52 | /// Relaxed parsing 53 | pub recover: bool, 54 | /// do not default a doctype if not found 55 | pub no_def_dtd: bool, 56 | /// suppress error reports 57 | pub no_error: bool, 58 | /// suppress warning reports 59 | pub no_warning: bool, 60 | /// pedantic error reporting 61 | pub pedantic: bool, 62 | /// remove blank nodes 63 | pub no_blanks: bool, 64 | /// Forbid network access 65 | pub no_net: bool, 66 | /// Do not add implied html/body... elements 67 | pub no_implied: bool, 68 | /// relax any hardcoded limit from the parser 69 | pub huge: bool, 70 | /// compact small text nodes 71 | pub compact: bool, 72 | /// ignore internal document encoding hint 73 | pub ignore_enc: bool, 74 | /// manually-specified encoding 75 | pub encoding: Option<&'a str>, 76 | } 77 | 78 | impl ParserOptions<'_> { 79 | pub(crate) fn to_flags(&self, format: &ParseFormat) -> i32 { 80 | macro_rules! 
to_option_flag { 81 | ( 82 | $condition:expr => $variant:ident 83 | ) => { 84 | if $condition { 85 | match format { 86 | ParseFormat::HTML => HtmlParserOption::$variant as i32, 87 | ParseFormat::XML => XmlParserOption::$variant as i32, 88 | } 89 | } else { 90 | 0 91 | } 92 | }; 93 | } 94 | // return the combined flags 95 | to_option_flag!(self.recover => Recover) 96 | + to_option_flag!(self.no_def_dtd => Nodefdtd) 97 | + to_option_flag!(self.no_error => Noerror) 98 | + to_option_flag!(self.no_warning => Nowarning) 99 | + to_option_flag!(self.pedantic => Pedantic) 100 | + to_option_flag!(self.no_blanks => Noblanks) 101 | + to_option_flag!(self.no_net => Nonet) 102 | + to_option_flag!(self.no_implied => Noimplied) 103 | + to_option_flag!(self.huge => Huge) 104 | + to_option_flag!(self.compact => Compact) 105 | + to_option_flag!(self.ignore_enc => Ignoreenc) 106 | } 107 | } 108 | 109 | impl Default for ParserOptions<'_> { 110 | fn default() -> Self { 111 | ParserOptions { 112 | recover: true, 113 | no_def_dtd: false, 114 | no_error: true, 115 | no_warning: true, 116 | pedantic: false, 117 | no_blanks: false, 118 | no_net: false, 119 | no_implied: false, 120 | huge: false, 121 | compact: false, 122 | ignore_enc: false, 123 | encoding: None, 124 | } 125 | } 126 | } 127 | 128 | ///Parser Errors 129 | pub enum XmlParseError { 130 | ///Parsing returned a null pointer as document pointer 131 | GotNullPointer, 132 | ///Could not open file error. 133 | FileOpenError, 134 | ///Document too large for libxml2. 
135 | DocumentTooLarge, 136 | } 137 | 138 | impl Error for XmlParseError {} 139 | 140 | impl fmt::Debug for XmlParseError { 141 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 142 | write!(f, "{self}") 143 | } 144 | } 145 | 146 | impl fmt::Display for XmlParseError { 147 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 148 | write!( 149 | f, 150 | "{}", 151 | match self { 152 | XmlParseError::GotNullPointer => "Got a Null pointer", 153 | XmlParseError::FileOpenError => "Unable to open path to file.", 154 | XmlParseError::DocumentTooLarge => "Document too large for i32.", 155 | } 156 | ) 157 | } 158 | } 159 | 160 | /// Default encoding when not provided. 161 | const DEFAULT_ENCODING: *const c_char = ptr::null(); 162 | 163 | /// Default URL when not provided. 164 | const DEFAULT_URL: *const c_char = ptr::null(); 165 | 166 | /// Open file function. 167 | fn xml_open(filename: &str) -> io::Result<*mut c_void> { 168 | let ptr = Box::into_raw(Box::new(fs::File::open(filename)?)); 169 | Ok(ptr as *mut c_void) 170 | } 171 | 172 | /// Read callback for an FS file. 173 | unsafe extern "C" fn xml_read(context: *mut c_void, buffer: *mut c_char, len: c_int) -> c_int { 174 | // Len is always positive, typically 40-4000 bytes. 175 | let file = context as *mut fs::File; 176 | let buf = slice::from_raw_parts_mut(buffer as *mut u8, len as usize); 177 | match io::Read::read(&mut *file, buf) { 178 | Ok(v) => v as c_int, 179 | Err(_) => -1, 180 | } 181 | } 182 | 183 | type XmlReadCallback = unsafe extern "C" fn(*mut c_void, *mut c_char, c_int) -> c_int; 184 | 185 | /// Close callback for an FS file. 186 | unsafe extern "C" fn xml_close(context: *mut c_void) -> c_int { 187 | // Take rust ownership of the context and then drop it. 188 | let file = context as *mut fs::File; 189 | let _ = Box::from_raw(file); 190 | 0 191 | } 192 | 193 | type XmlCloseCallback = unsafe extern "C" fn(*mut c_void) -> c_int; 194 | 195 | ///Convert usize to i32 safely. 
196 | fn try_usize_to_i32(value: usize) -> Result { 197 | if cfg!(target_pointer_width = "16") || (value < i32::MAX as usize) { 198 | // Cannot safely use our value comparison, but the conversion if always safe. 199 | // Or, if the value can be safely represented as a 32-bit signed integer. 200 | Ok(value as i32) 201 | } else { 202 | // Document too large, cannot parse using libxml2. 203 | Err(XmlParseError::DocumentTooLarge) 204 | } 205 | } 206 | 207 | #[derive(Debug, PartialEq, Eq)] 208 | /// Enum for the parse formats supported by libxml2 209 | pub enum ParseFormat { 210 | /// Strict parsing for XML 211 | XML, 212 | /// Relaxed parsing for HTML 213 | HTML, 214 | } 215 | /// Parsing API wrapper for libxml2 216 | pub struct Parser { 217 | /// The `ParseFormat` for this parser 218 | pub format: ParseFormat, 219 | } 220 | impl Default for Parser { 221 | /// Create a parser for XML documents 222 | fn default() -> Self { 223 | // avoid deadlocks from using multiple parsers 224 | INIT_LIBXML_PARSER.call_once(|| unsafe { 225 | crate::bindings::xmlInitParser(); 226 | }); 227 | Parser { 228 | format: ParseFormat::XML, 229 | } 230 | } 231 | } 232 | impl Parser { 233 | /// Create a parser for HTML documents 234 | pub fn default_html() -> Self { 235 | // avoid deadlocks from using multiple parsers 236 | INIT_LIBXML_PARSER.call_once(|| unsafe { 237 | crate::bindings::xmlInitParser(); 238 | }); 239 | Parser { 240 | format: ParseFormat::HTML, 241 | } 242 | } 243 | 244 | /// Parses the XML/HTML file `filename` to generate a new `Document` 245 | pub fn parse_file(&self, filename: &str) -> Result { 246 | self.parse_file_with_options(filename, ParserOptions::default()) 247 | } 248 | 249 | /// Parses the XML/HTML file `filename` with a manually-specified parser-options 250 | /// to generate a new `Document` 251 | pub fn parse_file_with_options( 252 | &self, 253 | filename: &str, 254 | parser_options: ParserOptions, 255 | ) -> Result { 256 | // Create extern C callbacks for to read 
and close a Rust file through 257 | // a void pointer. 258 | let ioread: Option = Some(xml_read); 259 | let ioclose: Option = Some(xml_close); 260 | let ioctx = match xml_open(filename) { 261 | Ok(v) => v, 262 | Err(_) => return Err(XmlParseError::FileOpenError), 263 | }; 264 | 265 | // Process encoding. 266 | let encoding_cstring: Option = 267 | parser_options.encoding.map(|v| CString::new(v).unwrap()); 268 | let encoding_ptr = match encoding_cstring { 269 | Some(v) => v.as_ptr(), 270 | None => DEFAULT_ENCODING, 271 | }; 272 | 273 | // Process url. 274 | let url_ptr = DEFAULT_URL; 275 | 276 | unsafe { 277 | xmlKeepBlanksDefault(1); 278 | } 279 | 280 | let options = parser_options.to_flags(&self.format); 281 | 282 | match self.format { 283 | ParseFormat::XML => unsafe { 284 | let doc_ptr = xmlReadIO(ioread, ioclose, ioctx, url_ptr, encoding_ptr, options); 285 | if doc_ptr.is_null() { 286 | Err(XmlParseError::GotNullPointer) 287 | } else { 288 | Ok(Document::new_ptr(doc_ptr)) 289 | } 290 | }, 291 | ParseFormat::HTML => unsafe { 292 | let doc_ptr = htmlReadIO(ioread, ioclose, ioctx, url_ptr, encoding_ptr, options); 293 | if doc_ptr.is_null() { 294 | Err(XmlParseError::GotNullPointer) 295 | } else { 296 | Ok(Document::new_ptr(doc_ptr)) 297 | } 298 | }, 299 | } 300 | } 301 | 302 | ///Parses the XML/HTML bytes `input` to generate a new `Document` 303 | pub fn parse_string>(&self, input: Bytes) -> Result { 304 | self.parse_string_with_options(input, ParserOptions::default()) 305 | } 306 | 307 | ///Parses the XML/HTML bytes `input` with a manually-specified 308 | ///parser-options to generate a new `Document` 309 | pub fn parse_string_with_options>( 310 | &self, 311 | input: Bytes, 312 | parser_options: ParserOptions, 313 | ) -> Result { 314 | // Process input bytes. 315 | let input_bytes = input.as_ref(); 316 | let input_ptr = input_bytes.as_ptr() as *const c_char; 317 | let input_len = try_usize_to_i32(input_bytes.len())?; 318 | 319 | // Process encoding. 
320 | let encoding_cstring: Option = 321 | parser_options.encoding.map(|v| CString::new(v).unwrap()); 322 | let encoding_ptr = match encoding_cstring { 323 | Some(v) => v.as_ptr(), 324 | None => DEFAULT_ENCODING, 325 | }; 326 | 327 | // Process url. 328 | let url_ptr = DEFAULT_URL; 329 | 330 | let options = parser_options.to_flags(&self.format); 331 | 332 | match self.format { 333 | ParseFormat::XML => unsafe { 334 | let docptr = xmlReadMemory(input_ptr, input_len, url_ptr, encoding_ptr, options); 335 | if docptr.is_null() { 336 | Err(XmlParseError::GotNullPointer) 337 | } else { 338 | Ok(Document::new_ptr(docptr)) 339 | } 340 | }, 341 | ParseFormat::HTML => unsafe { 342 | let docptr = htmlReadMemory(input_ptr, input_len, url_ptr, encoding_ptr, options); 343 | if docptr.is_null() { 344 | Err(XmlParseError::GotNullPointer) 345 | } else { 346 | Ok(Document::new_ptr(docptr)) 347 | } 348 | }, 349 | } 350 | } 351 | 352 | /// Checks a string for well-formedness. 353 | pub fn is_well_formed_html>(&self, input: Bytes) -> bool { 354 | self.is_well_formed_html_with_encoding(input, None) 355 | } 356 | 357 | /// Checks a string for well-formedness with manually-specified encoding. 358 | /// IMPORTANT: This function is currently implemented in a HACKY way, to ignore invalid errors for HTML5 elements (such as ) 359 | /// this means you should NEVER USE IT WHILE THREADING, it is CERTAIN TO BREAK 360 | /// 361 | /// Help is welcome in implementing it correctly. 362 | pub fn is_well_formed_html_with_encoding>( 363 | &self, 364 | input: Bytes, 365 | encoding: Option<&str>, 366 | ) -> bool { 367 | // Process input string. 368 | let input_bytes = input.as_ref(); 369 | if input_bytes.is_empty() { 370 | return false; 371 | } 372 | let input_ptr = input_bytes.as_ptr() as *const c_char; 373 | let input_len = match try_usize_to_i32(input_bytes.len()) { 374 | Ok(v) => v, 375 | Err(_) => return false, 376 | }; 377 | 378 | // Process encoding. 
379 | let encoding_cstring: Option = encoding.map(|v| CString::new(v).unwrap()); 380 | let encoding_ptr = match encoding_cstring { 381 | Some(v) => v.as_ptr(), 382 | None => DEFAULT_ENCODING, 383 | }; 384 | 385 | // Process url. 386 | let url_ptr = DEFAULT_URL; 387 | // disable generic error lines from libxml2 388 | match self.format { 389 | ParseFormat::XML => false, // TODO: Add support for XML at some point 390 | ParseFormat::HTML => unsafe { 391 | let ctxt = htmlNewParserCtxt(); 392 | setWellFormednessHandler(ctxt); 393 | let docptr = htmlCtxtReadMemory(ctxt, input_ptr, input_len, url_ptr, encoding_ptr, 10_596); // htmlParserOption = 4+32+64+256+2048+8192 394 | let well_formed_final = if htmlWellFormed(ctxt) { 395 | // Basic well-formedness passes, let's check if we have an element as root too 396 | if !docptr.is_null() { 397 | let node_ptr = xmlDocGetRootElement(docptr); 398 | if node_ptr.is_null() { 399 | return false 400 | } 401 | let name_ptr = xmlNodeGetName(node_ptr); 402 | if name_ptr.is_null() { 403 | false 404 | } 405 | //empty string 406 | else { 407 | let c_root_name = CStr::from_ptr(name_ptr); 408 | let root_name = str::from_utf8(c_root_name.to_bytes()).unwrap().to_owned(); 409 | root_name == "html" 410 | } 411 | } else { 412 | false 413 | } 414 | } else { 415 | false 416 | }; 417 | 418 | if !ctxt.is_null() { 419 | htmlFreeParserCtxt(ctxt); 420 | } 421 | if !docptr.is_null() { 422 | xmlFreeDoc(docptr); 423 | } 424 | well_formed_final 425 | }, 426 | } 427 | } 428 | } 429 | -------------------------------------------------------------------------------- /src/readonly.rs: -------------------------------------------------------------------------------- 1 | mod tree; 2 | 3 | pub use self::tree::RoNode; 4 | -------------------------------------------------------------------------------- /src/readonly/tree.rs: -------------------------------------------------------------------------------- 1 | use libc::{c_char, c_void}; 2 | use std::collections::{HashMap, 
HashSet}; 3 | use std::ffi::{CStr, CString}; 4 | use std::ptr; 5 | use std::str; 6 | 7 | use crate::bindings::*; 8 | use crate::c_helpers::*; 9 | use crate::tree::namespace::Namespace; 10 | use crate::tree::nodetype::NodeType; 11 | use crate::tree::Document; 12 | use crate::xpath::Context; 13 | 14 | /// Lightweight struct for read-only parallel processing 15 | #[derive(Debug, Copy, Clone)] 16 | pub struct RoNode(pub(crate) xmlNodePtr); 17 | 18 | // we claim Sync and Send, as we are in read-only mode over the owning document 19 | unsafe impl Sync for RoNode {} 20 | unsafe impl Send for RoNode {} 21 | 22 | impl PartialEq for RoNode { 23 | /// Two nodes are considered equal, if they point to the same xmlNode. 24 | fn eq(&self, other: &RoNode) -> bool { 25 | std::ptr::eq(self.0, other.0) 26 | } 27 | } 28 | impl Eq for RoNode {} 29 | 30 | impl RoNode { 31 | /// Immutably borrows the underlying libxml2 `xmlNodePtr` pointer 32 | pub fn node_ptr(&self) -> xmlNodePtr { 33 | self.0 34 | } 35 | 36 | /// Returns the next sibling if it exists 37 | pub fn get_next_sibling(self) -> Option { 38 | let ptr = xmlNextSibling(self.0); 39 | self.ptr_as_option(ptr) 40 | } 41 | 42 | /// Returns the previous sibling if it exists 43 | pub fn get_prev_sibling(self) -> Option { 44 | let ptr = xmlPrevSibling(self.0); 45 | self.ptr_as_option(ptr) 46 | } 47 | 48 | /// Returns the first child if it exists 49 | pub fn get_first_child(self) -> Option { 50 | let ptr = xmlGetFirstChild(self.0); 51 | self.ptr_as_option(ptr) 52 | } 53 | 54 | /// Returns the last child if it exists 55 | pub fn get_last_child(self) -> Option { 56 | let ptr = unsafe { xmlGetLastChild(self.0) }; 57 | self.ptr_as_option(ptr) 58 | } 59 | 60 | /// Returns the next element sibling if it exists 61 | pub fn get_next_element_sibling(&self) -> Option { 62 | match self.get_next_sibling() { 63 | None => None, 64 | Some(child) => { 65 | let mut current_node = child; 66 | while !current_node.is_element_node() { 67 | if let 
Some(sibling) = current_node.get_next_sibling() { 68 | current_node = sibling; 69 | } else { 70 | break; 71 | } 72 | } 73 | if current_node.is_element_node() { 74 | Some(current_node) 75 | } else { 76 | None 77 | } 78 | } 79 | } 80 | } 81 | 82 | /// Returns the previous element sibling if it exists 83 | pub fn get_prev_element_sibling(&self) -> Option { 84 | match self.get_prev_sibling() { 85 | None => None, 86 | Some(child) => { 87 | let mut current_node = child; 88 | while !current_node.is_element_node() { 89 | if let Some(sibling) = current_node.get_prev_sibling() { 90 | current_node = sibling; 91 | } else { 92 | break; 93 | } 94 | } 95 | if current_node.is_element_node() { 96 | Some(current_node) 97 | } else { 98 | None 99 | } 100 | } 101 | } 102 | } 103 | 104 | /// Returns the first element child if it exists 105 | pub fn get_first_element_child(self) -> Option { 106 | match self.get_first_child() { 107 | None => None, 108 | Some(child) => { 109 | let mut current_node = child; 110 | while !current_node.is_element_node() { 111 | if let Some(sibling) = current_node.get_next_sibling() { 112 | current_node = sibling; 113 | } else { 114 | break; 115 | } 116 | } 117 | if current_node.is_element_node() { 118 | Some(current_node) 119 | } else { 120 | None 121 | } 122 | } 123 | } 124 | } 125 | 126 | /// Returns the last element child if it exists 127 | pub fn get_last_element_child(&self) -> Option { 128 | match self.get_last_child() { 129 | None => None, 130 | Some(child) => { 131 | let mut current_node = child; 132 | while !current_node.is_element_node() { 133 | if let Some(sibling) = current_node.get_prev_sibling() { 134 | current_node = sibling; 135 | } else { 136 | break; 137 | } 138 | } 139 | if current_node.is_element_node() { 140 | Some(current_node) 141 | } else { 142 | None 143 | } 144 | } 145 | } 146 | } 147 | 148 | /// Returns all child nodes of the given node as a vector 149 | pub fn get_child_nodes(self) -> Vec { 150 | let mut children = Vec::new(); 151 | 
if let Some(first_child) = self.get_first_child() { 152 | children.push(first_child); 153 | while let Some(sibling) = children.last().unwrap().get_next_sibling() { 154 | children.push(sibling) 155 | } 156 | } 157 | children 158 | } 159 | 160 | /// Returns all child elements of the given node as a vector 161 | pub fn get_child_elements(self) -> Vec { 162 | self 163 | .get_child_nodes() 164 | .into_iter() 165 | .filter(|n| n.get_type() == Some(NodeType::ElementNode)) 166 | .collect::>() 167 | } 168 | 169 | /// Returns the parent if it exists 170 | pub fn get_parent(self) -> Option { 171 | let ptr = xmlGetParent(self.0); 172 | self.ptr_as_option(ptr) 173 | } 174 | 175 | /// Get the node type 176 | pub fn get_type(self) -> Option { 177 | NodeType::from_int(xmlGetNodeType(self.0)) 178 | } 179 | 180 | /// Returns true if it is a text node 181 | pub fn is_text_node(self) -> bool { 182 | self.get_type() == Some(NodeType::TextNode) 183 | } 184 | 185 | /// Checks if the given node is an Element 186 | pub fn is_element_node(self) -> bool { 187 | self.get_type() == Some(NodeType::ElementNode) 188 | } 189 | 190 | /// Checks if the underlying libxml2 pointer is `NULL` 191 | pub fn is_null(self) -> bool { 192 | self.0.is_null() 193 | } 194 | 195 | /// Returns the name of the node (empty string if name pointer is `NULL`) 196 | pub fn get_name(self) -> String { 197 | let name_ptr = xmlNodeGetName(self.0); 198 | if name_ptr.is_null() { 199 | return String::new(); 200 | } //empty string 201 | let c_string = unsafe { CStr::from_ptr(name_ptr) }; 202 | c_string.to_string_lossy().into_owned() 203 | } 204 | 205 | /// Returns the content of the node 206 | /// (assumes UTF-8 XML document) 207 | pub fn get_content(self) -> String { 208 | let content_ptr = unsafe { xmlNodeGetContent(self.0) }; 209 | if content_ptr.is_null() { 210 | //empty string when none 211 | return String::new(); 212 | } 213 | let c_string = unsafe { CStr::from_ptr(content_ptr as *const c_char) }; 214 | let rust_utf8 = 
c_string.to_string_lossy().into_owned(); 215 | bindgenFree(content_ptr as *mut c_void); 216 | rust_utf8 217 | } 218 | 219 | /// Returns the value of property `name` 220 | pub fn get_property(self, name: &str) -> Option { 221 | let c_name = CString::new(name).unwrap(); 222 | let value_ptr = unsafe { xmlGetProp(self.0, c_name.as_bytes().as_ptr()) }; 223 | if value_ptr.is_null() { 224 | return None; 225 | } 226 | let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) }; 227 | let prop_str = c_value_string.to_string_lossy().into_owned(); 228 | bindgenFree(value_ptr as *mut c_void); 229 | Some(prop_str) 230 | } 231 | 232 | /// Returns the value of property `name` in namespace `ns` 233 | pub fn get_property_ns(self, name: &str, ns: &str) -> Option { 234 | let c_name = CString::new(name).unwrap(); 235 | let c_ns = CString::new(ns).unwrap(); 236 | let value_ptr = 237 | unsafe { xmlGetNsProp(self.0, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr()) }; 238 | if value_ptr.is_null() { 239 | return None; 240 | } 241 | let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) }; 242 | let prop_str = c_value_string.to_string_lossy().into_owned(); 243 | bindgenFree(value_ptr as *mut c_void); 244 | Some(prop_str) 245 | } 246 | 247 | /// Returns the value of property `name` with no namespace 248 | pub fn get_property_no_ns(self, name: &str) -> Option { 249 | let c_name = CString::new(name).unwrap(); 250 | let value_ptr = unsafe { xmlGetNoNsProp(self.0, c_name.as_bytes().as_ptr()) }; 251 | if value_ptr.is_null() { 252 | return None; 253 | } 254 | let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) }; 255 | let prop_str = c_value_string.to_string_lossy().into_owned(); 256 | bindgenFree(value_ptr as *mut c_void); 257 | Some(prop_str) 258 | } 259 | 260 | /// Return an attribute as a `Node` struct of type AttributeNode 261 | pub fn get_property_node(self, name: &str) -> Option { 262 | let c_name = CString::new(name).unwrap(); 263 
| unsafe { 264 | let attr_node = xmlHasProp(self.0, c_name.as_bytes().as_ptr()); 265 | self.ptr_as_option(attr_node as xmlNodePtr) 266 | } 267 | } 268 | 269 | /// Return an attribute in a namespace `ns` as a `Node` of type AttributeNode 270 | pub fn get_property_node_ns(self, name: &str, ns: &str) -> Option { 271 | let c_name = CString::new(name).unwrap(); 272 | let c_ns = CString::new(ns).unwrap(); 273 | let attr_node = 274 | unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr()) }; 275 | self.ptr_as_option(attr_node as xmlNodePtr) 276 | } 277 | 278 | /// Return an attribute with no namespace as a `Node` of type AttributeNode 279 | pub fn get_property_node_no_ns(self, name: &str) -> Option { 280 | let c_name = CString::new(name).unwrap(); 281 | let attr_node = unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), ptr::null()) }; 282 | self.ptr_as_option(attr_node as xmlNodePtr) 283 | } 284 | 285 | /// Alias for get_property 286 | pub fn get_attribute(self, name: &str) -> Option { 287 | self.get_property(name) 288 | } 289 | 290 | /// Alias for get_property_ns 291 | pub fn get_attribute_ns(self, name: &str, ns: &str) -> Option { 292 | self.get_property_ns(name, ns) 293 | } 294 | 295 | /// Alias for get_property_no_ns 296 | pub fn get_attribute_no_ns(self, name: &str) -> Option { 297 | self.get_property_no_ns(name) 298 | } 299 | 300 | /// Alias for get_property_node 301 | pub fn get_attribute_node(self, name: &str) -> Option { 302 | self.get_property_node(name) 303 | } 304 | 305 | /// Alias for get_property_node_ns 306 | pub fn get_attribute_node_ns(self, name: &str, ns: &str) -> Option { 307 | self.get_property_node_ns(name, ns) 308 | } 309 | 310 | /// Alias for get_property_node_no_ns 311 | pub fn get_attribute_node_no_ns(self, name: &str) -> Option { 312 | self.get_property_node_no_ns(name) 313 | } 314 | 315 | /// Get a copy of the attributes of this node 316 | pub fn get_properties(self) -> HashMap { 317 | let mut attributes = 
HashMap::new(); 318 | 319 | let mut current_prop = xmlGetFirstProperty(self.0); 320 | while !current_prop.is_null() { 321 | let name_ptr = xmlAttrName(current_prop); 322 | let c_name_string = unsafe { CStr::from_ptr(name_ptr) }; 323 | let name = c_name_string.to_string_lossy().into_owned(); 324 | let value = self.get_property(&name).unwrap_or_default(); 325 | attributes.insert(name, value); 326 | current_prop = xmlNextPropertySibling(current_prop); 327 | } 328 | 329 | attributes 330 | } 331 | 332 | /// Get a copy of this node's attributes and their namespaces 333 | pub fn get_properties_ns(self) -> HashMap<(String, Option), String> { 334 | let mut attributes = HashMap::new(); 335 | 336 | let mut current_prop = xmlGetFirstProperty(self.0); 337 | while !current_prop.is_null() { 338 | let name_ptr = xmlAttrName(current_prop); 339 | let c_name_string = unsafe { CStr::from_ptr(name_ptr) }; 340 | let name = c_name_string.to_string_lossy().into_owned(); 341 | let ns_ptr = xmlAttrNs(current_prop); 342 | if ns_ptr.is_null() { 343 | let value = self.get_property_no_ns(&name).unwrap_or_default(); 344 | attributes.insert((name, None), value); 345 | } else { 346 | let ns = Namespace { ns_ptr }; 347 | let value = self 348 | .get_property_ns(&name, &ns.get_href()) 349 | .unwrap_or_default(); 350 | attributes.insert((name, Some(ns)), value); 351 | } 352 | current_prop = xmlNextPropertySibling(current_prop); 353 | } 354 | 355 | attributes 356 | } 357 | 358 | /// Alias for `get_properties` 359 | pub fn get_attributes(self) -> HashMap { 360 | self.get_properties() 361 | } 362 | 363 | /// Alias for `get_properties_ns` 364 | pub fn get_attributes_ns(self) -> HashMap<(String, Option), String> { 365 | self.get_properties_ns() 366 | } 367 | 368 | /// Check if a property has been defined, without allocating its value 369 | pub fn has_property(self, name: &str) -> bool { 370 | let c_name = CString::new(name).unwrap(); 371 | let value_ptr = unsafe { xmlHasProp(self.0, 
c_name.as_bytes().as_ptr()) }; 372 | !value_ptr.is_null() 373 | } 374 | 375 | /// Check if property `name` in namespace `ns` exists 376 | pub fn has_property_ns(self, name: &str, ns: &str) -> bool { 377 | let c_name = CString::new(name).unwrap(); 378 | let c_ns = CString::new(ns).unwrap(); 379 | let value_ptr = 380 | unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr()) }; 381 | !value_ptr.is_null() 382 | } 383 | 384 | /// Check if property `name` with no namespace exists 385 | pub fn has_property_no_ns(self, name: &str) -> bool { 386 | let c_name = CString::new(name).unwrap(); 387 | let value_ptr = unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), ptr::null()) }; 388 | !value_ptr.is_null() 389 | } 390 | 391 | /// Alias for has_property 392 | pub fn has_attribute(self, name: &str) -> bool { 393 | self.has_property(name) 394 | } 395 | 396 | /// Alias for has_property_ns 397 | pub fn has_attribute_ns(self, name: &str, ns: &str) -> bool { 398 | self.has_property_ns(name, ns) 399 | } 400 | 401 | /// Alias for has_property_no_ns 402 | pub fn has_attribute_no_ns(self, name: &str) -> bool { 403 | self.has_property_no_ns(name) 404 | } 405 | 406 | /// Gets the active namespace associated of this node 407 | pub fn get_namespace(self) -> Option { 408 | let ns_ptr = xmlNodeNs(self.0); 409 | if ns_ptr.is_null() { 410 | None 411 | } else { 412 | Some(Namespace { ns_ptr }) 413 | } 414 | } 415 | 416 | /// Gets a list of namespaces associated with this node 417 | pub fn get_namespaces(self, doc: &Document) -> Vec { 418 | let list_ptr_raw = unsafe { xmlGetNsList(doc.doc_ptr(), self.0) }; 419 | if list_ptr_raw.is_null() { 420 | Vec::new() 421 | } else { 422 | let mut namespaces = Vec::new(); 423 | let mut ptr_iter = list_ptr_raw as *mut xmlNsPtr; 424 | unsafe { 425 | while !ptr_iter.is_null() && !(*ptr_iter).is_null() { 426 | namespaces.push(Namespace { ns_ptr: *ptr_iter }); 427 | ptr_iter = ptr_iter.add(1); 428 | } 429 | /* TODO: valgrind 
suggests this technique isn't sufficiently fluent: 430 | ==114895== Conditional jump or move depends on uninitialised value(s) 431 | ==114895== at 0x4E9962F: xmlFreeNs (in /usr/lib/x86_64-linux-gnu/libxml2.so.2.9.4) 432 | ==114895== by 0x195CE8: libxml::tree::Node::get_namespaces (tree.rs:723) 433 | ==114895== by 0x12E7B6: base_tests::can_work_with_namespaces (base_tests.rs:537) 434 | DG: I could not improve on this state without creating memory leaks after ~1 hour, so I am 435 | marking it as future work. 436 | */ 437 | /* TODO: How do we properly deallocate here? The approach bellow reliably segfaults tree_tests on 1 thread */ 438 | // println!("\n-- xmlfreens on : {:?}", list_ptr_raw); 439 | // xmlFreeNs(list_ptr_raw as xmlNsPtr); 440 | } 441 | namespaces 442 | } 443 | } 444 | 445 | /// Get a list of namespaces declared with this node 446 | pub fn get_namespace_declarations(self) -> Vec { 447 | if self.get_type() != Some(NodeType::ElementNode) { 448 | // only element nodes can have declarations 449 | return Vec::new(); 450 | } 451 | let mut namespaces = Vec::new(); 452 | let mut ns_ptr = xmlNodeNsDeclarations(self.0); 453 | while !ns_ptr.is_null() { 454 | if !xmlNsPrefix(ns_ptr).is_null() || !xmlNsHref(ns_ptr).is_null() { 455 | namespaces.push(Namespace { ns_ptr }); 456 | } 457 | ns_ptr = xmlNextNsSibling(ns_ptr); 458 | } 459 | namespaces 460 | } 461 | 462 | /// Looks up the prefix of a namespace from its URI, basedo around a given `Node` 463 | pub fn lookup_namespace_prefix(self, href: &str) -> Option { 464 | if href.is_empty() { 465 | return None; 466 | } 467 | let c_href = CString::new(href).unwrap(); 468 | unsafe { 469 | let ptr_mut = self.0; 470 | let ns_ptr = xmlSearchNsByHref(xmlGetDoc(ptr_mut), ptr_mut, c_href.as_bytes().as_ptr()); 471 | if !ns_ptr.is_null() { 472 | let ns = Namespace { ns_ptr }; 473 | let ns_prefix = ns.get_prefix(); 474 | Some(ns_prefix) 475 | } else { 476 | None 477 | } 478 | } 479 | } 480 | 481 | /// Looks up the uri of a namespace 
from its prefix, basedo around a given `Node` 482 | pub fn lookup_namespace_uri(self, prefix: &str) -> Option { 483 | if prefix.is_empty() { 484 | return None; 485 | } 486 | let c_prefix = CString::new(prefix).unwrap(); 487 | unsafe { 488 | let ns_ptr = xmlSearchNs(xmlGetDoc(self.0), self.0, c_prefix.as_bytes().as_ptr()); 489 | if !ns_ptr.is_null() { 490 | let ns = Namespace { ns_ptr }; 491 | let ns_prefix = ns.get_href(); 492 | if !ns_prefix.is_empty() { 493 | Some(ns_prefix) 494 | } else { 495 | None 496 | } 497 | } else { 498 | None 499 | } 500 | } 501 | } 502 | 503 | /// Get a set of class names from this node's attributes 504 | pub fn get_class_names(self) -> HashSet { 505 | let mut set = HashSet::new(); 506 | if let Some(value) = self.get_property("class") { 507 | for n in value.split(' ') { 508 | set.insert(n.to_owned()); 509 | } 510 | } 511 | set 512 | } 513 | 514 | /// find read-only nodes via xpath, at the specified node and a given document 515 | pub fn findnodes(self, xpath: &str, owner: &Document) -> Result, ()> { 516 | let context = Context::new(owner)?; 517 | let evaluated = context.node_evaluate_readonly(xpath, self)?; 518 | Ok(evaluated.get_readonly_nodes_as_vec()) 519 | } 520 | 521 | /// Read-only nodes are always linked 522 | pub fn is_unlinked(self) -> bool { 523 | false 524 | } 525 | /// Read-only nodes only need a null check 526 | fn ptr_as_option(self, node_ptr: xmlNodePtr) -> Option { 527 | if node_ptr.is_null() { 528 | None 529 | } else { 530 | Some(RoNode(node_ptr)) 531 | } 532 | } 533 | 534 | /// `libc::c_void` isn't hashable and cannot be made hashable 535 | pub fn to_hashable(self) -> usize { 536 | self.0 as usize 537 | } 538 | /// Create a mock node, used for a placeholder argument 539 | pub fn null() -> Self { 540 | RoNode(ptr::null_mut()) 541 | } 542 | } 543 | -------------------------------------------------------------------------------- /src/schemas/common.rs: 
--------------------------------------------------------------------------------
//!
//! Common Utilities
//!
use crate::bindings;

use crate::error::StructuredError;

use std::ffi::c_void;

/// Structured-error callback handed to libxml2 (libxml2 older than 2.12, where
/// the error arrives as a mutable `xmlErrorPtr`).
///
/// `ctx` is the user-data pointer registered together with this callback; it is
/// reinterpreted as a `Vec<StructuredError>` and the converted error is appended
/// to it, so the errors can be handled back on the Rust side.
///
/// # Panics
/// Panics if `ctx` is null.
#[cfg(libxml_older_than_2_12)]
pub unsafe extern "C" fn structured_error_handler(ctx: *mut c_void, error: bindings::xmlErrorPtr) {
  assert!(!ctx.is_null());
  let log = unsafe { &mut *ctx.cast::<Vec<StructuredError>>() };
  log.push(unsafe { StructuredError::from_raw(error) });
}

/// Structured-error callback handed to libxml2 (libxml2 2.12 and newer, where
/// the error arrives as a `*const xmlError`).
///
/// `ctx` is the user-data pointer registered together with this callback; it is
/// reinterpreted as a `Vec<StructuredError>` and the converted error is appended
/// to it, so the errors can be handled back on the Rust side.
///
/// # Panics
/// Panics if `ctx` is null.
#[cfg(not(libxml_older_than_2_12))]
pub unsafe extern "C" fn structured_error_handler(ctx: *mut c_void, error: *const bindings::xmlError) {
  assert!(!ctx.is_null());
  let log = unsafe { &mut *ctx.cast::<Vec<StructuredError>>() };
  log.push(unsafe { StructuredError::from_raw(error) });
}
--------------------------------------------------------------------------------
/src/schemas/mod.rs:
--------------------------------------------------------------------------------
//!
//! Schema Validation Support (XSD)
//!
//! This module wraps xmlschemas in libxml2. See the original documentation or
//! look at the example at examples/schema_example.rs for usage.
//!
//! WARNING: This module has not been tested in a multithreaded or multiprocessing
//! environment.
//!
mod common;
mod parser;
mod schema;
mod validation;

use schema::Schema; // internally handled by SchemaValidationContext

pub use parser::SchemaParserContext;
pub use validation::SchemaValidationContext;
--------------------------------------------------------------------------------
/src/schemas/parser.rs:
--------------------------------------------------------------------------------
//!
//!
Wrapping of the Parser Context (xmlSchemaParserCtxt) 3 | //! 4 | use super::common; 5 | 6 | use crate::bindings; 7 | use crate::error::StructuredError; 8 | use crate::tree::document::Document; 9 | 10 | use std::ffi::CString; 11 | use std::os::raw::c_char; 12 | 13 | /// Wrapper on xmlSchemaParserCtxt 14 | pub struct SchemaParserContext { 15 | inner: *mut bindings::_xmlSchemaParserCtxt, 16 | errlog: *mut Vec, 17 | } 18 | 19 | impl SchemaParserContext { 20 | /// Create a schema parsing context from a Document object 21 | pub fn from_document(doc: &Document) -> Self { 22 | let parser = unsafe { bindings::xmlSchemaNewDocParserCtxt(doc.doc_ptr()) }; 23 | 24 | if parser.is_null() { 25 | panic!("Failed to create schema parser context from XmlDocument"); // TODO error handling 26 | } 27 | 28 | Self::from_raw(parser) 29 | } 30 | 31 | /// Create a schema parsing context from a buffer in memory 32 | pub fn from_buffer>(buff: Bytes) -> Self { 33 | let buff_bytes = buff.as_ref(); 34 | let buff_ptr = buff_bytes.as_ptr() as *const c_char; 35 | let buff_len = buff_bytes.len() as i32; 36 | 37 | let parser = unsafe { bindings::xmlSchemaNewMemParserCtxt(buff_ptr, buff_len) }; 38 | 39 | if parser.is_null() { 40 | panic!("Failed to create schema parser context from buffer"); // TODO error handling 41 | } 42 | 43 | Self::from_raw(parser) 44 | } 45 | 46 | /// Create a schema parsing context from an URL 47 | pub fn from_file(path: &str) -> Self { 48 | let path = CString::new(path).unwrap(); // TODO error handling for \0 containing strings 49 | let path_ptr = path.as_bytes_with_nul().as_ptr() as *const c_char; 50 | 51 | let parser = unsafe { bindings::xmlSchemaNewParserCtxt(path_ptr) }; 52 | 53 | if parser.is_null() { 54 | panic!("Failed to create schema parser context from path"); // TODO error handling 55 | } 56 | 57 | Self::from_raw(parser) 58 | } 59 | 60 | /// Drains error log from errors that might have accumulated while parsing schema 61 | pub fn drain_errors(&mut self) -> Vec { 62 | 
assert!(!self.errlog.is_null()); 63 | let errors = unsafe { &mut *self.errlog }; 64 | std::mem::take(errors) 65 | } 66 | 67 | /// Return a raw pointer to the underlying xmlSchemaParserCtxt structure 68 | pub fn as_ptr(&self) -> *mut bindings::_xmlSchemaParserCtxt { 69 | self.inner 70 | } 71 | } 72 | 73 | /// Private Interface 74 | impl SchemaParserContext { 75 | fn from_raw(parser: *mut bindings::_xmlSchemaParserCtxt) -> Self { 76 | let errors: Box> = Box::default(); 77 | 78 | unsafe { 79 | let reference: *mut Vec = std::mem::transmute(errors); 80 | bindings::xmlSchemaSetParserStructuredErrors( 81 | parser, 82 | Some(common::structured_error_handler), 83 | reference as *mut _, 84 | ); 85 | 86 | Self { 87 | inner: parser, 88 | errlog: reference, 89 | } 90 | } 91 | } 92 | } 93 | 94 | impl Drop for SchemaParserContext { 95 | fn drop(&mut self) { 96 | unsafe { 97 | bindings::xmlSchemaFreeParserCtxt(self.inner); 98 | if !self.errlog.is_null() { 99 | let errors: Box> = std::mem::transmute(self.errlog); 100 | drop(errors) 101 | } 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/schemas/schema.rs: -------------------------------------------------------------------------------- 1 | //! 2 | //! Wrapping of the Schema (xmlSchema) 3 | //! 4 | use std::sync::OnceLock; 5 | 6 | use super::SchemaParserContext; 7 | 8 | use crate::bindings; 9 | 10 | use crate::error::StructuredError; 11 | 12 | static SCHEMA_TYPES_LOCK: OnceLock = OnceLock::new(); 13 | 14 | /// Wrapper on xmlSchema 15 | pub struct Schema(*mut bindings::_xmlSchema); 16 | 17 | impl Schema { 18 | /// Create schema by having a SchemaParserContext do the actual parsing of the schema it was provided 19 | pub fn from_parser(parser: &mut SchemaParserContext) -> Result> { 20 | 21 | // `xmlSchemaParse` calls `xmlSchemaInitTypes`. 
22 | // `xmlSchemaInitTypes` is a lazy function which is only intended to be 23 | // called once for optimization purposes - but libxml2 doesn't do this 24 | // in a thread-safe manner. We wrap the call in a OnceLock so that it 25 | // only ever needs to be invoked once - and will do it in a thread-safe 26 | // way. 27 | let _ = SCHEMA_TYPES_LOCK.get_or_init(|| { 28 | unsafe { bindings::xmlSchemaInitTypes() }; 29 | true 30 | }); 31 | 32 | let raw = unsafe { bindings::xmlSchemaParse(parser.as_ptr()) }; 33 | 34 | if raw.is_null() { 35 | Err(parser.drain_errors()) 36 | } else { 37 | Ok(Self(raw)) 38 | } 39 | } 40 | 41 | /// Return a raw pointer to the underlying xmlSchema structure 42 | pub fn as_ptr(&self) -> *mut bindings::_xmlSchema { 43 | self.0 44 | } 45 | } 46 | 47 | impl Drop for Schema { 48 | fn drop(&mut self) { 49 | unsafe { bindings::xmlSchemaFree(self.0) } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/schemas/validation.rs: -------------------------------------------------------------------------------- 1 | //! 2 | //! Wrapping of the Validation Context (xmlSchemaValidCtxt) 3 | //! 
4 | use super::common; 5 | 6 | use super::Schema; 7 | use super::SchemaParserContext; 8 | 9 | use crate::bindings; 10 | 11 | use crate::tree::document::Document; 12 | use crate::tree::node::Node; 13 | 14 | use crate::error::StructuredError; 15 | 16 | use std::ffi::CString; 17 | use std::os::raw::c_char; 18 | 19 | /// Wrapper on xmlSchemaValidCtxt 20 | pub struct SchemaValidationContext { 21 | ctxt: *mut bindings::_xmlSchemaValidCtxt, 22 | errlog: *mut Vec, 23 | _schema: Schema, 24 | } 25 | 26 | 27 | impl SchemaValidationContext { 28 | /// Create a schema validation context from a parser object 29 | pub fn from_parser(parser: &mut SchemaParserContext) -> Result> { 30 | let schema = Schema::from_parser(parser); 31 | 32 | match schema { 33 | Ok(s) => { 34 | let ctx = unsafe { bindings::xmlSchemaNewValidCtxt(s.as_ptr()) }; 35 | 36 | if ctx.is_null() { 37 | panic!("Failed to create validation context from XML schema") // TODO error handling 38 | } 39 | 40 | Ok(Self::from_raw(ctx, s)) 41 | } 42 | Err(e) => Err(e), 43 | } 44 | } 45 | 46 | /// Validates a given Document, that is to be tested to comply with the loaded XSD schema definition 47 | pub fn validate_document(&mut self, doc: &Document) -> Result<(), Vec> { 48 | let rc = unsafe { bindings::xmlSchemaValidateDoc(self.ctxt, doc.doc_ptr()) }; 49 | 50 | match rc { 51 | -1 => panic!("Failed to validate document due to internal error"), // TODO error handling 52 | 0 => Ok(()), 53 | _ => Err(self.drain_errors()), 54 | } 55 | } 56 | 57 | /// Validates a given file from path for its compliance with the loaded XSD schema definition 58 | pub fn validate_file(&mut self, path: &str) -> Result<(), Vec> { 59 | let path = CString::new(path).unwrap(); // TODO error handling for \0 containing strings 60 | let path_ptr = path.as_bytes_with_nul().as_ptr() as *const c_char; 61 | 62 | let rc = unsafe { bindings::xmlSchemaValidateFile(self.ctxt, path_ptr, 0) }; 63 | 64 | match rc { 65 | -1 => panic!("Failed to validate file due to 
internal error"), // TODO error handling 66 | 0 => Ok(()), 67 | _ => Err(self.drain_errors()), 68 | } 69 | } 70 | 71 | /// Validates a branch or leaf of a document given as a Node against the loaded XSD schema definition 72 | pub fn validate_node(&mut self, node: &Node) -> Result<(), Vec> { 73 | let rc = unsafe { bindings::xmlSchemaValidateOneElement(self.ctxt, node.node_ptr()) }; 74 | 75 | match rc { 76 | -1 => panic!("Failed to validate element due to internal error"), // TODO error handling 77 | 0 => Ok(()), 78 | _ => Err(self.drain_errors()), 79 | } 80 | } 81 | 82 | /// Drains error log from errors that might have accumulated while validating something 83 | pub fn drain_errors(&mut self) -> Vec { 84 | assert!(!self.errlog.is_null()); 85 | let errors = unsafe { &mut *self.errlog }; 86 | std::mem::take(errors) 87 | } 88 | 89 | /// Return a raw pointer to the underlying xmlSchemaValidCtxt structure 90 | pub fn as_ptr(&self) -> *mut bindings::_xmlSchemaValidCtxt { 91 | self.ctxt 92 | } 93 | } 94 | 95 | /// Private Interface 96 | impl SchemaValidationContext { 97 | fn from_raw(ctx: *mut bindings::_xmlSchemaValidCtxt, schema: Schema) -> Self { 98 | let errors: Box> = Box::default(); 99 | 100 | unsafe { 101 | let reference: *mut Vec = std::mem::transmute(errors); 102 | bindings::xmlSchemaSetValidStructuredErrors( 103 | ctx, 104 | Some(common::structured_error_handler), 105 | reference as *mut _, 106 | // Box::into_raw(Box::new(Rc::downgrade(&errors))) as *mut _, 107 | ); 108 | Self { 109 | ctxt: ctx, 110 | errlog: reference, 111 | _schema: schema, 112 | } 113 | } 114 | } 115 | } 116 | 117 | impl Drop for SchemaValidationContext { 118 | fn drop(&mut self) { 119 | unsafe { 120 | bindings::xmlSchemaFreeValidCtxt(self.ctxt); 121 | if !self.errlog.is_null() { 122 | let errors: Box> = std::mem::transmute(self.errlog); 123 | drop(errors) 124 | } 125 | } 126 | } 127 | } 128 | -------------------------------------------------------------------------------- 
/src/tree/document.rs: -------------------------------------------------------------------------------- 1 | //! Document feature set 2 | //! 3 | use libc::{c_char, c_int}; 4 | use std::cell::RefCell; 5 | use std::collections::HashMap; 6 | use std::ffi::{CStr, CString}; 7 | use std::fmt; 8 | use std::ptr; 9 | use std::rc::{Rc, Weak}; 10 | use std::str; 11 | 12 | use crate::bindings::*; 13 | use crate::readonly::RoNode; 14 | use crate::tree::node::Node; 15 | 16 | pub(crate) type DocumentRef = Rc>; 17 | pub(crate) type DocumentWeak = Weak>; 18 | 19 | #[derive(Debug, Copy, Clone, Default)] 20 | /// Save Options for Document 21 | pub struct SaveOptions { 22 | /// format save output 23 | pub format: bool, 24 | /// drop the xml declaration 25 | pub no_declaration: bool, 26 | /// no empty tags 27 | pub no_empty_tags: bool, 28 | /// disable XHTML1 specific rules 29 | pub no_xhtml: bool, 30 | /// force XHTML1 specific rules 31 | pub xhtml: bool, 32 | /// force XML serialization on HTML doc 33 | pub as_xml: bool, 34 | /// force HTML serialization on XML doc 35 | pub as_html: bool, 36 | /// format with non-significant whitespace 37 | pub non_significant_whitespace: bool, 38 | } 39 | 40 | #[derive(Debug)] 41 | pub(crate) struct _Document { 42 | /// pointer to a libxml document 43 | pub(crate) doc_ptr: xmlDocPtr, 44 | /// hashed pointer-to-Node bookkeeping table 45 | nodes: HashMap, 46 | } 47 | 48 | impl _Document { 49 | /// Internal bookkeeping function, so far only used by `Node::wrap` 50 | pub(crate) fn insert_node(&mut self, node_ptr: xmlNodePtr, node: Node) { 51 | self.nodes.insert(node_ptr, node); 52 | } 53 | /// Internal bookkeeping function, so far only used by `Node::wrap` 54 | pub(crate) fn get_node(&self, node_ptr: xmlNodePtr) -> Option<&Node> { 55 | self.nodes.get(&node_ptr) 56 | } 57 | /// Internal bookkeeping function 58 | pub(crate) fn forget_node(&mut self, node_ptr: xmlNodePtr) { 59 | self.nodes.remove(&node_ptr); 60 | } 61 | } 62 | 63 | /// A libxml2 Document 
64 | #[derive(Clone)] 65 | pub struct Document(pub(crate) DocumentRef); 66 | 67 | impl Drop for _Document { 68 | ///Free document when it goes out of scope 69 | fn drop(&mut self) { 70 | unsafe { 71 | if !self.doc_ptr.is_null() { 72 | xmlFreeDoc(self.doc_ptr); 73 | } 74 | } 75 | } 76 | } 77 | 78 | impl fmt::Display for Document { 79 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 80 | write!(f, "{}", self.to_string_with_options(SaveOptions::default())) 81 | } 82 | } 83 | 84 | impl Document { 85 | /// Creates a new empty libxml2 document 86 | pub fn new() -> Result { 87 | unsafe { 88 | let c_version = CString::new("1.0").unwrap(); 89 | let c_version_bytes = c_version.as_bytes(); 90 | let doc_ptr = xmlNewDoc(c_version_bytes.as_ptr()); 91 | if doc_ptr.is_null() { 92 | Err(()) 93 | } else { 94 | let doc = _Document { 95 | doc_ptr, 96 | nodes: HashMap::new(), 97 | }; 98 | Ok(Document(Rc::new(RefCell::new(doc)))) 99 | } 100 | } 101 | } 102 | 103 | /// Obtain the underlying libxml2 `xmlDocPtr` for this Document 104 | pub fn doc_ptr(&self) -> xmlDocPtr { 105 | self.0.borrow().doc_ptr 106 | } 107 | 108 | /// Creates a new `Document` from an existing libxml2 pointer 109 | pub fn new_ptr(doc_ptr: xmlDocPtr) -> Self { 110 | let doc = _Document { 111 | doc_ptr, 112 | nodes: HashMap::new(), 113 | }; 114 | Document(Rc::new(RefCell::new(doc))) 115 | } 116 | 117 | pub(crate) fn null_ref() -> DocumentRef { 118 | Rc::new(RefCell::new(_Document { 119 | doc_ptr: ptr::null_mut(), 120 | nodes: HashMap::new(), 121 | })) 122 | } 123 | 124 | /// Write document to `filename` 125 | pub fn save_file(&self, filename: &str) -> Result { 126 | let c_filename = CString::new(filename).unwrap(); 127 | unsafe { 128 | let retval = xmlSaveFile(c_filename.as_ptr(), self.doc_ptr()); 129 | if retval < 0 { 130 | return Err(()); 131 | } 132 | Ok(retval) 133 | } 134 | } 135 | 136 | pub(crate) fn register_node(&self, node_ptr: xmlNodePtr) -> Node { 137 | Node::wrap(node_ptr, &self.0) 138 | } 139 | 
140 | /// Get the root element of the document 141 | pub fn get_root_element(&self) -> Option { 142 | unsafe { 143 | let node_ptr = xmlDocGetRootElement(self.doc_ptr()); 144 | if node_ptr.is_null() { 145 | None 146 | } else { 147 | Some(self.register_node(node_ptr)) 148 | } 149 | } 150 | } 151 | 152 | /// Get the root element of the document (read-only) 153 | pub fn get_root_readonly(&self) -> Option { 154 | unsafe { 155 | let node_ptr = xmlDocGetRootElement(self.doc_ptr()); 156 | if node_ptr.is_null() { 157 | None 158 | } else { 159 | Some(RoNode(node_ptr)) 160 | } 161 | } 162 | } 163 | 164 | /// Sets the root element of the document 165 | pub fn set_root_element(&mut self, root: &Node) { 166 | unsafe { 167 | xmlDocSetRootElement(self.doc_ptr(), root.node_ptr()); 168 | } 169 | root.set_linked(); 170 | } 171 | 172 | fn ptr_as_result(&mut self, node_ptr: xmlNodePtr) -> Result { 173 | if node_ptr.is_null() { 174 | Err(()) 175 | } else { 176 | let node = self.register_node(node_ptr); 177 | Ok(node) 178 | } 179 | } 180 | 181 | /// Import a `Node` from another `Document` 182 | pub fn import_node(&mut self, node: &mut Node) -> Result { 183 | if !node.is_unlinked() { 184 | return Err(()); 185 | } 186 | // Also remove this node from the prior document hash 187 | node 188 | .get_docref() 189 | .upgrade() 190 | .unwrap() 191 | .borrow_mut() 192 | .forget_node(node.node_ptr()); 193 | 194 | let node_ptr = unsafe { xmlDocCopyNode(node.node_ptr(), self.doc_ptr(), 1) }; 195 | node.set_linked(); 196 | self.ptr_as_result(node_ptr) 197 | } 198 | 199 | /// Serializes the `Document` with options 200 | pub fn to_string_with_options(&self, options: SaveOptions) -> String { 201 | unsafe { 202 | // allocate a buffer to dump into 203 | let buf = xmlBufferCreate(); 204 | let c_utf8 = CString::new("UTF-8").unwrap(); 205 | let mut xml_options = 0; 206 | 207 | if options.format { 208 | xml_options += xmlSaveOption_XML_SAVE_FORMAT; 209 | } 210 | if options.no_declaration { 211 | xml_options += 
xmlSaveOption_XML_SAVE_NO_DECL; 212 | } 213 | if options.no_empty_tags { 214 | xml_options += xmlSaveOption_XML_SAVE_NO_EMPTY; 215 | } 216 | if options.no_xhtml { 217 | xml_options += xmlSaveOption_XML_SAVE_NO_XHTML; 218 | } 219 | if options.xhtml { 220 | xml_options += xmlSaveOption_XML_SAVE_XHTML; 221 | } 222 | if options.as_xml { 223 | xml_options += xmlSaveOption_XML_SAVE_AS_XML; 224 | } 225 | if options.as_html { 226 | xml_options += xmlSaveOption_XML_SAVE_AS_HTML; 227 | } 228 | if options.non_significant_whitespace { 229 | xml_options += xmlSaveOption_XML_SAVE_WSNONSIG; 230 | } 231 | 232 | let save_ctx = xmlSaveToBuffer(buf, c_utf8.as_ptr(), xml_options as i32); 233 | let _size = xmlSaveDoc(save_ctx, self.doc_ptr()); 234 | let _size = xmlSaveClose(save_ctx); 235 | 236 | let result = xmlBufferContent(buf); 237 | let c_string = CStr::from_ptr(result as *const c_char); 238 | let node_string = c_string.to_string_lossy().into_owned(); 239 | xmlBufferFree(buf); 240 | 241 | node_string 242 | } 243 | } 244 | 245 | /// Serializes a `Node` owned by this `Document` 246 | pub fn node_to_string(&self, node: &Node) -> String { 247 | unsafe { 248 | // allocate a buffer to dump into 249 | let buf = xmlBufferCreate(); 250 | 251 | // dump the node 252 | xmlNodeDump( 253 | buf, 254 | self.doc_ptr(), 255 | node.node_ptr(), 256 | 1, // level of indentation 257 | 0, /* disable formatting */ 258 | ); 259 | let result = xmlBufferContent(buf); 260 | let c_string = CStr::from_ptr(result as *const c_char); 261 | let node_string = c_string.to_string_lossy().into_owned(); 262 | xmlBufferFree(buf); 263 | 264 | node_string 265 | } 266 | } 267 | /// Serializes a `RoNode` owned by this `Document` 268 | pub fn ronode_to_string(&self, node: &RoNode) -> String { 269 | unsafe { 270 | // allocate a buffer to dump into 271 | let buf = xmlBufferCreate(); 272 | 273 | // dump the node 274 | xmlNodeDump( 275 | buf, 276 | self.doc_ptr(), 277 | node.node_ptr(), 278 | 1, // level of indentation 279 | 0, 
/* disable formatting */ 280 | ); 281 | let result = xmlBufferContent(buf); 282 | let c_string = CStr::from_ptr(result as *const c_char); 283 | let node_string = c_string.to_string_lossy().into_owned(); 284 | xmlBufferFree(buf); 285 | 286 | node_string 287 | } 288 | } 289 | 290 | /// Creates a node for an XML processing instruction 291 | pub fn create_processing_instruction(&mut self, name: &str, content: &str) -> Result { 292 | unsafe { 293 | let c_name = CString::new(name).unwrap(); 294 | let c_name_bytes = c_name.as_bytes(); 295 | let c_content = CString::new(content).unwrap(); 296 | let c_content_bytes = c_content.as_bytes(); 297 | 298 | let node_ptr: xmlNodePtr = xmlNewDocPI( 299 | self.doc_ptr(), 300 | c_name_bytes.as_ptr(), 301 | c_content_bytes.as_ptr(), 302 | ); 303 | if node_ptr.is_null() { 304 | Err(()) 305 | } else { 306 | Ok(self.register_node(node_ptr)) 307 | } 308 | } 309 | } 310 | 311 | /// Cast the document as a libxml Node 312 | pub fn as_node(&self) -> Node { 313 | // Note: this method is important to keep, as it enables certain low-level libxml2 idioms 314 | // In particular, method dispatch based on NodeType is only possible when the document can be cast as a Node 315 | // 316 | // Memory management is not an issue, as a document node can not be unbound/removed, and does not require 317 | // any additional deallocation than the Drop of a Document object. 
318 | self.register_node(self.doc_ptr() as xmlNodePtr) 319 | } 320 | 321 | /// Duplicates the libxml2 Document into a new instance 322 | pub fn dup(&self) -> Result { 323 | let doc_ptr = unsafe { xmlCopyDoc(self.doc_ptr(), 1) }; 324 | if doc_ptr.is_null() { 325 | Err(()) 326 | } else { 327 | let doc = _Document { 328 | doc_ptr, 329 | nodes: HashMap::new(), 330 | }; 331 | Ok(Document(Rc::new(RefCell::new(doc)))) 332 | } 333 | } 334 | 335 | /// Duplicates a source libxml2 Document into the empty Document self 336 | pub fn dup_from(&mut self, source: &Self) -> Result<(), ()> { 337 | if !self.doc_ptr().is_null() { 338 | return Err(()); 339 | } 340 | 341 | let doc_ptr = unsafe { xmlCopyDoc(source.doc_ptr(), 1) }; 342 | if doc_ptr.is_null() { 343 | return Err(()); 344 | } 345 | self.0.borrow_mut().doc_ptr = doc_ptr; 346 | Ok(()) 347 | } 348 | } 349 | -------------------------------------------------------------------------------- /src/tree/mod.rs: -------------------------------------------------------------------------------- 1 | //! The tree functionality 2 | //! 3 | 4 | pub mod document; 5 | pub mod namespace; 6 | pub mod node; 7 | pub mod nodetype; 8 | 9 | pub use self::document::{Document, SaveOptions}; 10 | pub(crate) use self::document::{DocumentRef, DocumentWeak}; 11 | pub use self::namespace::Namespace; 12 | pub use self::node::set_node_rc_guard; 13 | pub use self::node::{Node, NODE_RC_MAX_GUARD}; 14 | pub use self::nodetype::NodeType; 15 | -------------------------------------------------------------------------------- /src/tree/namespace.rs: -------------------------------------------------------------------------------- 1 | //! Namespace feature set 2 | //! 
3 | use std::error::Error; 4 | use std::ffi::{CStr, CString}; 5 | use std::hash::{Hash, Hasher}; 6 | use std::ptr; 7 | use std::str; 8 | 9 | use crate::bindings::*; 10 | use crate::c_helpers::*; 11 | use crate::tree::node::Node; 12 | 13 | ///An xml namespace 14 | #[derive(Clone)] 15 | pub struct Namespace { 16 | ///libxml's xmlNsPtr 17 | pub(crate) ns_ptr: xmlNsPtr, 18 | } 19 | 20 | impl PartialEq for Namespace { 21 | fn eq(&self, other: &Self) -> bool { 22 | self.get_prefix() == other.get_prefix() && self.get_href() == other.get_href() 23 | } 24 | } 25 | 26 | impl Eq for Namespace {} 27 | 28 | impl Hash for Namespace { 29 | fn hash(&self, state: &mut H) { 30 | self.get_prefix().hash(state); 31 | self.get_href().hash(state); 32 | } 33 | } 34 | 35 | impl Namespace { 36 | /// Creates a new namespace 37 | pub fn new( 38 | prefix: &str, 39 | href: &str, 40 | node: &mut Node, 41 | ) -> Result> { 42 | let c_href = CString::new(href).unwrap(); 43 | let c_prefix = CString::new(prefix).unwrap(); 44 | let c_prefix_ptr = if prefix.is_empty() { 45 | ptr::null() 46 | } else { 47 | c_prefix.as_ptr() 48 | }; 49 | 50 | unsafe { 51 | let ns = xmlNewNs( 52 | node.node_ptr_mut()?, 53 | c_href.as_bytes().as_ptr(), 54 | c_prefix_ptr as *const u8, 55 | ); 56 | if ns.is_null() { 57 | Err(From::from("xmlNewNs returned NULL")) 58 | } else { 59 | Ok(Namespace { ns_ptr: ns }) 60 | } 61 | } 62 | } 63 | 64 | /// Immutably borrows the underlying libxml2 `xmlNsPtr` pointer 65 | pub fn ns_ptr(&self) -> xmlNsPtr { 66 | self.ns_ptr 67 | } 68 | 69 | /// Mutably borrows the underlying libxml2 `xmlNsPtr` pointer 70 | pub fn ns_ptr_mut(&mut self) -> xmlNsPtr { 71 | self.ns_ptr 72 | } 73 | /// The namespace prefix 74 | pub fn get_prefix(&self) -> String { 75 | unsafe { 76 | let prefix_ptr = xmlNsPrefix(self.ns_ptr()); 77 | if prefix_ptr.is_null() { 78 | String::new() 79 | } else { 80 | let c_prefix = CStr::from_ptr(prefix_ptr); 81 | c_prefix.to_string_lossy().into_owned() 82 | } 83 | } 84 | } 85 | 86 | 
/// The namespace href 87 | pub fn get_href(&self) -> String { 88 | unsafe { 89 | let href_ptr = xmlNsHref(self.ns_ptr()); 90 | if href_ptr.is_null() { 91 | String::new() 92 | } else { 93 | let c_href = CStr::from_ptr(href_ptr); 94 | c_href.to_string_lossy().into_owned() 95 | } 96 | } 97 | } 98 | 99 | /// Explicit free method, until (if?) we implement automatic+safe free-on-drop 100 | pub fn free(&mut self) { 101 | unsafe { xmlFreeNs(self.ns_ptr()) } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/tree/nodetype.rs: -------------------------------------------------------------------------------- 1 | //! Types of libxml2 Nodes 2 | //! 3 | 4 | use crate::bindings::xmlElementType; 5 | 6 | /// Types of xml nodes 7 | #[derive(Debug, PartialEq, Eq)] 8 | #[allow(missing_docs)] 9 | pub enum NodeType { 10 | ElementNode, 11 | AttributeNode, 12 | TextNode, 13 | CDataSectionNode, 14 | EntityRefNode, 15 | EntityNode, 16 | PiNode, 17 | CommentNode, 18 | DocumentNode, 19 | DocumentTypeNode, 20 | DocumentFragNode, 21 | NotationNode, 22 | HtmlDocumentNode, 23 | DTDNode, 24 | ElementDecl, 25 | AttributeDecl, 26 | EntityDecl, 27 | NamespaceDecl, 28 | XIncludeStart, 29 | XIncludeEnd, 30 | DOCBDocumentNode, 31 | } 32 | 33 | impl NodeType { 34 | /// converts an integer from libxml's `enum NodeType` 35 | /// to an instance of our `NodeType` 36 | pub fn from_int(i: xmlElementType) -> Option { 37 | match i { 38 | 1 => Some(NodeType::ElementNode), 39 | 2 => Some(NodeType::AttributeNode), 40 | 3 => Some(NodeType::TextNode), 41 | 4 => Some(NodeType::CDataSectionNode), 42 | 5 => Some(NodeType::EntityRefNode), 43 | 6 => Some(NodeType::EntityNode), 44 | 7 => Some(NodeType::PiNode), 45 | 8 => Some(NodeType::CommentNode), 46 | 9 => Some(NodeType::DocumentNode), 47 | 10 => Some(NodeType::DocumentTypeNode), 48 | 11 => Some(NodeType::DocumentFragNode), 49 | 12 => Some(NodeType::NotationNode), 50 | 13 => Some(NodeType::HtmlDocumentNode), 51 | 
14 => Some(NodeType::DTDNode), 52 | 15 => Some(NodeType::ElementDecl), 53 | 16 => Some(NodeType::AttributeDecl), 54 | 17 => Some(NodeType::EntityDecl), 55 | 18 => Some(NodeType::NamespaceDecl), 56 | 19 => Some(NodeType::XIncludeStart), 57 | 20 => Some(NodeType::XIncludeEnd), 58 | 21 => Some(NodeType::DOCBDocumentNode), 59 | _ => None, 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/wrapper.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | // #include 5 | #include 6 | // #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include -------------------------------------------------------------------------------- /src/xpath.rs: -------------------------------------------------------------------------------- 1 | //! 
The `XPath` functionality 2 | 3 | use crate::bindings::*; 4 | use crate::c_helpers::*; 5 | use crate::readonly::RoNode; 6 | use crate::tree::{Document, DocumentRef, DocumentWeak, Node}; 7 | use libc::{c_char, c_void, size_t}; 8 | use std::cell::RefCell; 9 | use std::ffi::{CStr, CString}; 10 | use std::fmt; 11 | use std::rc::Rc; 12 | use std::str; 13 | 14 | ///Thinly wrapped libxml2 xpath context 15 | pub(crate) type ContextRef = Rc>; 16 | 17 | #[derive(Debug)] 18 | pub(crate) struct _Context(pub(crate) xmlXPathContextPtr); 19 | 20 | impl Drop for _Context { 21 | ///free xpath context when it goes out of scope 22 | fn drop(&mut self) { 23 | unsafe { 24 | xmlXPathFreeContext(self.0); 25 | } 26 | } 27 | } 28 | 29 | /// An XPath context 30 | #[derive(Clone)] 31 | pub struct Context { 32 | /// Safe reference to the libxml2 context pointer 33 | pub(crate) context_ptr: ContextRef, 34 | ///Document contains pointer, needed for ContextPtr, so we need to borrow Document to prevent its freeing 35 | pub(crate) document: DocumentWeak, 36 | } 37 | 38 | ///Essentially, the result of the evaluation of some xpath expression 39 | #[derive(Debug)] 40 | pub struct Object { 41 | ///libxml's `ObjectPtr` 42 | pub ptr: xmlXPathObjectPtr, 43 | document: DocumentWeak, 44 | } 45 | 46 | impl Context { 47 | ///create the xpath context for a document 48 | pub fn new(doc: &Document) -> Result { 49 | let ctxtptr = unsafe { xmlXPathNewContext(doc.doc_ptr()) }; 50 | if ctxtptr.is_null() { 51 | Err(()) 52 | } else { 53 | Ok(Context { 54 | context_ptr: Rc::new(RefCell::new(_Context(ctxtptr))), 55 | document: Rc::downgrade(&doc.0), 56 | }) 57 | } 58 | } 59 | pub(crate) fn new_ptr(docref: &DocumentRef) -> Result { 60 | let ctxtptr = unsafe { xmlXPathNewContext(docref.borrow().doc_ptr) }; 61 | if ctxtptr.is_null() { 62 | Err(()) 63 | } else { 64 | Ok(Context { 65 | context_ptr: Rc::new(RefCell::new(_Context(ctxtptr))), 66 | document: Rc::downgrade(docref), 67 | }) 68 | } 69 | } 70 | 71 | /// Returns 
the raw libxml2 context pointer behind the struct 72 | pub fn as_ptr(&self) -> xmlXPathContextPtr { 73 | self.context_ptr.borrow().0 74 | } 75 | 76 | /// Instantiate a new Context for the Document of a given Node. 77 | /// Note: the Context is root-level for that document, use `.set_context_node` to limit scope to this node 78 | pub fn from_node(node: &Node) -> Result { 79 | let docref = node.get_docref().upgrade().unwrap(); 80 | Context::new_ptr(&docref) 81 | } 82 | 83 | /// Register a namespace prefix-href pair on the xpath context 84 | pub fn register_namespace(&self, prefix: &str, href: &str) -> Result<(), ()> { 85 | let c_prefix = CString::new(prefix).unwrap(); 86 | let c_href = CString::new(href).unwrap(); 87 | unsafe { 88 | let result = xmlXPathRegisterNs( 89 | self.as_ptr(), 90 | c_prefix.as_bytes().as_ptr(), 91 | c_href.as_bytes().as_ptr(), 92 | ); 93 | if result != 0 { 94 | Err(()) 95 | } else { 96 | Ok(()) 97 | } 98 | } 99 | } 100 | 101 | ///evaluate an xpath 102 | pub fn evaluate(&self, xpath: &str) -> Result { 103 | let c_xpath = CString::new(xpath).unwrap(); 104 | let ptr = unsafe { xmlXPathEvalExpression(c_xpath.as_bytes().as_ptr(), self.as_ptr()) }; 105 | if ptr.is_null() { 106 | Err(()) 107 | } else { 108 | Ok(Object { 109 | ptr, 110 | document: self.document.clone(), 111 | }) 112 | } 113 | } 114 | 115 | ///evaluate an xpath on a context Node 116 | pub fn node_evaluate(&self, xpath: &str, node: &Node) -> Result { 117 | let c_xpath = CString::new(xpath).unwrap(); 118 | let ptr = 119 | unsafe { xmlXPathNodeEval(node.node_ptr(), c_xpath.as_bytes().as_ptr(), self.as_ptr()) }; 120 | if ptr.is_null() { 121 | Err(()) 122 | } else { 123 | Ok(Object { 124 | ptr, 125 | document: self.document.clone(), 126 | }) 127 | } 128 | } 129 | 130 | ///evaluate an xpath on a context RoNode 131 | pub fn node_evaluate_readonly(&self, xpath: &str, node: RoNode) -> Result { 132 | let c_xpath = CString::new(xpath).unwrap(); 133 | let ptr = unsafe { xmlXPathNodeEval(node.0, 
c_xpath.as_bytes().as_ptr(), self.as_ptr()) }; 134 | if ptr.is_null() { 135 | Err(()) 136 | } else { 137 | Ok(Object { 138 | ptr, 139 | document: self.document.clone(), 140 | }) 141 | } 142 | } 143 | 144 | /// localize xpath context to a specific Node 145 | pub fn set_context_node(&mut self, node: &Node) -> Result<(), ()> { 146 | unsafe { 147 | let result = xmlXPathSetContextNode(node.node_ptr(), self.as_ptr()); 148 | if result != 0 { 149 | return Err(()); 150 | } 151 | } 152 | Ok(()) 153 | } 154 | 155 | /// find nodes via xpath, at a specified node or the document root 156 | pub fn findnodes(&mut self, xpath: &str, node_opt: Option<&Node>) -> Result, ()> { 157 | let evaluated = if let Some(node) = node_opt { 158 | self.node_evaluate(xpath, node)? 159 | } else { 160 | self.evaluate(xpath)? 161 | }; 162 | Ok(evaluated.get_nodes_as_vec()) 163 | } 164 | 165 | /// find literal values via xpath, at a specified node or the document root 166 | pub fn findvalues(&mut self, xpath: &str, node_opt: Option<&Node>) -> Result, ()> { 167 | let evaluated = if let Some(node) = node_opt { 168 | self.node_evaluate(xpath, node)? 169 | } else { 170 | self.evaluate(xpath)? 171 | }; 172 | Ok(evaluated.get_nodes_as_str()) 173 | } 174 | 175 | /// find a literal value via xpath, at a specified node or the document root 176 | pub fn findvalue(&mut self, xpath: &str, node_opt: Option<&Node>) -> Result { 177 | let evaluated = if let Some(node) = node_opt { 178 | self.node_evaluate(xpath, node)? 179 | } else { 180 | self.evaluate(xpath)? 
181 | }; 182 | Ok(evaluated.to_string()) 183 | } 184 | } 185 | 186 | impl Drop for Object { 187 | /// free the memory allocated 188 | fn drop(&mut self) { 189 | unsafe { 190 | xmlXPathFreeObject(self.ptr); 191 | } 192 | } 193 | } 194 | 195 | impl Object { 196 | ///get the number of nodes in the result set 197 | pub fn get_number_of_nodes(&self) -> usize { 198 | let v = xmlXPathObjectNumberOfNodes(self.ptr); 199 | if v == -1 { 200 | panic!("rust-libxml: xpath: Passed in null pointer!"); 201 | } 202 | if v == -2 { 203 | // No nodes found! 204 | return 0; 205 | } 206 | if v < -2 { 207 | panic!("rust-libxml: xpath: expected non-negative number of result nodes"); 208 | } 209 | v as usize 210 | } 211 | 212 | /// returns the result set as a vector of `Node` objects 213 | pub fn get_nodes_as_vec(&self) -> Vec { 214 | let n = self.get_number_of_nodes(); 215 | let mut vec: Vec = Vec::with_capacity(n); 216 | let slice = if n > 0 { 217 | xmlXPathObjectGetNodes(self.ptr, n as size_t) 218 | } else { 219 | Vec::new() 220 | }; 221 | for ptr in slice { 222 | if ptr.is_null() { 223 | panic!("rust-libxml: xpath: found null pointer result set"); 224 | } 225 | let node = Node::wrap(ptr, &self.document.upgrade().unwrap()); 226 | vec.push(node); 227 | } 228 | vec 229 | } 230 | 231 | /// returns the result set as a vector of `RoNode` objects 232 | pub fn get_readonly_nodes_as_vec(&self) -> Vec { 233 | let n = self.get_number_of_nodes(); 234 | let mut vec: Vec = Vec::with_capacity(n); 235 | let slice = if n > 0 { 236 | xmlXPathObjectGetNodes(self.ptr, n as size_t) 237 | } else { 238 | Vec::new() 239 | }; 240 | for ptr in slice { 241 | if ptr.is_null() { 242 | panic!("rust-libxml: xpath: found null pointer result set"); 243 | } 244 | vec.push(RoNode(ptr)); 245 | } 246 | vec 247 | } 248 | 249 | /// returns the result set as a vector of Strings 250 | pub fn get_nodes_as_str(&self) -> Vec { 251 | let n = self.get_number_of_nodes(); 252 | let mut vec: Vec = Vec::with_capacity(n); 253 | let 
slice = if n > 0 { 254 | xmlXPathObjectGetNodes(self.ptr, n as size_t) 255 | } else { 256 | Vec::new() 257 | }; 258 | for ptr in slice { 259 | if ptr.is_null() { 260 | panic!("rust-libxml: xpath: found null pointer result set"); 261 | } 262 | let value_ptr = unsafe { xmlXPathCastNodeToString(ptr) }; 263 | let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) }; 264 | let ready_str = c_value_string.to_string_lossy().into_owned(); 265 | bindgenFree(value_ptr as *mut c_void); 266 | vec.push(ready_str); 267 | } 268 | vec 269 | } 270 | 271 | } 272 | 273 | impl fmt::Display for Object { 274 | /// use if the XPath used was meant to return a string, such as string(//foo/@attr) 275 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 276 | unsafe { 277 | let receiver = xmlXPathCastToString(self.ptr); 278 | let c_string = CStr::from_ptr(receiver as *const c_char); 279 | let rust_string = str::from_utf8(c_string.to_bytes()).unwrap().to_owned(); 280 | bindgenFree(receiver as *mut c_void); 281 | write!(f, "{rust_string}") 282 | } 283 | } 284 | } 285 | 286 | /// Calls the binding to http://xmlsoft.org/html/libxml-xpath.html#xmlXPathCompile and return true if 287 | /// a non-null pointer is returned. The idea is to use this to validate an xpath independent of context. 
288 | /// Tests describing what this validates in tests/xpath_tests.rs 289 | pub fn is_well_formed_xpath(xpath: &str) -> bool { 290 | let c_xpath = CString::new(xpath).unwrap(); 291 | let xml_xpath_comp_expr_ptr = unsafe { xmlXPathCompile(c_xpath.as_bytes().as_ptr()) }; 292 | if xml_xpath_comp_expr_ptr.is_null() { 293 | false 294 | } else { 295 | bindgenFree(xml_xpath_comp_expr_ptr as *mut c_void); 296 | true 297 | } 298 | } 299 | -------------------------------------------------------------------------------- /tests/VALGRIND.md: -------------------------------------------------------------------------------- 1 | It is often good practice, especially when venturing on large API refactors, to double-check for any newly created memory leaks. 2 | 3 | Some leaks can only be spotted in external projects that show advanced use cases of `rust-libxml`, for example allocating a `Node` in a default trait of a struct with a `Node` field. For now the only safe approach to that pattern is using the `Node::null()` placeholder, but the Rust idiomatic approach is to instead refactor to an `Option<Node>` field. 4 | 5 | Some, more direct, leak scenarios can already be spotted from the libxml test suite, and one can use valgrind to obtain a report via a call of the form: 6 | 7 | ``` 8 | valgrind --leak-check=full target/debug/base_tests-3d29e5da1f969267 9 | ``` 10 | 11 | Additionally, as Rust nightlies keep evolving, a specific allocation system may be necessary to properly run valgrind. At the time of writing, `rust-libxml` tests need no such changes, but some external projects do. For convenience, here is a known working preamble, which can be added to the preambles of executable files, including example and test files. 
12 | 13 | ```rust 14 | #![feature(alloc_system, allocator_api)] 15 | extern crate alloc_system; 16 | use alloc_system::System; 17 | 18 | #[global_allocator] 19 | static A: System = System; 20 | ``` 21 | 22 | For more discussion motivating this explanation, see the respective [GitHub pull request](https://github.com/KWARC/rust-libxml/pull/43). -------------------------------------------------------------------------------- /tests/base_tests.rs: -------------------------------------------------------------------------------- 1 | //! Base API tests, to be split into distinct sub-suites later on 2 | //! 3 | use std::env; 4 | use std::fs::File; 5 | use std::io::Read; 6 | 7 | use libxml::parser::{Parser, ParserOptions}; 8 | use libxml::tree::{Document, Node, SaveOptions}; 9 | 10 | #[test] 11 | /// Build a hello world XML doc 12 | fn hello_builder() { 13 | let doc_result = Document::new(); 14 | assert!(doc_result.is_ok()); 15 | let mut doc = doc_result.unwrap(); 16 | 17 | // This tests for functionality (return self if there is no root element) that is removed. 
18 | let doc_node = doc.get_root_element(); 19 | assert_eq!(doc_node, None, "empty document has no root element"); 20 | 21 | let hello_element_result = Node::new("hello", None, &doc); 22 | assert!(hello_element_result.is_ok()); 23 | let mut hello_element = hello_element_result.unwrap(); 24 | 25 | assert!(hello_element.set_content("world!").is_ok()); 26 | 27 | doc.set_root_element(&hello_element); 28 | 29 | assert!(hello_element.set_content("world!").is_ok()); 30 | 31 | let added = hello_element.new_child(None, "child"); 32 | assert!(added.is_ok()); 33 | let mut new_child = added.unwrap(); 34 | 35 | assert!(new_child.set_content("set content").is_ok()); 36 | 37 | assert_eq!(new_child.get_content(), "set content"); 38 | assert_eq!(hello_element.get_content(), "world!set content"); 39 | 40 | let node_string = doc.node_to_string(&hello_element); 41 | assert!(node_string.len() > 1); 42 | 43 | assert!(hello_element.set_name("world").is_ok()); 44 | assert_eq!(hello_element.get_name(), "world"); 45 | 46 | let doc_string = doc.to_string(); 47 | assert!(doc_string.len() > 1); 48 | let output_path = env::temp_dir().join("rust_libxml_tests_helloworld.xml"); 49 | assert!(doc.save_file(&output_path.display().to_string()).is_ok()); 50 | } 51 | 52 | #[test] 53 | fn create_pi() { 54 | let doc_result = Document::new(); 55 | assert!(doc_result.is_ok()); 56 | let mut doc = doc_result.unwrap(); 57 | // Add a PI 58 | let node_ok: Result = doc.create_processing_instruction("piname", "picontent"); 59 | assert!(node_ok.is_ok()); 60 | assert_eq!(node_ok.unwrap().get_content(), "picontent"); 61 | let doc_string = doc.to_string(); 62 | assert!(doc_string.len() > 1); 63 | } 64 | 65 | #[test] 66 | /// Duplicate an xml file 67 | fn duplicate_file() { 68 | let parser = Parser::default(); 69 | { 70 | let doc_result = parser.parse_file("tests/resources/file01.xml"); 71 | assert!(doc_result.is_ok()); 72 | 73 | let doc = doc_result.unwrap(); 74 | let output_path = 
env::temp_dir().join("rust_libxml_tests_copy.xml"); 75 | doc.save_file(&output_path.display().to_string()).unwrap(); 76 | } 77 | } 78 | 79 | #[test] 80 | // Can parse an xml string in memory 81 | fn can_parse_xml_string() { 82 | let mut file = File::open("tests/resources/file01.xml").unwrap(); 83 | let mut xml_string = String::new(); 84 | file.read_to_string(&mut xml_string).unwrap(); 85 | let parser = Parser::default(); 86 | let doc = parser.parse_string(&xml_string).unwrap(); 87 | assert_eq!(doc.get_root_element().unwrap().get_name(), "root"); 88 | } 89 | 90 | #[test] 91 | /// Can load an HTML file 92 | fn can_load_html_file() { 93 | let parser = Parser::default_html(); 94 | { 95 | let doc_result = parser.parse_file("tests/resources/example.html"); 96 | assert!(doc_result.is_ok()); 97 | 98 | let doc = doc_result.unwrap(); 99 | let root = doc.get_root_element().unwrap(); 100 | assert_eq!(root.get_name(), "html"); 101 | } 102 | } 103 | 104 | fn create_test_document(file: Option<&str>) -> Document { 105 | let parser = Parser::default(); 106 | let doc_result = parser.parse_file(file.unwrap_or("tests/resources/file01.xml")); 107 | assert!(doc_result.is_ok()); 108 | doc_result.unwrap() 109 | } 110 | 111 | #[test] 112 | fn document_can_import_node() { 113 | let doc1 = create_test_document(None); 114 | let mut doc2 = create_test_document(None); 115 | 116 | assert_eq!( 117 | doc2.get_root_element().unwrap().get_child_elements().len(), 118 | 2 119 | ); 120 | 121 | let mut elements = doc1.get_root_element().unwrap().get_child_elements(); 122 | let mut node = elements.pop().unwrap(); 123 | node.unlink(); 124 | let mut imported = doc2.import_node(&mut node).unwrap(); 125 | assert!(doc2 126 | .get_root_element() 127 | .unwrap() 128 | .add_child(&mut imported) 129 | .is_ok()); 130 | 131 | assert_eq!( 132 | doc2.get_root_element().unwrap().get_child_elements().len(), 133 | 3 134 | ); 135 | } 136 | 137 | #[test] 138 | fn document_formatted_serialization() { 139 | let doc = 
create_test_document(Some("tests/resources/unformatted.xml")); 140 | let doc_str = doc.to_string(); 141 | // don't insist too hard on the length, cross-platform differences may have a minor influence 142 | assert!(doc_str.len() > 370); 143 | let doc_str_formatted = doc.to_string_with_options(SaveOptions { 144 | format: true, 145 | ..SaveOptions::default() 146 | }); 147 | assert!(doc_str_formatted.len() > 460); 148 | // basic assertion - a formatted document is longer than an unformatted one 149 | assert!(doc_str_formatted.len() > doc_str.len()); 150 | } 151 | 152 | #[test] 153 | /// Test well-formedness of a Rust string 154 | /// IMPORTANT: Currently NOT THREAD-SAFE, use in single-threaded apps only! 155 | fn well_formed_html() { 156 | let parser = Parser::default_html(); 157 | 158 | let trivial_well_formed = 159 | parser.is_well_formed_html("\n"); 160 | assert!(trivial_well_formed); 161 | 162 | let trivial_ill_formed = parser.is_well_formed_html("garbage"); 163 | assert!(!trivial_ill_formed); 164 | 165 | let should_ill_formed = parser.is_well_formed_html("> "); 166 | assert!(!should_ill_formed); 167 | 168 | let should_well_formed = parser.is_well_formed_html("\nTest\n

Tiny

2"); 169 | assert!(should_well_formed); 170 | } 171 | 172 | #[test] 173 | /// Parse & serialize HTML fragment 174 | fn html_fragment() { 175 | let fragment = r#"
Compression results on incompressible data.

Compression results on incompressible data.

"#; 176 | 177 | let parser = Parser::default_html(); 178 | let document = parser 179 | .parse_string_with_options( 180 | fragment, 181 | ParserOptions { 182 | no_def_dtd: true, 183 | no_implied: true, 184 | ..Default::default() 185 | }, 186 | ) 187 | .unwrap(); 188 | 189 | let mut serialized_fragment = document.to_string_with_options(SaveOptions { 190 | no_empty_tags: true, 191 | as_html: true, 192 | ..Default::default() 193 | }); 194 | let _added_newline = serialized_fragment.pop(); // remove added '\n' 195 | 196 | assert_eq!(fragment, serialized_fragment); 197 | } 198 | 199 | fn serialization_roundtrip(file_name: &str) { 200 | let file_result = std::fs::read_to_string(file_name); 201 | assert!(file_result.is_ok()); 202 | let xml_file = file_result.unwrap(); 203 | 204 | let parser = Parser::default(); 205 | let parse_result = parser.parse_string(xml_file.as_bytes()); 206 | assert!(parse_result.is_ok()); 207 | let doc = parse_result.unwrap(); 208 | 209 | let doc_str = doc.to_string(); 210 | 211 | assert_eq!(strip_whitespace(&xml_file), strip_whitespace(&doc_str)); 212 | } 213 | 214 | fn strip_whitespace(string: &str) -> String { 215 | string.replace("\r","") 216 | .replace("\n", "") 217 | .replace(" ", "") 218 | } 219 | 220 | #[test] 221 | fn simple_serialization_test01() { 222 | serialization_roundtrip("tests/resources/file01.xml"); 223 | } 224 | 225 | #[test] 226 | fn simple_serialization_unformatted() { 227 | serialization_roundtrip("tests/resources/unformatted.xml"); 228 | } 229 | 230 | #[test] 231 | fn simple_serialization_namespaces() { 232 | serialization_roundtrip("tests/resources/simple_namespaces.xml"); 233 | } 234 | 235 | #[test] 236 | fn serialization_no_empty() { 237 | let source_result = std::fs::read_to_string("tests/resources/empty_tags.xml"); 238 | assert!(source_result.is_ok()); 239 | let source_file = source_result.unwrap(); 240 | 241 | let result = std::fs::read_to_string("tests/resources/empty_tags_result.xml"); 242 | assert!(result.is_ok()); 
243 | let result_file = result.unwrap(); 244 | 245 | let options = SaveOptions { 246 | no_empty_tags: true, 247 | ..SaveOptions::default() 248 | }; 249 | 250 | let parser = Parser::default(); 251 | let parse_result = parser.parse_string(source_file.as_bytes()); 252 | assert!(parse_result.is_ok()); 253 | let doc = parse_result.unwrap(); 254 | 255 | let doc_str = doc.to_string_with_options(options); 256 | 257 | assert_eq!(strip_whitespace(&result_file), strip_whitespace(&doc_str)); 258 | } 259 | 260 | #[test] 261 | fn serialization_as_html() { 262 | let source_result = std::fs::read_to_string("tests/resources/as_html.xml"); 263 | assert!(source_result.is_ok()); 264 | let source_file = source_result.unwrap(); 265 | 266 | let result = std::fs::read_to_string("tests/resources/as_html_result.xml"); 267 | assert!(result.is_ok()); 268 | let result_file = result.unwrap(); 269 | 270 | let options = SaveOptions { 271 | as_html: true, 272 | ..SaveOptions::default() 273 | }; 274 | 275 | let parser = Parser::default(); 276 | let parse_result = parser.parse_string(source_file.as_bytes()); 277 | assert!(parse_result.is_ok()); 278 | let doc = parse_result.unwrap(); 279 | 280 | let doc_str = doc.to_string_with_options(options); 281 | 282 | assert_eq!(strip_whitespace(&result_file), strip_whitespace(&doc_str)); 283 | } 284 | -------------------------------------------------------------------------------- /tests/codec_tests.rs: -------------------------------------------------------------------------------- 1 | //! BOM parsing tests 2 | //! 3 | use libxml::parser::{Parser, XmlParseError}; 4 | use libxml::tree::Document; 5 | use std::fs; 6 | use std::io; 7 | use std::io::prelude::*; 8 | 9 | // HELPERS 10 | 11 | ///Read the entire file to a byte vector. Similar to read_to_string with 12 | ///no encoding assumption. 
13 | fn read_to_end(path: &str) -> io::Result> { 14 | let mut buffer = Vec::new(); 15 | let mut file = fs::File::open(path)?; 16 | file.read_to_end(&mut buffer)?; 17 | Ok(buffer) 18 | } 19 | 20 | ///Generate a unittest for a document result from parsing a variant of file01. 21 | fn file01_test(doc_result: Result) { 22 | assert!(doc_result.is_ok()); 23 | let doc = doc_result.unwrap(); 24 | let root = doc.get_root_element().unwrap(); 25 | 26 | // Tests 27 | let root_children = root.get_child_nodes(); 28 | assert_eq!(root_children.len(), 5, "file01 root has five child nodes"); 29 | let mut element_children = root.get_child_elements(); 30 | assert_eq!( 31 | element_children.len(), 32 | 2, 33 | "file01 root has two child elements" 34 | ); 35 | assert_eq!(element_children.pop().unwrap().get_name(), "child"); 36 | assert_eq!(element_children.pop().unwrap().get_name(), "child"); 37 | assert!(element_children.is_empty()); 38 | } 39 | 40 | ///Run a test for both the file and the path of file01. 41 | fn run_test(path: &str) { 42 | let parser = Parser::default(); 43 | file01_test(parser.parse_file(path)); 44 | 45 | let input = read_to_end(path).unwrap(); 46 | file01_test(parser.parse_string(&input)); 47 | } 48 | 49 | // ENCODINGS 50 | 51 | #[test] 52 | fn utf8_test() { 53 | run_test("tests/resources/file01.xml"); 54 | } 55 | 56 | #[test] 57 | fn utf16le_test() { 58 | run_test("tests/resources/file01_utf16le.xml"); 59 | } 60 | 61 | #[test] 62 | fn utf16be_test() { 63 | run_test("tests/resources/file01_utf16be.xml"); 64 | } 65 | 66 | // BOM 67 | 68 | #[test] 69 | fn utf8_bom_test() { 70 | run_test("tests/resources/file01_utf8_bom.xml"); 71 | } 72 | 73 | #[test] 74 | fn utf16le_bom_test() { 75 | run_test("tests/resources/file01_utf16le_bom.xml"); 76 | } 77 | 78 | #[test] 79 | fn utf16be_bom_test() { 80 | run_test("tests/resources/file01_utf16be_bom.xml"); 81 | } 82 | 83 | // UNICODE PATHS 84 | 85 | #[test] 86 | fn nonbmp_path_test() { 87 | 
run_test("tests/resources/file01_🔥🔥🔥.xml"); 88 | } 89 | -------------------------------------------------------------------------------- /tests/mutability_guards.rs: -------------------------------------------------------------------------------- 1 | //! Enforce Rust ownership pragmatics for the underlying libxml2 objects 2 | 3 | use libxml::parser::Parser; 4 | use libxml::tree::set_node_rc_guard; 5 | 6 | #[test] 7 | fn ownership_guards() { 8 | // Setup 9 | let parser = Parser::default(); 10 | let doc_result = parser.parse_file("tests/resources/file01.xml"); 11 | assert!(doc_result.is_ok()); 12 | let doc = doc_result.unwrap(); 13 | let root = doc.get_root_element().unwrap(); 14 | 15 | let mut first_a = root.get_first_element_child().unwrap(); 16 | let first_b = root.get_first_element_child().unwrap(); 17 | 18 | assert_eq!( 19 | first_a.get_attribute("attribute"), 20 | Some(String::from("value")) 21 | ); 22 | assert_eq!( 23 | first_b.get_attribute("attribute"), 24 | Some(String::from("value")) 25 | ); 26 | 27 | // Setting an attribute will fail and return an error, as there are too many Rc references 28 | // to the same node (Rc strong count of 3) 29 | // see `Node::node_ptr_mut` for details 30 | assert!(first_a.set_attribute("attribute", "newa").is_err()); 31 | 32 | assert_eq!( 33 | first_a.get_attribute("attribute"), 34 | Some(String::from("value")) 35 | ); 36 | assert_eq!( 37 | first_b.get_attribute("attribute"), 38 | Some(String::from("value")) 39 | ); 40 | 41 | // Try again with guard boosted, which allows the change 42 | set_node_rc_guard(3); 43 | 44 | // Setting an attribute now succeeds, as the boosted guard tolerates the Rc references 45 | // to the same node (Rc strong count of 3) 46 | // see `Node::node_ptr_mut` for details 47 | assert!(first_a.set_attribute("attribute", "newa").is_ok()); 48 | 49 | assert_eq!( 50 | first_a.get_attribute("attribute"), 51 | Some(String::from("newa")) 52 | ); 53 | assert_eq!( 54 | first_b.get_attribute("attribute"), 55 | 
Some(String::from("newa")) 56 | ); 57 | } 58 | -------------------------------------------------------------------------------- /tests/readonly_tests.rs: -------------------------------------------------------------------------------- 1 | //! Tree module tests 2 | //! 3 | use libxml::parser::Parser; 4 | use libxml::readonly::RoNode; 5 | use libxml::tree::NodeType; 6 | 7 | fn dfs_node(node: RoNode) -> i32 { 8 | 1 + node 9 | .get_child_nodes() 10 | .into_iter() 11 | .map(dfs_node) 12 | .sum::() 13 | } 14 | 15 | fn dfs_element(node: RoNode) -> i32 { 16 | 1 + node 17 | .get_child_elements() 18 | .into_iter() 19 | .map(dfs_element) 20 | .sum::() 21 | } 22 | 23 | #[test] 24 | fn readonly_scan_test() { 25 | let parser = Parser::default_html(); 26 | let doc_result = parser.parse_file("tests/resources/example.html"); 27 | assert!(doc_result.is_ok()); 28 | let doc = doc_result.unwrap(); 29 | 30 | let root: RoNode = doc.get_root_readonly().unwrap(); 31 | assert_eq!(root.get_name(), "html"); 32 | // "get_child_nodes" exhaustivity test, 33 | // 33 nodes, including text, comments, etc 34 | assert_eq!(dfs_node(root), 33); 35 | // "get_element_nodes" exhaustivity test, 36 | // 13 named element nodes in example.html 37 | assert_eq!(dfs_element(root), 13); 38 | 39 | let text: RoNode = root.get_first_child().expect("first child is a text node"); 40 | assert_eq!(text.get_name(), "text"); 41 | 42 | let head: RoNode = root 43 | .get_first_element_child() 44 | .expect("head is first child of html"); 45 | assert_eq!(head.get_name(), "head"); 46 | 47 | let mut sibling: RoNode = head 48 | .get_next_sibling() 49 | .expect("head should be followed by text"); 50 | assert_eq!(sibling.get_name(), "text"); 51 | while let Some(next) = sibling.get_next_sibling() { 52 | sibling = next; 53 | if next.get_type() == Some(NodeType::ElementNode) { 54 | break; 55 | } 56 | } 57 | assert_eq!(sibling.get_type(), Some(NodeType::ElementNode)); 58 | assert_eq!(sibling.get_name(), "body"); 59 | } 60 | 
-------------------------------------------------------------------------------- /tests/resources/as_html.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Page Title 4 | 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/resources/as_html_result.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Page Title 4 | 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/resources/empty_tags.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /tests/resources/empty_tags_result.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /tests/resources/example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Example Domain 5 | 6 | 7 | 8 | 9 | 40 | 41 | 42 | 43 |
44 |

Example Domain

45 |

This domain is established to be used for illustrative examples in documents. You may use this 46 | domain in examples without prior coordination or asking for permission.

47 |

More information...

48 |
49 | 50 | 51 | -------------------------------------------------------------------------------- /tests/resources/file01.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | some text 4 | more text 5 | 6 | -------------------------------------------------------------------------------- /tests/resources/file01_ns.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 | some text 9 | 10 | more text 11 | 12 | -------------------------------------------------------------------------------- /tests/resources/file01_utf16be.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-16BE"?> 2 | <root> 3 | <child attribute="value">some text</child> 4 | <child attribute="empty">more text</child> 5 | </root> 6 | -------------------------------------------------------------------------------- /tests/resources/file01_utf16be_bom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KWARC/rust-libxml/13fbf1ce585fcd4d2fe975d6377edc6a825644c8/tests/resources/file01_utf16be_bom.xml -------------------------------------------------------------------------------- /tests/resources/file01_utf16le.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-16LE"?> 2 | <root> 3 | <child attribute="value">some text</child> 4 | <child attribute="empty">more text</child> 5 | </root> 6 | -------------------------------------------------------------------------------- /tests/resources/file01_utf16le_bom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KWARC/rust-libxml/13fbf1ce585fcd4d2fe975d6377edc6a825644c8/tests/resources/file01_utf16le_bom.xml -------------------------------------------------------------------------------- 
/tests/resources/file01_utf8_bom.xml: -------------------------------------------------------------------------------- 1 |  2 | 3 | some text 4 | more text 5 | 6 | -------------------------------------------------------------------------------- /tests/resources/file01_🔥🔥🔥.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | some text 4 | more text 5 | 6 | -------------------------------------------------------------------------------- /tests/resources/file02.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |

Something

5 | 6 | 7 | -------------------------------------------------------------------------------- /tests/resources/ids.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |

Hello

8 | 9 |

10 |
11 | 12 | 13 |

World!

14 |
15 |
16 |
17 |
-------------------------------------------------------------------------------- /tests/resources/schema.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Tove 5 | Jani 6 | Reminder 7 | Don't forget me this weekend! 8 | 9 | -------------------------------------------------------------------------------- /tests/resources/schema.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /tests/resources/simple_namespaces.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | col 1 8 | col 2 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | col 3 20 | nested f 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /tests/resources/unformatted.xml: -------------------------------------------------------------------------------- 1 | col 1col 2col 3 nested f -------------------------------------------------------------------------------- /tests/results/README.md: -------------------------------------------------------------------------------- 1 | # Test results 2 | This directory will contain the result files of the tests. 3 | -------------------------------------------------------------------------------- /tests/schema_tests.rs: -------------------------------------------------------------------------------- 1 | //! 2 | //! Test Schema Loading, XML Validating 3 | //! 
4 | use libxml::schemas::SchemaParserContext; 5 | use libxml::schemas::SchemaValidationContext; 6 | 7 | use libxml::parser::Parser; 8 | 9 | static NOTE_SCHEMA: &str = r#" 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | "#; 23 | 24 | static STOCK_SCHEMA: &str = r#" 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | "#; 44 | 45 | static VALID_NOTE_XML: &str = r#" 46 | 47 | Tove 48 | Jani 49 | Reminder 50 | Don't forget me this weekend! 51 | 52 | "#; 53 | 54 | static INVALID_NOTE_XML: &str = r#" 55 | 56 | Tove 57 | Jani 58 | Reminder 59 | Don't forget me this weekend! 60 | 61 | "#; 62 | 63 | static INVALID_STOCK_XML: &str = r#" 64 | 65 | 66 | 2014-01-01 67 | NOT A NUMBER 68 | 69 | 70 | 2014-01-02 71 | 540.98 72 | 73 | 74 | NOT A DATE 75 | 543.93 76 | 77 | =2.12, at least not as currently implemented. 83 | // while it still reliably succeeds single-threaded, new implementation is needed to use 84 | // these in a parallel setting. 85 | #[test] 86 | fn schema_from_string() { 87 | let xml = Parser::default() 88 | .parse_string(VALID_NOTE_XML) 89 | .expect("Expected to be able to parse XML Document from string"); 90 | 91 | let mut xsdparser = SchemaParserContext::from_buffer(NOTE_SCHEMA); 92 | let xsd = SchemaValidationContext::from_parser(&mut xsdparser); 93 | 94 | if let Err(errors) = xsd { 95 | for err in &errors { 96 | eprintln!("{}", err.message.as_ref().unwrap()); 97 | } 98 | panic!("Failed to parse schema with {} errors", errors.len()); 99 | } 100 | 101 | let mut xsdvalidator = xsd.unwrap(); 102 | 103 | // loop over more than one validation to test for leaks in the error handling callback interactions 104 | for _ in 0..5 { 105 | if let Err(errors) = xsdvalidator.validate_document(&xml) { 106 | for err in &errors { 107 | eprintln!("{}", err.message.as_ref().unwrap()); 108 | } 109 | 110 | panic!("Invalid XML accoding to XSD schema"); 111 | } 112 | } 113 | } 114 | 115 | #[test] 116 | fn 
schema_from_string_generates_errors() { 117 | let xml = Parser::default() 118 | .parse_string(INVALID_NOTE_XML) 119 | .expect("Expected to be able to parse XML Document from string"); 120 | 121 | let mut xsdparser = SchemaParserContext::from_buffer(NOTE_SCHEMA); 122 | let xsd = SchemaValidationContext::from_parser(&mut xsdparser); 123 | 124 | if let Err(errors) = xsd { 125 | for err in &errors { 126 | eprintln!("{}", err.message.as_ref().unwrap()); 127 | } 128 | panic!("Failed to parse schema with {} errors", errors.len()); 129 | } 130 | 131 | let mut xsdvalidator = xsd.unwrap(); 132 | for _ in 0..5 { 133 | if let Err(errors) = xsdvalidator.validate_document(&xml) { 134 | for err in &errors { 135 | assert_eq!( 136 | "Element 'bad': This element is not expected. Expected is ( to ).\n", 137 | err.message.as_ref().unwrap() 138 | ); 139 | } 140 | } 141 | } 142 | } 143 | 144 | #[test] 145 | fn schema_from_string_reports_unique_errors() { 146 | let xml = Parser::default() 147 | .parse_string(INVALID_STOCK_XML) 148 | .expect("Expected to be able to parse XML Document from string"); 149 | 150 | let mut xsdparser = SchemaParserContext::from_buffer(STOCK_SCHEMA); 151 | let xsd = SchemaValidationContext::from_parser(&mut xsdparser); 152 | 153 | if let Err(errors) = xsd { 154 | for err in &errors { 155 | eprintln!("{}", err.message.as_ref().unwrap()); 156 | } 157 | 158 | panic!("Failed to parse schema with {} errors", errors.len()); 159 | } 160 | 161 | let mut xsdvalidator = xsd.unwrap(); 162 | for _ in 0..5 { 163 | if let Err(errors) = xsdvalidator.validate_document(&xml) { 164 | assert_eq!(errors.len(), 5); 165 | let expected_errors = vec![ 166 | "Element 'stock', attribute 'junkAttribute': The attribute 'junkAttribute' is not allowed.\n", 167 | "Element 'stock': The attribute 'ticker' is required but missing.\n", 168 | "Element 'stock': The attribute 'exchange' is required but missing.\n", 169 | "Element 'price': 'NOT A NUMBER' is not a valid value of the atomic type 
'xs:float'.\n", 170 | "Element 'date': 'NOT A DATE' is not a valid value of the atomic type 'xs:date'.\n" 171 | ]; 172 | for err_msg in expected_errors { 173 | assert!(errors.iter().any(|err| err.message.as_ref().unwrap() == err_msg), "Expected error message {} was not found", err_msg); 174 | } 175 | } 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /tests/tree_tests.rs: -------------------------------------------------------------------------------- 1 | //! Tree module tests 2 | //! 3 | 4 | use libxml::parser::Parser; 5 | use libxml::tree::{Document, Namespace, Node, NodeType}; 6 | 7 | #[test] 8 | /// Root node and first child of root node are different 9 | /// (There is a tiny chance this might fail for a correct program) 10 | fn child_of_root_has_different_hash() { 11 | let parser = Parser::default(); 12 | { 13 | let doc_result = parser.parse_file("tests/resources/file01.xml"); 14 | assert!(doc_result.is_ok()); 15 | let doc = doc_result.unwrap(); 16 | let root = doc.get_root_element().unwrap(); 17 | assert!(!root.is_text_node()); 18 | if let Some(child) = root.get_first_child() { 19 | assert!(root != child); 20 | } else { 21 | assert!(false); //test failed - child doesn't exist 22 | } 23 | // same check with last child 24 | if let Some(child) = root.get_last_child() { 25 | assert!(root != child); 26 | } else { 27 | assert!(false); //test failed - child doesn't exist 28 | } 29 | } 30 | } 31 | 32 | #[test] 33 | /// Siblings basic unit tests 34 | fn node_sibling_accessors() { 35 | let mut doc = Document::new().unwrap(); 36 | let hello_element_result = Node::new("hello", None, &doc); 37 | assert!(hello_element_result.is_ok()); 38 | let mut hello_element = hello_element_result.unwrap(); 39 | doc.set_root_element(&hello_element); 40 | 41 | let mut new_sibling = Node::new("sibling", None, &doc).unwrap(); 42 | assert!(hello_element.add_prev_sibling(&mut new_sibling).is_ok()); 43 | } 44 | 45 | #[test] 46 | fn 
node_children_accessors() { 47 | // Setup 48 | let parser = Parser::default(); 49 | let doc_result = parser.parse_file("tests/resources/file01.xml"); 50 | assert!(doc_result.is_ok()); 51 | let doc = doc_result.unwrap(); 52 | let root = doc.get_root_element().unwrap(); 53 | 54 | // Tests 55 | let root_children = root.get_child_nodes(); 56 | assert_eq!(root_children.len(), 5, "file01 root has five child nodes"); 57 | let mut element_children = root.get_child_elements(); 58 | assert_eq!( 59 | element_children.len(), 60 | 2, 61 | "file01 root has two child elements" 62 | ); 63 | assert_eq!(element_children.pop().unwrap().get_name(), "child"); 64 | assert_eq!(element_children.pop().unwrap().get_name(), "child"); 65 | assert!(element_children.is_empty()); 66 | } 67 | 68 | #[test] 69 | fn node_attributes_accessor() { 70 | // Setup 71 | let parser = Parser::default(); 72 | let doc_result = parser.parse_file("tests/resources/file01.xml"); 73 | assert!(doc_result.is_ok()); 74 | let doc = doc_result.unwrap(); 75 | let root = doc.get_root_element().unwrap(); 76 | let mut root_elements = root.get_child_elements(); 77 | let child_opt = root_elements.first_mut(); 78 | assert!(child_opt.is_some()); 79 | let child = child_opt.unwrap(); 80 | 81 | // All attributes 82 | let attributes = child.get_attributes(); 83 | assert_eq!(attributes.len(), 1); 84 | assert_eq!(attributes.get("attribute"), Some(&"value".to_string())); 85 | 86 | // Has 87 | assert_eq!(child.has_attribute("attribute"), true); 88 | // Get 89 | assert_eq!(child.get_attribute("attribute"), Some("value".to_string())); 90 | // Get as node 91 | let attr_node_opt = child.get_attribute_node("attribute"); 92 | assert!(attr_node_opt.is_some()); 93 | let attr_node = attr_node_opt.unwrap(); 94 | assert_eq!(attr_node.get_name(), "attribute"); 95 | assert_eq!(attr_node.get_type(), Some(NodeType::AttributeNode)); 96 | 97 | // Set 98 | assert!(child.set_attribute("attribute", "setter_value").is_ok()); 99 | assert_eq!( 100 | 
child.get_attribute("attribute"), 101 | Some("setter_value".to_string()) 102 | ); 103 | // Remove 104 | assert!(child.remove_attribute("attribute").is_ok()); 105 | assert_eq!(child.get_attribute("attribute"), None); 106 | assert_eq!(child.has_attribute("attribute"), false); 107 | // Recount 108 | let attributes = child.get_attributes(); 109 | assert_eq!(attributes.len(), 0); 110 | } 111 | 112 | #[test] 113 | fn node_attributes_ns_accessor() { 114 | // Setup 115 | let parser = Parser::default(); 116 | let doc_result = parser.parse_file("tests/resources/file01_ns.xml"); 117 | assert!(doc_result.is_ok()); 118 | let doc = doc_result.unwrap(); 119 | let root = doc.get_root_element().unwrap(); 120 | let mut root_elements = root.get_child_elements(); 121 | let child_opt = root_elements.first_mut(); 122 | assert!(child_opt.is_some()); 123 | let child = child_opt.unwrap(); 124 | 125 | // All attributes 126 | let attributes = child.get_attributes_ns(); 127 | assert_eq!(attributes.len(), 3); 128 | assert_eq!( 129 | attributes.get(&("attribute".to_string(), None)), 130 | Some(&"value1".to_string()) 131 | ); 132 | let namespaces = child.get_namespaces(&doc); 133 | assert_eq!(namespaces.len(), 2); 134 | let foo_ns = namespaces[0].clone(); 135 | let bar_ns = namespaces[1].clone(); 136 | 137 | assert_eq!( 138 | attributes.get(&("attribute".to_string(), Some(foo_ns.clone()))), 139 | Some(&"foo1".to_string()) 140 | ); 141 | assert_eq!( 142 | attributes.get(&("attr".to_string(), Some(bar_ns.clone()))), 143 | Some(&"bar1".to_string()) 144 | ); 145 | 146 | // Has 147 | assert!(child.has_attribute("attribute")); 148 | assert!(child.has_attribute_no_ns("attribute")); 149 | assert!(child.has_attribute_ns("attribute", "http://www.example.com/myns"),); 150 | assert!(child.has_attribute("attr")); 151 | assert!(!child.has_attribute_no_ns("attr")); 152 | assert!(child.has_attribute_ns("attr", "http://www.example.com/myns")); 153 | 154 | // Get 155 | assert_eq!( 156 | 
child.get_attribute_no_ns("attribute"), 157 | Some("value1".to_string()) 158 | ); 159 | assert_eq!( 160 | child.get_attribute_ns("attribute", "http://www.example.com/myns"), 161 | Some("foo1".to_string()) 162 | ); 163 | assert_eq!( 164 | child.get_attribute_ns("attr", "http://www.example.com/myns"), 165 | Some("bar1".to_string()) 166 | ); 167 | 168 | // Get as node 169 | let attr_node_opt = child.get_attribute_node_no_ns("attribute"); 170 | assert!(attr_node_opt.is_some()); 171 | let attr_node = attr_node_opt.unwrap(); 172 | assert_eq!(attr_node.get_name(), "attribute"); 173 | assert_eq!(attr_node.get_type(), Some(NodeType::AttributeNode)); 174 | let attr_node_opt = child.get_attribute_node_no_ns("attr"); 175 | assert!(attr_node_opt.is_none()); 176 | let attr_node_opt = child.get_attribute_node_ns("attr", "http://www.example.com/myns"); 177 | assert!(attr_node_opt.is_some()); 178 | let attr_node = attr_node_opt.unwrap(); 179 | assert_eq!(attr_node.get_name(), "attr"); 180 | assert_eq!(attr_node.get_type(), Some(NodeType::AttributeNode)); 181 | 182 | // Set 183 | assert!(child.set_attribute("attribute", "setter_value").is_ok()); 184 | assert_eq!( 185 | child.get_attribute_no_ns("attribute"), 186 | Some("setter_value".to_string()) 187 | ); 188 | assert!(child 189 | .set_attribute_ns("attribute", "foo_value", &foo_ns) 190 | .is_ok()); 191 | assert_eq!( 192 | child.get_attribute_no_ns("attribute"), 193 | Some("setter_value".to_string()) 194 | ); 195 | // Remove 196 | assert!(child.has_attribute_no_ns("attribute")); 197 | assert!(child.remove_attribute_no_ns("attribute").is_ok()); 198 | assert_eq!(child.get_attribute_no_ns("attribute"), None); 199 | assert!(!child.has_attribute_no_ns("attribute")); 200 | // Recount 201 | let attributes = child.get_attributes_ns(); 202 | assert_eq!(attributes.len(), 2); 203 | } 204 | 205 | #[test] 206 | fn namespace_partial_eq() { 207 | // Setup 208 | let parser = Parser::default(); 209 | let doc_result = 
parser.parse_file("tests/resources/file01_ns.xml"); 210 | assert!(doc_result.is_ok()); 211 | let doc = doc_result.unwrap(); 212 | let root = doc.get_root_element().unwrap(); 213 | let mut root_elements = root.get_child_elements(); 214 | let child1_opt = root_elements.first_mut(); 215 | assert!(child1_opt.is_some()); 216 | let child1 = child1_opt.unwrap(); 217 | 218 | // Child 1 namespaces 219 | let namespaces1 = child1.get_namespaces(&doc); 220 | assert_eq!(namespaces1.len(), 2); 221 | let foo_ns1 = namespaces1[0].clone(); 222 | assert_eq!(foo_ns1.get_prefix(), "foo"); 223 | assert_eq!(foo_ns1.get_href(), "http://www.example.com/myns"); 224 | let bar_ns1 = namespaces1[1].clone(); 225 | assert_eq!(bar_ns1.get_prefix(), "bar"); 226 | assert_eq!(bar_ns1.get_href(), "http://www.example.com/myns"); 227 | // The current implementation of PartialEq for Namespace compares the prefix 228 | // and href 229 | assert!(foo_ns1 != bar_ns1); 230 | 231 | // Compare with child2 namespace 232 | let child2_opt = child1.get_next_element_sibling(); 233 | assert!(child2_opt.is_some()); 234 | let child2 = child2_opt.unwrap(); 235 | let attributes2 = child2.get_attributes_ns(); 236 | assert_eq!(attributes2.len(), 2); 237 | let namespaces2 = child2.get_namespaces(&doc); 238 | assert_eq!(namespaces2.len(), 1); 239 | let foo_ns2 = namespaces2[0].clone(); 240 | // The current implementation of PartialEq for Namespace compares the prefix 241 | // and href not the pointer 242 | assert!(foo_ns1 == foo_ns2); 243 | assert_eq!(foo_ns1.get_href(), foo_ns2.get_href()); 244 | assert_eq!(foo_ns1.get_prefix(), foo_ns2.get_prefix()); 245 | assert_ne!(foo_ns1.ns_ptr(), foo_ns2.ns_ptr()); 246 | } 247 | 248 | #[test] 249 | fn attribute_namespace_accessors() { 250 | let mut doc = Document::new().unwrap(); 251 | let element_result = Node::new("example", None, &doc); 252 | assert!(element_result.is_ok()); 253 | 254 | let mut element = element_result.unwrap(); 255 | doc.set_root_element(&element); 256 | 257 | 
let ns_result = Namespace::new( 258 | "myxml", 259 | "http://www.w3.org/XML/1998/namespace", 260 | &mut element, 261 | ); 262 | assert!(ns_result.is_ok()); 263 | let ns = ns_result.unwrap(); 264 | assert!(element.set_attribute_ns("id", "testing", &ns).is_ok()); 265 | 266 | let id_attr = element.get_attribute_ns("id", "http://www.w3.org/XML/1998/namespace"); 267 | assert!(id_attr.is_some()); 268 | assert_eq!(id_attr.unwrap(), "testing"); 269 | 270 | let id_regular = element.get_attribute("id"); 271 | assert!(id_regular.is_some()); 272 | assert_eq!(id_regular.unwrap(), "testing"); 273 | 274 | let id_false_ns = element.get_attribute_ns("id", "http://www.foobar.org"); 275 | assert!(id_false_ns.is_none()); 276 | let fb_ns_result = Namespace::new("fb", "http://www.foobar.org", &mut element); 277 | assert!(fb_ns_result.is_ok()); 278 | let fb_ns = fb_ns_result.unwrap(); 279 | assert!(element.set_attribute_ns("fb", "fb", &fb_ns).is_ok()); 280 | assert_eq!( 281 | element.get_attribute_ns("fb", "http://www.foobar.org"), 282 | Some("fb".to_string()) 283 | ); 284 | assert!(element 285 | .remove_attribute_ns("fb", "http://www.foobar.org") 286 | .is_ok()); 287 | assert_eq!( 288 | element.get_attribute_ns("fb", "http://www.foobar.org"), 289 | None 290 | ); 291 | 292 | let ns_prefix = element.lookup_namespace_prefix("http://www.w3.org/XML/1998/namespace"); 293 | assert_eq!(ns_prefix, Some("xml".to_string())); // system ns has the global prefix when doing global lookup 294 | let fb_prefix = element.lookup_namespace_prefix("http://www.foobar.org"); 295 | assert_eq!(fb_prefix, Some("fb".to_string())); // system ns has the global prefix when doing global lookup 296 | 297 | let ns_uri = element.lookup_namespace_uri("myxml"); 298 | assert_eq!( 299 | ns_uri, 300 | Some("http://www.w3.org/XML/1998/namespace".to_string()) 301 | ); // system ns has the global uri when doing global lookup 302 | let fb_uri = element.lookup_namespace_uri("fb"); 303 | assert_eq!(fb_uri, 
Some("http://www.foobar.org".to_string())); // system ns has the global prefix when doing global lookup 304 | } 305 | 306 | #[test] 307 | fn attribute_no_namespace() { 308 | let mut doc = Document::new().unwrap(); 309 | let element_result = Node::new("example", None, &doc); 310 | assert!(element_result.is_ok()); 311 | 312 | let mut element = element_result.unwrap(); 313 | doc.set_root_element(&element); 314 | 315 | let ns_result = Namespace::new("myns", "https://www.example.com/myns", &mut element); 316 | assert!(ns_result.is_ok()); 317 | let ns = ns_result.unwrap(); 318 | assert!(element.set_attribute_ns("foo", "ns", &ns).is_ok()); 319 | 320 | let foo_ns_attr = element.get_attribute_ns("foo", "https://www.example.com/myns"); 321 | assert!(foo_ns_attr.is_some()); 322 | assert_eq!(foo_ns_attr.unwrap(), "ns"); 323 | 324 | let foo_no_ns_attr = element.get_attribute_no_ns("foo"); 325 | assert!(foo_no_ns_attr.is_none()); 326 | 327 | assert!(element.set_attribute("foo", "no_ns").is_ok()); 328 | 329 | let foo_no_ns_attr = element.get_attribute_no_ns("foo"); 330 | assert!(foo_no_ns_attr.is_some()); 331 | assert_eq!(foo_no_ns_attr.unwrap(), "no_ns"); 332 | 333 | assert!(element.remove_attribute_no_ns("foo").is_ok()); 334 | let foo_no_ns_attr = element.get_attribute_no_ns("foo"); 335 | assert!(foo_no_ns_attr.is_none()); 336 | 337 | assert!(element.set_attribute("bar", "bar").is_ok()); 338 | let bar_no_ns_attr = element.get_attribute_no_ns("bar"); 339 | assert!(bar_no_ns_attr.is_some()); 340 | assert_eq!(bar_no_ns_attr.unwrap(), "bar"); 341 | } 342 | 343 | #[test] 344 | fn node_can_unbind() { 345 | let mut doc = Document::new().unwrap(); 346 | let element_result = Node::new("example", None, &doc); 347 | assert!(element_result.is_ok()); 348 | 349 | let mut element = element_result.unwrap(); 350 | doc.set_root_element(&element); 351 | 352 | let mut first_child = Node::new("first", None, &doc).unwrap(); 353 | let mut second_child = Node::new("second", None, &doc).unwrap(); 354 | 
let mut third_child = Node::new("third", None, &doc).unwrap(); 355 | 356 | assert!(element.add_child(&mut first_child).is_ok()); 357 | assert!(element.add_child(&mut second_child).is_ok()); 358 | assert!(element.add_child(&mut third_child).is_ok()); 359 | 360 | assert_eq!(element.get_child_nodes().len(), 3); 361 | first_child.unbind_node(); 362 | assert_eq!(element.get_child_nodes().len(), 2); 363 | second_child.unlink_node(); 364 | assert_eq!(element.get_child_nodes().len(), 1); 365 | third_child.unlink(); 366 | assert_eq!(element.get_child_nodes().len(), 0); 367 | 368 | // Test reparenting via unlink 369 | let mut transfer = Node::new("transfer", None, &doc).unwrap(); 370 | assert!(element.add_child(&mut transfer).is_ok()); 371 | assert!(transfer.append_text("test text").is_ok()); 372 | let mut receiver = Node::new("receiver", None, &doc).unwrap(); 373 | assert!(element.add_child(&mut receiver).is_ok()); 374 | assert_eq!(element.get_child_nodes().len(), 2); 375 | assert_eq!(transfer.get_child_nodes().len(), 1); 376 | assert_eq!(receiver.get_child_nodes().len(), 0); 377 | 378 | transfer.unlink(); 379 | assert_eq!(element.get_child_nodes().len(), 1); 380 | assert_eq!(receiver.get_child_nodes().len(), 0); 381 | assert!(receiver.add_child(&mut transfer).is_ok()); 382 | assert_eq!(receiver.get_child_nodes().len(), 1); 383 | assert_eq!(transfer.get_content(), "test text".to_owned()); 384 | assert_eq!(transfer.get_parent(), Some(receiver)); 385 | } 386 | 387 | #[test] 388 | /// Can mock a node object (useful for defaults that will be overridden) 389 | fn can_mock_node() { 390 | let doc_mock = Document::new().unwrap(); 391 | let node_mock = Node::mock(&doc_mock); 392 | assert!(!node_mock.is_text_node()); 393 | } 394 | 395 | #[test] 396 | /// Can make a mock node hashable 397 | fn can_hash_mock_node() { 398 | let doc_mock = Document::new().unwrap(); 399 | let node_mock = Node::mock(&doc_mock); 400 | assert!(node_mock.to_hashable() > 0); 401 | } 402 | 403 | #[test] 404 | 
/// Can make null nodes and documents, to avoid memory allocations 405 | fn can_null_node() { 406 | let null_node = Node::null(); 407 | let second_null_node = Node::null(); 408 | assert!(null_node.is_null()); 409 | assert!(second_null_node.is_null()); 410 | assert_eq!(null_node, second_null_node); 411 | } 412 | 413 | #[test] 414 | /// Can set and get attributes 415 | fn can_manage_attributes() { 416 | let mut doc = Document::new().unwrap(); 417 | let hello_element_result = Node::new("hello", None, &doc); 418 | assert!(hello_element_result.is_ok()); 419 | let mut hello_element = hello_element_result.unwrap(); 420 | doc.set_root_element(&hello_element); 421 | 422 | let key = "examplekey"; 423 | let value = "examplevalue"; 424 | let pre_value = hello_element.get_attribute(key); 425 | assert_eq!(pre_value, None); 426 | let pre_prop_check = hello_element.has_property(key); 427 | assert_eq!(pre_prop_check, false); 428 | let pre_prop_value = hello_element.get_property(key); 429 | assert_eq!(pre_prop_value, None); 430 | 431 | assert!(hello_element.set_attribute(key, value).is_ok()); 432 | let new_check = hello_element.has_attribute(key); 433 | assert_eq!(new_check, true); 434 | let new_value = hello_element.get_attribute(key); 435 | assert_eq!(new_value, Some(value.to_owned())); 436 | } 437 | 438 | #[test] 439 | /// Can set and get text node content 440 | fn can_set_get_text_node_content() { 441 | let mut doc = Document::new().unwrap(); 442 | let hello_element_result = Node::new("hello", None, &doc); 443 | assert!(hello_element_result.is_ok()); 444 | let mut hello_element = hello_element_result.unwrap(); 445 | doc.set_root_element(&hello_element); 446 | 447 | assert!(hello_element.get_content().is_empty()); 448 | assert!(hello_element.append_text("hello ").is_ok()); 449 | assert_eq!(hello_element.get_content(), "hello "); 450 | assert!(hello_element.append_text("world!").is_ok()); 451 | assert_eq!(hello_element.get_content(), "hello world!"); 452 | } 453 | 454 | #[test] 
455 | /// Basic namespace workflow 456 | fn can_work_with_namespaces() { 457 | let mut doc = Document::new().unwrap(); 458 | let mut root_node = Node::new("root", None, &doc).unwrap(); 459 | doc.set_root_element(&root_node); 460 | 461 | let initial_namespace_list = root_node.get_namespaces(&doc); 462 | assert_eq!(initial_namespace_list.len(), 0); 463 | 464 | let mock_ns_result = Namespace::new("mock", "http://example.com/ns/mock", &mut root_node); 465 | assert!(mock_ns_result.is_ok()); 466 | let second_ns_result = Namespace::new("second", "http://example.com/ns/second", &mut root_node); 467 | assert!(second_ns_result.is_ok()); 468 | 469 | // try to attach this namespace to a node 470 | assert!(root_node.get_namespace().is_none()); 471 | assert!(root_node.set_namespace(&mock_ns_result.unwrap()).is_ok()); 472 | let active_ns_opt = root_node.get_namespace(); 473 | assert!(active_ns_opt.is_some()); 474 | let active_ns = active_ns_opt.unwrap(); 475 | assert_eq!(active_ns.get_prefix(), "mock"); 476 | assert_eq!(active_ns.get_href(), "http://example.com/ns/mock"); 477 | 478 | // now get all namespaces for the node and check we have ours 479 | let mut namespace_list = root_node.get_namespaces(&doc); 480 | assert_eq!(namespace_list.len(), 2); 481 | 482 | let second_ns = namespace_list.pop().unwrap(); 483 | assert_eq!(second_ns.get_prefix(), "second"); 484 | assert_eq!(second_ns.get_href(), "http://example.com/ns/second"); 485 | 486 | let first_ns = namespace_list.pop().unwrap(); 487 | assert_eq!(first_ns.get_prefix(), "mock"); 488 | assert_eq!(first_ns.get_href(), "http://example.com/ns/mock"); 489 | } 490 | 491 | #[test] 492 | fn can_work_with_ns_declarations() { 493 | let mut doc = Document::new().unwrap(); 494 | let mut root_node = Node::new("root", None, &doc).unwrap(); 495 | doc.set_root_element(&root_node); 496 | 497 | let mock_ns_result = Namespace::new("mock1", "http://example.com/ns/mock1", &mut root_node); 498 | assert!(mock_ns_result.is_ok()); 499 | let 
second_ns_result = Namespace::new("mock2", "http://example.com/ns/mock2", &mut root_node); 500 | assert!(second_ns_result.is_ok()); 501 | 502 | let declarations = root_node.get_namespace_declarations(); 503 | assert_eq!(declarations.len(), 2); 504 | } 505 | 506 | #[test] 507 | /// Can view documents as nodes 508 | fn can_cast_doc_to_node() { 509 | // Setup 510 | let parser = Parser::default(); 511 | let doc_result = parser.parse_file("tests/resources/file01.xml"); 512 | assert!(doc_result.is_ok()); 513 | 514 | let doc = doc_result.unwrap(); 515 | let doc_node = doc.as_node(); 516 | assert_eq!(doc_node.get_type(), Some(NodeType::DocumentNode)); 517 | let root_node_opt = doc_node.get_first_child(); 518 | assert!(root_node_opt.is_some()); 519 | let root_node = root_node_opt.unwrap(); 520 | assert_eq!(root_node.get_name(), "root"); 521 | } 522 | 523 | #[test] 524 | fn can_replace_child() { 525 | let mut doc = Document::new().unwrap(); 526 | let mut root_node = Node::new("root", None, &doc).unwrap(); 527 | doc.set_root_element(&root_node); 528 | let mut a = Node::new("a", None, &doc).unwrap(); 529 | let mut b = Node::new("b", None, &doc).unwrap(); 530 | let mut c = Node::new("c", None, &doc).unwrap(); 531 | let mut d = Node::new("d", None, &doc).unwrap(); 532 | let mut e = Node::new("e", None, &doc).unwrap(); 533 | 534 | assert!(root_node.add_child(&mut a).is_ok()); 535 | assert!(root_node.add_child(&mut b).is_ok()); 536 | assert!(root_node.add_child(&mut c).is_ok()); 537 | assert!(root_node.add_child(&mut d).is_ok()); 538 | assert!(root_node.add_child(&mut e).is_ok()); 539 | assert_eq!( 540 | doc.to_string(), 541 | "\n\n", 542 | "document initialized correctly." 543 | ); 544 | 545 | // replace first child with new F 546 | let f = Node::new("F", None, &doc).unwrap(); 547 | let a_result = root_node.replace_child_node(f, a); 548 | assert!(a_result.is_ok()); 549 | 550 | assert_eq!( 551 | doc.to_string(), 552 | "\n\n", 553 | "document initialized correctly." 
554 | ); 555 | 556 | // replace last child with new G 557 | let g = Node::new("G", None, &doc).unwrap(); 558 | assert!(root_node.replace_child_node(g, e).is_ok()); 559 | assert_eq!( 560 | doc.to_string(), 561 | "\n\n", 562 | "document initialized correctly." 563 | ); 564 | 565 | // replace middle child with new H 566 | let h = Node::new("H", None, &doc).unwrap(); 567 | assert!(root_node.replace_child_node(h, c).is_ok()); 568 | assert_eq!( 569 | doc.to_string(), 570 | "\n\n", 571 | "document initialized correctly." 572 | ); 573 | 574 | // fail to replace a, as it is already removed. 575 | let none = Node::new("none", None, &doc).unwrap(); 576 | assert!(root_node 577 | .replace_child_node(none, a_result.unwrap()) 578 | .is_err()); 579 | // no change. 580 | assert_eq!( 581 | doc.to_string(), 582 | "\n\n", 583 | "document initialized correctly." 584 | ); 585 | 586 | // replacing with self succeeds without change. 587 | assert!(root_node.replace_child_node(b.clone(), b).is_ok()); 588 | assert_eq!( 589 | doc.to_string(), 590 | "\n\n", 591 | "document initialized correctly." 592 | ); 593 | // replacing with parent succeeds without change. 594 | assert!(root_node.replace_child_node(root_node.clone(), d).is_ok()); 595 | assert_eq!( 596 | doc.to_string(), 597 | "\n\n", 598 | "document initialized correctly." 599 | ); 600 | } 601 | -------------------------------------------------------------------------------- /tests/xpath_tests.rs: -------------------------------------------------------------------------------- 1 | //! xpath module tests 2 | //! 
3 | 4 | use libxml::parser::Parser; 5 | use libxml::xpath::Context; 6 | 7 | #[test] 8 | /// Test the evaluation of an xpath expression yields the correct number of nodes 9 | fn xpath_result_number_correct() { 10 | let parser = Parser::default(); 11 | let doc_result = parser.parse_file("tests/resources/file01.xml"); 12 | assert!(doc_result.is_ok()); 13 | let doc = doc_result.unwrap(); 14 | let context = Context::new(&doc).unwrap(); 15 | 16 | let result1 = context.evaluate("//child").unwrap(); 17 | assert_eq!(result1.get_number_of_nodes(), 2); 18 | assert_eq!(result1.get_nodes_as_vec().len(), 2); 19 | 20 | let result2 = context.evaluate("//nonexistent").unwrap(); 21 | assert_eq!(result2.get_number_of_nodes(), 0); 22 | assert_eq!(result2.get_nodes_as_vec().len(), 0); 23 | } 24 | 25 | #[test] 26 | /// Test xpath with namespaces 27 | fn xpath_with_namespaces() { 28 | let parser = Parser::default(); 29 | let doc_result = parser.parse_file("tests/resources/simple_namespaces.xml"); 30 | assert!(doc_result.is_ok()); 31 | 32 | let doc = doc_result.unwrap(); 33 | let context = Context::new(&doc).unwrap(); 34 | assert!(context 35 | .register_namespace("h", "http://example.com/ns/hello") 36 | .is_ok()); 37 | assert!(context 38 | .register_namespace("f", "http://example.com/ns/farewell") 39 | .is_ok()); 40 | assert!(context 41 | .register_namespace("r", "http://example.com/ns/root") 42 | .is_ok()); 43 | let result_h_td = context.evaluate("//h:td").unwrap(); 44 | assert_eq!(result_h_td.get_number_of_nodes(), 3); 45 | assert_eq!(result_h_td.get_nodes_as_vec().len(), 3); 46 | 47 | let result_h_table = context.evaluate("//h:table").unwrap(); 48 | assert_eq!(result_h_table.get_number_of_nodes(), 2); 49 | assert_eq!(result_h_table.get_nodes_as_vec().len(), 2); 50 | 51 | let result_f_footer = context.evaluate("//f:footer").unwrap(); 52 | assert_eq!(result_f_footer.get_number_of_nodes(), 2); 53 | assert_eq!(result_f_footer.get_nodes_as_vec().len(), 2); 54 | 55 | let result_r = 
context.evaluate("//r:*").unwrap(); 56 | assert_eq!(result_r.get_number_of_nodes(), 1); 57 | assert_eq!(result_r.get_nodes_as_vec().len(), 1); 58 | 59 | let result_h = context.evaluate("//h:*").unwrap(); 60 | assert_eq!(result_h.get_number_of_nodes(), 7); 61 | assert_eq!(result_h.get_nodes_as_vec().len(), 7); 62 | 63 | let result_f = context.evaluate("//f:*").unwrap(); 64 | assert_eq!(result_f.get_number_of_nodes(), 4); 65 | assert_eq!(result_f.get_nodes_as_vec().len(), 4); 66 | 67 | let result_all = context.evaluate("//*").unwrap(); 68 | assert_eq!(result_all.get_number_of_nodes(), 12); 69 | assert_eq!(result_all.get_nodes_as_vec().len(), 12); 70 | 71 | let result_h_table = context.evaluate("//table").unwrap(); 72 | assert_eq!(result_h_table.get_number_of_nodes(), 0); 73 | assert_eq!(result_h_table.get_nodes_as_vec().len(), 0); 74 | 75 | assert!(doc.as_node().recursively_remove_namespaces().is_ok()); 76 | let result_h_table = context.evaluate("//table").unwrap(); 77 | assert_eq!(result_h_table.get_number_of_nodes(), 2); 78 | assert_eq!(result_h_table.get_nodes_as_vec().len(), 2); 79 | } 80 | 81 | #[test] 82 | /// Test that an xpath expression finds the correct node and 83 | /// that the class names are interpreted correctly. 
84 | fn class_names() { 85 | let parser = Parser::default_html(); 86 | let doc_result = parser.parse_file("tests/resources/file02.xml"); 87 | assert!(doc_result.is_ok()); 88 | let doc = doc_result.unwrap(); 89 | let context = Context::new(&doc).unwrap(); 90 | 91 | let p_result = context.evaluate("/html/body/p"); 92 | assert!(p_result.is_ok()); 93 | let p = p_result.unwrap(); 94 | assert_eq!(p.get_number_of_nodes(), 1); 95 | 96 | let node = &p.get_nodes_as_vec()[0]; 97 | let names = node.get_class_names(); 98 | assert_eq!(names.len(), 2); 99 | assert!(names.contains("paragraph")); 100 | assert!(names.contains("important")); 101 | assert!(!names.contains("nonsense")); 102 | } 103 | 104 | #[test] 105 | /// Test that an xpath string() function processed correctly 106 | fn xpath_string_function() { 107 | let parser = Parser::default_html(); 108 | let doc_result = parser.parse_file("tests/resources/file01.xml"); 109 | assert!(doc_result.is_ok()); 110 | let doc = doc_result.unwrap(); 111 | let context = Context::new(&doc).unwrap(); 112 | 113 | let p_result = context.evaluate("string(//root//child[1]/@attribute)"); 114 | assert!(p_result.is_ok()); 115 | let p = p_result.unwrap(); 116 | // Not a node really 117 | assert_eq!(p.get_number_of_nodes(), 0); 118 | let content = p.to_string(); 119 | assert_eq!(content, "value"); 120 | } 121 | 122 | #[test] 123 | /// Test that the dual findnodes interfaces are operational 124 | fn findnodes_interfaces() { 125 | let parser = Parser::default_html(); 126 | let doc_result = parser.parse_file("tests/resources/file02.xml"); 127 | assert!(doc_result.is_ok()); 128 | let doc = doc_result.unwrap(); 129 | 130 | // Xpath interface 131 | let mut context = Context::new(&doc).unwrap(); 132 | let body = context.evaluate("/html/body").unwrap().get_nodes_as_vec(); 133 | let p_result = context.findnodes("p", body.first()); 134 | assert!(p_result.is_ok()); 135 | let p = p_result.unwrap(); 136 | assert_eq!(p.len(), 1); 137 | 138 | // Node interface 139 
| let body_node = body.first().unwrap(); 140 | let p2_result = body_node.findnodes("p"); 141 | assert!(p2_result.is_ok()); 142 | let p2 = p2_result.unwrap(); 143 | assert_eq!(p2.len(), 1); 144 | } 145 | 146 | #[test] 147 | /// Clone is safe on Context objects 148 | fn safe_context_clone() { 149 | let parser = Parser::default_html(); 150 | let doc_result = parser.parse_file("tests/resources/file02.xml"); 151 | assert!(doc_result.is_ok()); 152 | let doc = doc_result.unwrap(); 153 | 154 | // Xpath interface 155 | let context = Context::new(&doc).unwrap(); 156 | let body = context.evaluate("/html/body").unwrap().get_nodes_as_vec(); 157 | assert_eq!(body.len(), 1); 158 | let context2 = context.clone(); 159 | let body2 = context2.evaluate("/html/body").unwrap().get_nodes_as_vec(); 160 | assert_eq!(body2.len(), 1); 161 | } 162 | 163 | #[test] 164 | fn cleanup_safely_unlinked_xpath_nodes() { 165 | let p = Parser::default(); 166 | let doc_result = p.parse_string(r##" 167 | 168 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | "##); 182 | assert!(doc_result.is_ok(), "successfully parsed SVG snippet"); 183 | let doc = doc_result.unwrap(); 184 | let mut xpath = libxml::xpath::Context::new(&doc).unwrap(); 185 | xpath 186 | .register_namespace("svg", "http://www.w3.org/2000/svg") 187 | .unwrap(); 188 | for mut k in xpath.findnodes("//svg:c", None).unwrap() { 189 | k.unlink_node(); 190 | } 191 | drop(xpath); 192 | drop(doc); 193 | assert!(true, "Drops went OK."); 194 | } 195 | 196 | #[test] 197 | fn xpath_find_string_values() { 198 | let parser = Parser::default(); 199 | let doc_result = parser.parse_file("tests/resources/ids.xml"); 200 | assert!(doc_result.is_ok()); 201 | let doc = doc_result.unwrap(); 202 | let mut xpath = libxml::xpath::Context::new(&doc).unwrap(); 203 | if let Some(root) = doc.get_root_element() { 204 | let tests = root.get_child_elements(); 205 | let empty_test = &tests[0]; 206 | let ids_test = &tests[1]; 207 | let empty_values = xpath.findvalues(".//@xml:id", 
Some(empty_test)); 208 | assert_eq!(empty_values, Ok(Vec::new())); 209 | let ids_values = xpath.findvalues(".//@xml:id", Some(ids_test)); 210 | let expected_ids = Ok(vec![String::from("start"),String::from("mid"),String::from("end")]); 211 | assert_eq!(ids_values, expected_ids); 212 | let node_ids_values = ids_test.findvalues(".//@xml:id"); 213 | assert_eq!(node_ids_values, expected_ids); 214 | } else { 215 | panic!("Document fails to obtain root!"); 216 | } 217 | } 218 | 219 | /// Tests for checking xpath well-formedness 220 | mod compile_tests { 221 | use libxml::xpath::is_well_formed_xpath; 222 | 223 | #[test] 224 | fn can_compile_an_xpath() { 225 | let compiles = is_well_formed_xpath("//a"); 226 | assert_eq!(compiles, true); 227 | } 228 | 229 | #[test] 230 | fn invalid_xpath_does_not_compile() { 231 | let compiles = is_well_formed_xpath("//a[but invalid]"); 232 | assert_eq!(compiles, false); 233 | } 234 | } 235 | --------------------------------------------------------------------------------