├── .gitattributes ├── .github └── workflows │ ├── cifuzz.yml │ └── rust.yml ├── .gitignore ├── .gitmodules ├── .rustfmt.toml ├── Cargo.toml ├── Changelog.md ├── LICENSE-MIT.md ├── README.md ├── benches ├── macrobenches.rs └── microbenches.rs ├── compare ├── Cargo.toml └── benches │ └── bench.rs ├── examples ├── custom_entities.rs ├── flattened_enum.rs ├── nested_readers.rs ├── read_buffered.rs ├── read_nodes.rs ├── read_nodes_serde.rs └── read_texts.rs ├── fuzz ├── .gitignore ├── Cargo.toml ├── README.md └── fuzz_targets │ ├── fuzz_target_1.rs │ └── structured_roundtrip.rs ├── src ├── de │ ├── attributes.rs │ ├── key.rs │ ├── map.rs │ ├── mod.rs │ ├── resolver.rs │ ├── simple_type.rs │ ├── text.rs │ └── var.rs ├── encoding.rs ├── errors.rs ├── escape.rs ├── events │ ├── attributes.rs │ └── mod.rs ├── lib.rs ├── name.rs ├── parser │ ├── element.rs │ ├── mod.rs │ └── pi.rs ├── reader │ ├── async_tokio.rs │ ├── buffered_reader.rs │ ├── mod.rs │ ├── ns_reader.rs │ ├── slice_reader.rs │ └── state.rs ├── se │ ├── content.rs │ ├── element.rs │ ├── key.rs │ ├── mod.rs │ ├── simple_type.rs │ └── text.rs ├── serde_helpers.rs ├── utils.rs ├── writer.rs └── writer │ └── async_tokio.rs ├── test-gen ├── Cargo.toml └── src │ └── main.rs └── tests ├── README.md ├── async-tokio.rs ├── documents ├── document.xml ├── encoding │ ├── Big5.xml │ ├── EUC-JP.xml │ ├── EUC-KR.xml │ ├── GBK.xml │ ├── IBM866.xml │ ├── ISO-2022-JP.xml │ ├── ISO-8859-10.xml │ ├── ISO-8859-13.xml │ ├── ISO-8859-14.xml │ ├── ISO-8859-15.xml │ ├── ISO-8859-16.xml │ ├── ISO-8859-2.xml │ ├── ISO-8859-3.xml │ ├── ISO-8859-4.xml │ ├── ISO-8859-5.xml │ ├── ISO-8859-6.xml │ ├── ISO-8859-7.xml │ ├── ISO-8859-8-I.xml │ ├── ISO-8859-8.xml │ ├── KOI8-R.xml │ ├── KOI8-U.xml │ ├── Shift_JIS.xml │ ├── gb18030.xml │ ├── macintosh.xml │ ├── utf16be-bom.xml │ ├── utf16be.xml │ ├── utf16le-bom.xml │ ├── utf16le.xml │ ├── utf8-bom.xml │ ├── utf8.xml │ ├── windows-1250.xml │ ├── windows-1251.xml │ ├── windows-1252.xml │ ├── 
windows-1253.xml │ ├── windows-1254.xml │ ├── windows-1255.xml │ ├── windows-1256.xml │ ├── windows-1257.xml │ ├── windows-1258.xml │ ├── windows-874.xml │ ├── x-mac-cyrillic.xml │ └── x-user-defined.xml ├── html5.html ├── html5.txt ├── libreoffice_document.fodt ├── linescore.xml ├── opennews_all.rss ├── players.xml ├── rpm_filelists.xml ├── rpm_other.xml ├── rpm_primary.xml ├── rpm_primary2.xml ├── sample_1.xml ├── sample_ns.xml ├── sample_rss.xml ├── test_writer.xml ├── test_writer_indent.xml └── test_writer_indent_cdata.xml ├── encodings.rs ├── escape.rs ├── fuzzing.rs ├── helpers └── mod.rs ├── html.rs ├── issues.rs ├── reader-attributes.rs ├── reader-config.rs ├── reader-errors.rs ├── reader-namespaces.rs ├── reader-references.rs ├── reader.rs ├── roundtrip.rs ├── serde-de-enum.rs ├── serde-de-seq.rs ├── serde-de-xsi.rs ├── serde-de.rs ├── serde-issues.rs ├── serde-migrated.rs ├── serde-se.rs ├── serde_helpers └── mod.rs ├── serde_roundtrip.rs ├── writer-indentation.rs └── writer.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | # Unit tests assume that all xml files have unix style line endings 2 | /tests/documents/* text eol=lf 3 | /tests/documents/encoding/* text eol=lf 4 | 5 | /tests/documents/encoding/utf16be.xml binary 6 | /tests/documents/encoding/utf16le.xml binary 7 | /tests/documents/encoding/utf16be-bom.xml binary 8 | /tests/documents/encoding/utf16le-bom.xml binary 9 | /tests/documents/sample_5_utf16bom.xml binary 10 | -------------------------------------------------------------------------------- /.github/workflows/cifuzz.yml: -------------------------------------------------------------------------------- 1 | name: CIFuzz 2 | on: [pull_request] 3 | jobs: 4 | Fuzzing: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Build Fuzzers 8 | id: build 9 | uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master 10 | with: 11 | oss-fuzz-project-name: 'quick-xml' 12 | language: rust 13 | - 
name: Run Fuzzers 14 | uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master 15 | with: 16 | oss-fuzz-project-name: 'quick-xml' 17 | language: rust 18 | fuzz-seconds: 600 19 | - name: Upload Crash 20 | uses: actions/upload-artifact@v4 21 | if: failure() && steps.build.outcome == 'success' 22 | with: 23 | name: artifacts 24 | path: ./out/artifacts 25 | 26 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - name: Check fmt 11 | run: cargo fmt -- --check 12 | 13 | msrv: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: dtolnay/rust-toolchain@1.56.0 18 | - run: cargo check 19 | 20 | minimal-versions: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Install tools 25 | run: cargo install cargo-hack cargo-minimal-versions 26 | - name: Install nightly rust 27 | uses: dtolnay/rust-toolchain@nightly 28 | - name: Check with minimal versions 29 | run: cargo minimal-versions check 30 | - name: Check with minimal versions (serialize) 31 | run: cargo minimal-versions check --features serialize 32 | - name: Check with minimal versions (encoding) 33 | run: cargo minimal-versions check --features encoding 34 | - name: Check with minimal versions (async-tokio) 35 | run: cargo minimal-versions check --features async-tokio 36 | 37 | test: 38 | strategy: 39 | matrix: 40 | platform: [ubuntu-latest, windows-latest] 41 | 42 | runs-on: ${{ matrix.platform }} 43 | 44 | # Set variable to enable coverage 45 | env: 46 | RUSTFLAGS: -C instrument-coverage 47 | 48 | steps: 49 | - uses: actions/checkout@v4 50 | - name: Install coverage reporter (llvm-tools-preview) 51 | if: runner.os == 'Linux' 52 | run: rustup component add 
llvm-tools-preview 53 | - name: Install coverage reporter (grcov) 54 | if: runner.os == 'Linux' 55 | run: cargo install grcov 56 | 57 | - name: Build 58 | run: cargo build 59 | - name: Build benchmarks 60 | run: cargo bench --no-run 61 | - name: Build benchmarks (compare) 62 | working-directory: compare 63 | run: cargo bench --no-run 64 | - name: Run tests + benchmarks 65 | run: cargo test --all-features --benches --tests 66 | 67 | - name: Run tests (no features) 68 | env: 69 | LLVM_PROFILE_FILE: coverage/no-features-%p-%m.profraw 70 | run: cargo test --no-default-features 71 | - name: Run tests (serialize) 72 | env: 73 | LLVM_PROFILE_FILE: coverage/serialize-%p-%m.profraw 74 | run: cargo test --features serialize 75 | - name: Run tests (serialize+encoding) 76 | env: 77 | LLVM_PROFILE_FILE: coverage/serialize-encoding-%p-%m.profraw 78 | run: cargo test --features serialize,encoding 79 | - name: Run tests (serialize+escape-html) 80 | env: 81 | LLVM_PROFILE_FILE: coverage/serialize-escape-html-%p-%m.profraw 82 | run: cargo test --features serialize,escape-html 83 | - name: Run tests (all features) 84 | env: 85 | LLVM_PROFILE_FILE: coverage/all-features-%p-%m.profraw 86 | run: cargo test --all-features 87 | - name: Prepare coverage information for upload 88 | if: runner.os == 'Linux' 89 | run: | 90 | grcov ./coverage \ 91 | -s . 
\ 92 | --binary-path ./target/debug/ \ 93 | --branch \ 94 | --ignore-not-existing \ 95 | --ignore 'tests/*' \ 96 | -o ./coverage.lcov 97 | - name: Upload coverage to codecov.io 98 | if: runner.os == 'Linux' 99 | uses: codecov/codecov-action@v4 100 | with: 101 | files: ./coverage.lcov 102 | flags: unittests 103 | verbose: true 104 | continue-on-error: true 105 | 106 | # Check that tests that are sensitive to target are passed 107 | x86: 108 | runs-on: ubuntu-latest 109 | steps: 110 | - uses: actions/checkout@v4 111 | - name: Install 32-bit target 112 | run: rustup target add i686-unknown-linux-gnu 113 | - name: Install 32-bit libs (for criterion) 114 | # Criterion wants to compile something. 115 | # Cargo builds criterion even when it is not required for those tests. 116 | # Without those libs compilation failed with: 117 | # error: linking with `cc` failed: exit status: 1 118 | # | 119 | # = note: LC_ALL="C" PATH="..." ... 120 | # = note: /usr/bin/ld: cannot find Scrt1.o: No such file or directory 121 | # /usr/bin/ld: cannot find crti.o: No such file or directory 122 | # /usr/bin/ld: skipping incompatible /usr/lib/gcc/x86_64-linux-gnu/11/libgcc.a when searching for -lgcc 123 | # /usr/bin/ld: cannot find -lgcc: No such file or directory 124 | # collect2: error: ld returned 1 exit status 125 | # Fixed as suggested in this answer: 126 | # https://stackoverflow.com/a/16016792/7518605 127 | run: sudo apt install gcc-multilib 128 | - name: Run some tests on 32-bit target 129 | run: cargo test --target i686-unknown-linux-gnu --test issues 130 | - name: Run some tests on 32-bit target (async-tokio) 131 | run: cargo test --target i686-unknown-linux-gnu --features async-tokio --test async-tokio 132 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .project 3 | Cargo.lock 4 | # macOS hidden files 5 | .DS_Store 6 | 
-------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "encoding"] 2 | path = test-gen/encoding 3 | url = https://github.com/whatwg/encoding.git 4 | shallow = true 5 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | edition = "2021" 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "quick-xml" 3 | version = "0.37.5" 4 | description = "High performance xml reader and writer" 5 | edition = "2021" 6 | 7 | documentation = "https://docs.rs/quick-xml" 8 | repository = "https://github.com/tafia/quick-xml" 9 | 10 | keywords = ["xml", "serde", "parser", "writer", "html"] 11 | categories = ["asynchronous", "encoding", "parsing", "parser-implementations"] 12 | license = "MIT" 13 | rust-version = "1.56" 14 | # We exclude tests & examples & benches to reduce the size of a package. 
15 | # Unfortunately, this is a source of warnings in the latest cargo when packaging: 16 | # > warning: ignoring {context} `{name}` as `{path}` is not included in the published package 17 | # That may become unnecessary once https://github.com/rust-lang/cargo/issues/13491 18 | # will be resolved 19 | include = ["src/*", "LICENSE-MIT.md", "README.md"] 20 | 21 | [dependencies] 22 | arbitrary = { version = "1", features = ["derive"], optional = true } 23 | document-features = { version = "0.2", optional = true } 24 | encoding_rs = { version = "0.8", optional = true } 25 | serde = { version = ">=1.0.139", optional = true } 26 | tokio = { version = "1.10", optional = true, default-features = false, features = ["io-util"] } 27 | memchr = "2.1" 28 | 29 | [dev-dependencies] 30 | criterion = "0.4" 31 | pretty_assertions = "1.4" 32 | regex = "1" 33 | # https://github.com/serde-rs/serde/issues/1904 is fixed since 1.0.206 34 | # serde does not follow semver in numbering and their dependencies, so we specify the patch version here 35 | serde_derive = { version = "1.0.206" } 36 | serde-value = "0.7" 37 | tokio = { version = "1.21", default-features = false, features = ["macros", "rt"] } 38 | tokio-test = "0.4" 39 | 40 | [lib] 41 | bench = false 42 | 43 | [[bench]] 44 | name = "microbenches" 45 | harness = false 46 | path = "benches/microbenches.rs" 47 | 48 | [[bench]] 49 | name = "macrobenches" 50 | harness = false 51 | path = "benches/macrobenches.rs" 52 | 53 | [features] 54 | default = [] 55 | 56 | ## Enables support for asynchronous reading and writing from `tokio`'s IO-Traits by enabling 57 | ## [reading events] from types implementing [`tokio::io::AsyncBufRead`]. 58 | ## 59 | ## [reading events]: crate::reader::Reader::read_event_into_async 60 | async-tokio = ["tokio"] 61 | 62 | ## Enables support of non-UTF-8 encoded documents. Encoding will be inferred from 63 | ## the XML declaration if it is found, otherwise UTF-8 is assumed. 
64 | ## 65 | ## Currently, only ASCII-compatible encodings are supported. For example, 66 | ## UTF-16 will not work (therefore, `quick-xml` is not [standard compliant]). 67 | ## 68 | ## Thus, quick-xml supports all encodings of [`encoding_rs`] except these: 69 | ## - [UTF-16BE] 70 | ## - [UTF-16LE] 71 | ## - [ISO-2022-JP] 72 | ## 73 | ## You should stop processing a document when one of these encodings is detected, 74 | ## because generated events can be wrong and do not reflect a real document structure! 75 | ## 76 | ## Because these are the only supported encodings that are not ASCII compatible, you can 77 | ## check for them: 78 | ## 79 | ## ``` 80 | ## use quick_xml::events::Event; 81 | ## use quick_xml::reader::Reader; 82 | ## 83 | ## # fn to_utf16le_with_bom(string: &str) -> Vec<u8> { 84 | ## # let mut bytes = Vec::new(); 85 | ## # bytes.extend_from_slice(&[0xFF, 0xFE]); // UTF-16 LE BOM 86 | ## # for ch in string.encode_utf16() { 87 | ## # bytes.extend_from_slice(&ch.to_le_bytes()); 88 | ## # } 89 | ## # bytes 90 | ## # } 91 | ## let xml = to_utf16le_with_bom(r#""#); 92 | ## let mut reader = Reader::from_reader(xml.as_ref()); 93 | ## reader.config_mut().trim_text(true); 94 | ## 95 | ## let mut buf = Vec::new(); 96 | ## let mut unsupported = false; 97 | ## loop { 98 | ## if !reader.decoder().encoding().is_ascii_compatible() { 99 | ## unsupported = true; 100 | ## break; 101 | ## } 102 | ## buf.clear(); 103 | ## match reader.read_event_into(&mut buf).unwrap() { 104 | ## Event::Eof => break, 105 | ## _ => {} 106 | ## } 107 | ## } 108 | ## assert_eq!(unsupported, true); 109 | ## ``` 110 | ## This restriction will be eliminated once issue [#158] is resolved. 
111 | ## 112 | ## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding 113 | ## [UTF-16BE]: encoding_rs::UTF_16BE 114 | ## [UTF-16LE]: encoding_rs::UTF_16LE 115 | ## [ISO-2022-JP]: encoding_rs::ISO_2022_JP 116 | ## [#158]: https://github.com/tafia/quick-xml/issues/158 117 | encoding = ["encoding_rs"] 118 | 119 | ## Enables support for recognizing all [HTML 5 entities] in [`unescape`] 120 | ## function. The full list of entities also can be found in 121 | ## <https://html.spec.whatwg.org/entities.json>. 122 | ## 123 | ## [HTML 5 entities]: https://dev.w3.org/html5/html-author/charref 124 | ## [`unescape`]: crate::escape::unescape 125 | escape-html = [] 126 | 127 | ## This feature is for the Serde deserializer that enables support for deserializing 128 | ## lists where tags are overlapped with tags that do not correspond to the list. 129 | ## 130 | ## When this feature is enabled, the XML: 131 | ## ```xml 132 | ## <any-name> 133 | ## <item/> 134 | ## <another-item/> 135 | ## <item/> 136 | ## <item/> 137 | ## </any-name> 138 | ## ``` 139 | ## could be deserialized to a struct: 140 | ## ```no_run 141 | ## # use serde::Deserialize; 142 | ## #[derive(Deserialize)] 143 | ## #[serde(rename_all = "kebab-case")] 144 | ## struct AnyName { 145 | ## item: Vec<()>, 146 | ## another_item: (), 147 | ## } 148 | ## ``` 149 | ## 150 | ## When this feature is not enabled (default), only the first element will be 151 | ## associated with the field, and the deserialized type will report an error 152 | ## (duplicated field) when the deserializer encounters a second `<item/>`. 153 | ## 154 | ## Note, that enabling this feature can lead to high and even unlimited memory 155 | ## consumption, because deserializer needs to check all events up to the end of a 156 | ## container tag (`</any-name>` in this example) to figure out that there are no 157 | ## more items for a field. If `</any-name>` or even EOF is not encountered, the 158 | ## parsing will never end which can lead to a denial-of-service (DoS) scenario. 
159 | ## 160 | ## Having several lists and overlapped elements for them in XML could also lead 161 | ## to quadratic parsing time, because the deserializer must check the list of 162 | ## events as many times as the number of sequence fields present in the schema. 163 | ## 164 | ## To reduce negative consequences, always [limit] the maximum number of events 165 | ## that [`Deserializer`] will buffer. 166 | ## 167 | ## This feature works only with the `serialize` feature and has no effect if `serialize` 168 | ## is not enabled. 169 | ## 170 | ## [limit]: crate::de::Deserializer::event_buffer_size 171 | ## [`Deserializer`]: crate::de::Deserializer 172 | overlapped-lists = [] 173 | 174 | ## Enables serialization of some quick-xml types using [`serde`]. This feature 175 | ## is rarely needed. 176 | ## 177 | ## This feature does NOT provide an XML serializer or deserializer. You should use 178 | ## the `serialize` feature for that instead. 179 | # Cannot name "serde" to avoid clash with dependency. 180 | # "dep:" prefix only available from Rust 1.60 181 | serde-types = ["serde/derive"] 182 | 183 | ## Enables support for [`serde`] serialization and deserialization. When this 184 | ## feature is enabled, quick-xml provides serializer and deserializer for XML. 185 | ## 186 | ## This feature does NOT enable serialization of the types inside quick-xml. 187 | ## If you need that, use the `serde-types` feature. 
188 | serialize = ["serde"] # "dep:" prefix only available from Rust 1.60 189 | 190 | [package.metadata.docs.rs] 191 | # document all features 192 | all-features = true 193 | 194 | # Tests, benchmarks and examples are not included in the package on crates.io, 195 | # so we need to specify a path, otherwise `cargo package` complains 196 | # That may become unnecessary once https://github.com/rust-lang/cargo/issues/13491 197 | # will be resolved 198 | 199 | [[test]] 200 | name = "async-tokio" 201 | required-features = ["async-tokio"] 202 | path = "tests/async-tokio.rs" 203 | 204 | [[test]] 205 | name = "encodings" 206 | required-features = ["encoding"] 207 | path = "tests/encodings.rs" 208 | 209 | [[test]] 210 | name = "html" 211 | required-features = ["escape-html"] 212 | path = "tests/html.rs" 213 | 214 | [[test]] 215 | name = "serde_roundtrip" 216 | required-features = ["serialize"] 217 | path = "tests/serde_roundtrip.rs" 218 | 219 | [[test]] 220 | name = "serde-de" 221 | required-features = ["serialize"] 222 | path = "tests/serde-de.rs" 223 | 224 | [[test]] 225 | name = "serde-de-enum" 226 | required-features = ["serialize"] 227 | path = "tests/serde-de-enum.rs" 228 | 229 | [[test]] 230 | name = "serde-de-seq" 231 | required-features = ["serialize"] 232 | path = "tests/serde-de-seq.rs" 233 | 234 | [[test]] 235 | name = "serde-de-xsi" 236 | required-features = ["serialize"] 237 | path = "tests/serde-de-xsi.rs" 238 | 239 | [[test]] 240 | name = "serde-se" 241 | required-features = ["serialize"] 242 | path = "tests/serde-se.rs" 243 | 244 | [[test]] 245 | name = "serde-migrated" 246 | required-features = ["serialize"] 247 | path = "tests/serde-migrated.rs" 248 | 249 | [[test]] 250 | name = "serde-issues" 251 | required-features = ["serialize"] 252 | path = "tests/serde-issues.rs" 253 | 254 | [[example]] 255 | name = "read_nodes_serde" 256 | required-features = ["serialize"] 257 | path = "examples/read_nodes_serde.rs" 258 | 259 | [[example]] 260 | name = "flattened_enum" 
261 | required-features = ["serialize"] 262 | path = "examples/flattened_enum.rs" 263 | -------------------------------------------------------------------------------- /LICENSE-MIT.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Johann Tuffe 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 
24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # quick-xml 2 | 3 | ![status](https://github.com/tafia/quick-xml/actions/workflows/rust.yml/badge.svg) 4 | [![Crate](https://img.shields.io/crates/v/quick-xml.svg)](https://crates.io/crates/quick-xml) 5 | [![docs.rs](https://docs.rs/quick-xml/badge.svg)](https://docs.rs/quick-xml) 6 | [![codecov](https://img.shields.io/codecov/c/github/tafia/quick-xml)](https://codecov.io/gh/tafia/quick-xml) 7 | [![MSRV](https://img.shields.io/badge/rustc-1.56.0+-ab6000.svg)](https://blog.rust-lang.org/2021/10/21/Rust-1.56.0.html) 8 | 9 | High performance xml pull reader/writer. 10 | 11 | The reader: 12 | - is almost zero-copy (use of `Cow` whenever possible) 13 | - is easy on memory allocation (the API provides a way to reuse buffers) 14 | - supports various encodings (with the `encoding` feature), namespace resolution, and special characters. 15 | 16 | Syntax is inspired by [xml-rs](https://github.com/netvl/xml-rs). 17 | 18 | ## Example 19 | 20 | ### Reader 21 | 22 | ```rust 23 | use quick_xml::events::Event; 24 | use quick_xml::reader::Reader; 25 | 26 | let xml = r#"<tag1 att1 = "test"> 27 | <tag2><!--Test comment-->Test</tag2> 28 | <tag2>Test 2</tag2> 29 | </tag1>"#; 30 | let mut reader = Reader::from_str(xml); 31 | reader.config_mut().trim_text(true); 32 | 33 | let mut count = 0; 34 | let mut txt = Vec::new(); 35 | let mut buf = Vec::new(); 36 | 37 | // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) 38 | loop { 39 | // NOTE: this is the generic case when we don't know about the input BufRead. 
40 | // when the input is a &str or a &[u8], we don't actually need to use another 41 | // buffer, we could directly call `reader.read_event()` 42 | match reader.read_event_into(&mut buf) { 43 | Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), 44 | // exits the loop when reaching end of file 45 | Ok(Event::Eof) => break, 46 | 47 | Ok(Event::Start(e)) => { 48 | match e.name().as_ref() { 49 | b"tag1" => println!("attributes values: {:?}", 50 | e.attributes().map(|a| a.unwrap().value) 51 | .collect::<Vec<_>>()), 52 | b"tag2" => count += 1, 53 | _ => (), 54 | } 55 | } 56 | Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), 57 | 58 | // There are several other `Event`s we do not consider here 59 | _ => (), 60 | } 61 | // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low 62 | buf.clear(); 63 | } 64 | ``` 65 | 66 | ### Writer 67 | 68 | ```rust 69 | use quick_xml::events::{Event, BytesEnd, BytesStart}; 70 | use quick_xml::reader::Reader; 71 | use quick_xml::writer::Writer; 72 | use std::io::Cursor; 73 | 74 | let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#; 75 | let mut reader = Reader::from_str(xml); 76 | reader.config_mut().trim_text(true); 77 | let mut writer = Writer::new(Cursor::new(Vec::new())); 78 | loop { 79 | match reader.read_event() { 80 | Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => { 81 | 82 | // creates a new element ... 
alternatively we could reuse `e` by calling 83 | // `e.into_owned()` 84 | let mut elem = BytesStart::new("my_elem"); 85 | 86 | // collect existing attributes 87 | elem.extend_attributes(e.attributes().map(|attr| attr.unwrap())); 88 | 89 | // copy existing attributes, adds a new my-key="some value" attribute 90 | elem.push_attribute(("my-key", "some value")); 91 | 92 | // writes the event to the writer 93 | assert!(writer.write_event(Event::Start(elem)).is_ok()); 94 | }, 95 | Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => { 96 | assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok()); 97 | }, 98 | Ok(Event::Eof) => break, 99 | // we can either move or borrow the event to write, depending on your use-case 100 | Ok(e) => assert!(writer.write_event(e).is_ok()), 101 | Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), 102 | } 103 | } 104 | 105 | let result = writer.into_inner().into_inner(); 106 | let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#; 107 | assert_eq!(result, expected.as_bytes()); 108 | ``` 109 | 110 | ## Serde 111 | 112 | When using the `serialize` feature, quick-xml can be used with serde's `Serialize`/`Deserialize` traits. 113 | The mapping between XML and Rust types, and in particular the syntax that allows you to specify the 114 | distinction between *elements* and *attributes*, is described in detail in the documentation 115 | for [deserialization](https://docs.rs/quick-xml/latest/quick_xml/de/). 116 | 117 | ### Credits 118 | 119 | This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs). 120 | quick-xml follows its convention for deserialization, including the 121 | [`$value`](https://github.com/RReverser/serde-xml-rs#parsing-the-value-of-a-tag) special name. 
122 | 123 | ### Parsing the "value" of a tag 124 | 125 | If you have an input of the form `<foo abc="xyz">bar</foo>`, and you want to get at the `bar`, 126 | you can use either the special name `$text`, or the special name `$value`: 127 | 128 | ```rust,ignore 129 | struct Foo { 130 | #[serde(rename = "@abc")] 131 | pub abc: String, 132 | #[serde(rename = "$text")] 133 | pub body: String, 134 | } 135 | ``` 136 | 137 | Read about the difference in the [documentation](https://docs.rs/quick-xml/latest/quick_xml/de/index.html#difference-between-text-and-value-special-names). 138 | 139 | ### Performance 140 | 141 | Note that despite not focusing on performance (there are several unnecessary copies), it remains about 10x faster than serde-xml-rs. 142 | 143 | # Features 144 | 145 | - `encoding`: support non-UTF-8 XML documents 146 | - `serialize`: support serde `Serialize`/`Deserialize` 147 | 148 | ## Performance 149 | 150 | Benchmarking is hard and the results depend on your input file and your machine. 151 | 152 | Here on my particular file, quick-xml is around **50 times faster** than [xml-rs](https://crates.io/crates/xml-rs) crate. 153 | 154 | ``` 155 | // quick-xml benches 156 | test bench_quick_xml ... bench: 198,866 ns/iter (+/- 9,663) 157 | test bench_quick_xml_escaped ... bench: 282,740 ns/iter (+/- 61,625) 158 | test bench_quick_xml_namespaced ... bench: 389,977 ns/iter (+/- 32,045) 159 | 160 | // same bench with xml-rs 161 | test bench_xml_rs ... bench: 14,468,930 ns/iter (+/- 321,171) 162 | 163 | // serde-xml-rs vs serialize feature 164 | test bench_serde_quick_xml ... bench: 1,181,198 ns/iter (+/- 138,290) 165 | test bench_serde_xml_rs ... bench: 15,039,564 ns/iter (+/- 783,485) 166 | ``` 167 | 168 | For a feature and performance comparison, you can also have a look at RazrFalcon's [parser comparison table](https://github.com/RazrFalcon/roxmltree#parsing). 169 | 170 | ## Contribute 171 | 172 | Any PR is welcomed! 
173 | 174 | ## License 175 | 176 | MIT 177 | -------------------------------------------------------------------------------- /benches/macrobenches.rs: -------------------------------------------------------------------------------- 1 | use criterion::{self, criterion_group, criterion_main, Criterion, Throughput}; 2 | use quick_xml::events::Event; 3 | use quick_xml::reader::{NsReader, Reader}; 4 | use quick_xml::Result as XmlResult; 5 | 6 | static RPM_PRIMARY: &str = include_str!("../tests/documents/rpm_primary.xml"); 7 | static RPM_PRIMARY2: &str = include_str!("../tests/documents/rpm_primary2.xml"); 8 | static RPM_FILELISTS: &str = include_str!("../tests/documents/rpm_filelists.xml"); 9 | static RPM_OTHER: &str = include_str!("../tests/documents/rpm_other.xml"); 10 | static LIBREOFFICE_DOCUMENT: &str = include_str!("../tests/documents/libreoffice_document.fodt"); 11 | static DOCUMENT: &str = include_str!("../tests/documents/document.xml"); 12 | static TEST_WRITER_INDENT: &str = include_str!("../tests/documents/test_writer_indent.xml"); 13 | static SAMPLE_1: &str = include_str!("../tests/documents/sample_1.xml"); 14 | static LINESCORE: &str = include_str!("../tests/documents/linescore.xml"); 15 | static SAMPLE_RSS: &str = include_str!("../tests/documents/sample_rss.xml"); 16 | static SAMPLE_NS: &str = include_str!("../tests/documents/sample_ns.xml"); 17 | static PLAYERS: &str = include_str!("../tests/documents/players.xml"); 18 | 19 | static INPUTS: &[(&str, &str)] = &[ 20 | // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces 21 | ("rpm_primary.xml", RPM_PRIMARY), 22 | // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces 23 | ("rpm_primary2.xml", RPM_PRIMARY2), 24 | // long, mostly medium-length text elements, not much escaping 25 | ("rpm_filelists.xml", RPM_FILELISTS), 26 | // long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes 
27 | ("rpm_other.xml", RPM_OTHER), 28 | // long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces 29 | ("libreoffice_document.fodt", LIBREOFFICE_DOCUMENT), 30 | // medium length, mostly empty tags, a few short attributes per element, no escaping 31 | ("document.xml", DOCUMENT), 32 | // medium length, lots of namespaces, no escaping 33 | ("test_writer_ident.xml", TEST_WRITER_INDENT), 34 | // short, mix of attributes and text, lots of escapes 35 | ("sample_1.xml", SAMPLE_1), 36 | // medium length, lots of attributes, short attributes, few escapes 37 | ("linescore.xml", LINESCORE), 38 | // short, lots of namespaces, no escapes 39 | ("sample_ns.xml", SAMPLE_NS), 40 | // long, few attributes, mix of attribute lengths, escapes in text content 41 | ("sample_rss.xml", SAMPLE_RSS), 42 | // long, lots of attributes, short attributes, no text, no escapes 43 | ("players.xml", PLAYERS), 44 | ]; 45 | 46 | // TODO: use fully normalized attribute values 47 | fn parse_document_from_str(doc: &str) -> XmlResult<()> { 48 | let mut r = Reader::from_str(doc); 49 | loop { 50 | match criterion::black_box(r.read_event()?) { 51 | Event::Start(e) | Event::Empty(e) => { 52 | for attr in e.attributes() { 53 | criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?); 54 | } 55 | } 56 | Event::Text(e) => { 57 | criterion::black_box(e.decode()?); 58 | } 59 | Event::CData(e) => { 60 | criterion::black_box(e.into_inner()); 61 | } 62 | Event::End(_) => (), 63 | Event::Eof => break, 64 | _ => (), 65 | } 66 | } 67 | Ok(()) 68 | } 69 | 70 | // TODO: use fully normalized attribute values 71 | fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> { 72 | let mut r = Reader::from_reader(doc); 73 | let mut buf = Vec::new(); 74 | loop { 75 | match criterion::black_box(r.read_event_into(&mut buf)?) 
{ 76 | Event::Start(e) | Event::Empty(e) => { 77 | for attr in e.attributes() { 78 | criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?); 79 | } 80 | } 81 | Event::Text(e) => { 82 | criterion::black_box(e.decode()?); 83 | } 84 | Event::CData(e) => { 85 | criterion::black_box(e.into_inner()); 86 | } 87 | Event::End(_) => (), 88 | Event::Eof => break, 89 | _ => (), 90 | } 91 | buf.clear(); 92 | } 93 | Ok(()) 94 | } 95 | 96 | // TODO: use fully normalized attribute values 97 | fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> { 98 | let mut r = NsReader::from_str(doc); 99 | loop { 100 | match criterion::black_box(r.read_resolved_event()?) { 101 | (resolved_ns, Event::Start(e) | Event::Empty(e)) => { 102 | criterion::black_box(resolved_ns); 103 | for attr in e.attributes() { 104 | criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?); 105 | } 106 | } 107 | (resolved_ns, Event::Text(e)) => { 108 | criterion::black_box(e.decode()?); 109 | criterion::black_box(resolved_ns); 110 | } 111 | (resolved_ns, Event::CData(e)) => { 112 | criterion::black_box(e.into_inner()); 113 | criterion::black_box(resolved_ns); 114 | } 115 | (_, Event::End(_)) => (), 116 | (_, Event::Eof) => break, 117 | _ => (), 118 | } 119 | } 120 | Ok(()) 121 | } 122 | 123 | // TODO: use fully normalized attribute values 124 | fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> { 125 | let mut r = NsReader::from_reader(doc); 126 | let mut buf = Vec::new(); 127 | loop { 128 | match criterion::black_box(r.read_resolved_event_into(&mut buf)?) 
{ 129 | (resolved_ns, Event::Start(e) | Event::Empty(e)) => { 130 | criterion::black_box(resolved_ns); 131 | for attr in e.attributes() { 132 | criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?); 133 | } 134 | } 135 | (resolved_ns, Event::Text(e)) => { 136 | criterion::black_box(e.decode()?); 137 | criterion::black_box(resolved_ns); 138 | } 139 | (resolved_ns, Event::CData(e)) => { 140 | criterion::black_box(e.into_inner()); 141 | criterion::black_box(resolved_ns); 142 | } 143 | (_, Event::End(_)) => (), 144 | (_, Event::Eof) => break, 145 | _ => (), 146 | } 147 | buf.clear(); 148 | } 149 | Ok(()) 150 | } 151 | 152 | /// Just parse - no decoding overhead 153 | pub fn bench_parse_document_nocopy(c: &mut Criterion) { 154 | let mut group = c.benchmark_group("parse_document_nocopy"); 155 | 156 | for (id, data) in INPUTS.iter() { 157 | group.throughput(Throughput::Bytes(data.len() as u64)); 158 | group.bench_with_input(*id, *data, |b, input| { 159 | b.iter(|| parse_document_from_str(input).unwrap()) 160 | }); 161 | } 162 | 163 | group.finish(); 164 | } 165 | 166 | /// Decode into a buffer, then parse 167 | pub fn bench_decode_and_parse_document(c: &mut Criterion) { 168 | let mut group = c.benchmark_group("decode_and_parse_document"); 169 | 170 | for (id, data) in INPUTS.iter() { 171 | group.throughput(Throughput::Bytes(data.len() as u64)); 172 | group.bench_with_input(*id, *data, |b, input| { 173 | b.iter(|| parse_document_from_bytes(input.as_bytes()).unwrap()) 174 | }); 175 | } 176 | 177 | group.finish(); 178 | } 179 | 180 | /// Just parse - no decoding overhead - including namespaces 181 | pub fn bench_parse_document_nocopy_with_namespaces(c: &mut Criterion) { 182 | let mut group = c.benchmark_group("parse_document_nocopy_with_namespaces"); 183 | 184 | for (id, data) in INPUTS.iter() { 185 | group.throughput(Throughput::Bytes(data.len() as u64)); 186 | group.bench_with_input(*id, *data, |b, input| { 187 | b.iter(|| 
parse_document_from_str_with_namespaces(input).unwrap()) 188 | }); 189 | } 190 | 191 | group.finish(); 192 | } 193 | 194 | /// Decode into a buffer, then parse - including namespaces 195 | pub fn bench_decode_and_parse_document_with_namespaces(c: &mut Criterion) { 196 | let mut group = c.benchmark_group("decode_and_parse_document_with_namespaces"); 197 | 198 | for (id, data) in INPUTS.iter() { 199 | group.throughput(Throughput::Bytes(data.len() as u64)); 200 | group.bench_with_input(*id, *data, |b, input| { 201 | b.iter(|| parse_document_from_bytes_with_namespaces(input.as_bytes()).unwrap()) 202 | }); 203 | } 204 | 205 | group.finish(); 206 | } 207 | 208 | criterion_group!( 209 | benches, 210 | bench_parse_document_nocopy, 211 | bench_decode_and_parse_document, 212 | bench_parse_document_nocopy_with_namespaces, 213 | bench_decode_and_parse_document_with_namespaces, 214 | ); 215 | criterion_main!(benches); 216 | -------------------------------------------------------------------------------- /compare/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "compare" 3 | version = "0.1.0" 4 | authors = ["Johann Tuffe "] 5 | publish = false 6 | edition = "2021" 7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | 10 | [dev-dependencies] 11 | criterion = { version = "0.5", features = ["html_reports"] } 12 | maybe_xml = "0.10.1" 13 | quick-xml = { path = "..", features = ["serialize"] } 14 | rapid-xml = "0.2" 15 | rusty_xml = { version = "0.3", package = "RustyXML" } 16 | xml_oxide = "0.3" 17 | xml-rs = "0.8" 18 | xml5ever = "0.17" 19 | xmlparser = "0.13" 20 | serde-xml-rs = "0.6" 21 | # Do not use "derive" feature, because it slowdown compilation 22 | # See https://github.com/serde-rs/serde/pull/2588 23 | serde = "1.0" 24 | serde_derive = "1.0" 25 | pretty_assertions = "1.4" 26 | 27 | [[bench]] 28 | name = "bench" 29 | harness = false 30 | 
-------------------------------------------------------------------------------- /examples/custom_entities.rs: -------------------------------------------------------------------------------- 1 | //! This example demonstrates how custom entities can be extracted from the DOCTYPE, 2 | //! and later used to: 3 | //! - insert new pieces of document (particular case - insert only textual content) 4 | //! - decode attribute values 5 | //! 6 | //! NB: this example is deliberately kept simple: 7 | //! * it assumes that the XML file is UTF-8 encoded (custom_entities must only contain UTF-8 data) 8 | //! * it only handles internal entities; 9 | //! * the regex in this example is simple but brittle; 10 | //! * it does not support the use of entities in entity declaration. 11 | 12 | use std::borrow::Cow; 13 | use std::collections::{HashMap, VecDeque}; 14 | use std::str::from_utf8; 15 | 16 | use quick_xml::encoding::Decoder; 17 | use quick_xml::errors::Error; 18 | use quick_xml::escape::EscapeError; 19 | use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event}; 20 | use quick_xml::name::QName; 21 | use quick_xml::reader::Reader; 22 | use regex::bytes::Regex; 23 | 24 | use pretty_assertions::assert_eq; 25 | 26 | struct MyReader<'i> { 27 | /// Stack of readers, the first element is the initial reader, the other are 28 | /// readers created for each resolved entity 29 | readers: VecDeque>, 30 | /// Map of captured internal _parsed general entities_. _Parsed_ means that 31 | /// value of the entity is parsed by XML reader 32 | entities: HashMap<&'i [u8], &'i [u8]>, 33 | /// In this example we use simple regular expression to capture entities from DTD. 34 | /// In real application you should use DTD parser.
35 | entity_re: Regex, 36 | } 37 | impl<'i> MyReader<'i> { 38 | fn new(input: &'i str) -> Result { 39 | let mut reader = Reader::from_str(input); 40 | reader.config_mut().trim_text(true); 41 | 42 | let mut readers = VecDeque::new(); 43 | readers.push_back(reader); 44 | 45 | // Capture "name" and "content" from such string: 46 | // 47 | let entity_re = Regex::new(r#""#)?; 48 | Ok(Self { 49 | readers, 50 | entities: HashMap::new(), 51 | entity_re, 52 | }) 53 | } 54 | fn read_event(&mut self) -> Result, Error> { 55 | loop { 56 | if let Some(mut reader) = self.readers.pop_back() { 57 | match dbg!(reader.read_event())? { 58 | // Capture defined entities from the DTD inside document and skip that event 59 | Event::DocType(e) => { 60 | self.readers.push_back(reader); 61 | self.capture(e); 62 | continue; 63 | } 64 | // When entity is referenced, create new reader with the same settings as 65 | // the current reader have and push it to the top of stack. Then try to 66 | // read next event from it (on next iteration) 67 | Event::GeneralRef(e) => { 68 | if let Some(ch) = e.resolve_char_ref()? { 69 | self.readers.push_back(reader); 70 | return Ok(Event::Text(BytesText::from_escaped(ch.to_string()))); 71 | } 72 | let mut r = Reader::from_reader(self.resolve(&e)?); 73 | *r.config_mut() = reader.config().clone(); 74 | 75 | self.readers.push_back(reader); 76 | self.readers.push_back(r); 77 | continue; 78 | } 79 | // When reader is exhausted, do not return it to the stack 80 | Event::Eof => continue, 81 | 82 | // Return all other events to caller 83 | e => { 84 | self.readers.push_back(reader); 85 | return Ok(e); 86 | } 87 | } 88 | } 89 | return Ok(Event::Eof); 90 | } 91 | } 92 | 93 | /// In this example we use simple regular expression to capture entities from DTD. 
94 | /// In real application you should use DTD parser 95 | fn capture(&mut self, doctype: BytesText<'i>) { 96 | let doctype = match doctype.into_inner() { 97 | Cow::Borrowed(doctype) => doctype, 98 | Cow::Owned(_) => unreachable!("We are sure that event will be borrowed"), 99 | }; 100 | for cap in self.entity_re.captures_iter(doctype) { 101 | self.entities.insert( 102 | cap.get(1).unwrap().as_bytes(), 103 | cap.get(2).unwrap().as_bytes(), 104 | ); 105 | } 106 | } 107 | 108 | fn resolve(&self, entity: &[u8]) -> Result<&'i [u8], EscapeError> { 109 | match self.entities.get(entity) { 110 | Some(replacement) => Ok(replacement), 111 | None => Err(EscapeError::UnrecognizedEntity( 112 | 0..0, 113 | String::from_utf8_lossy(entity).into_owned(), 114 | )), 115 | } 116 | } 117 | 118 | fn get_entity(&self, entity: &str) -> Option<&'i str> { 119 | self.entities 120 | .get(entity.as_bytes()) 121 | // SAFETY: We are sure that slices are correct UTF-8 because we get 122 | // them from rust string 123 | .map(|value| from_utf8(value).unwrap()) 124 | } 125 | 126 | fn decoder(&self) -> Decoder { 127 | self.readers.back().unwrap().decoder() 128 | } 129 | } 130 | 131 | fn main() -> Result<(), Box> { 132 | let mut reader = MyReader::new( 133 | r#" 134 | 136 | " > 137 | &element1;" > 138 | ]> 139 | '&element2;' 140 | "#, 141 | )?; 142 | 143 | let event = reader.read_event()?; 144 | assert_eq!( 145 | event, 146 | Event::Start(BytesStart::from_content( 147 | r#"test label="Message: &text;""#, 148 | 4 149 | )) 150 | ); 151 | if let Event::Start(e) = event { 152 | let mut attrs = e.attributes(); 153 | 154 | let label = attrs.next().unwrap()?; 155 | assert_eq!(label.key, QName(b"label")); 156 | assert_eq!( 157 | label.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?, 158 | "Message: hello world" 159 | ); 160 | 161 | assert_eq!(attrs.next(), None); 162 | } 163 | 164 | // This is decoded decimal character reference ' 165 | assert_eq!( 166 | reader.read_event()?, 
167 | Event::Text(BytesText::from_escaped("'")) 168 | ); 169 | 170 | //-------------------------------------------------------------------------- 171 | // This part was inserted into original document from entity defined in DTD 172 | 173 | assert_eq!(reader.read_event()?, Event::Start(BytesStart::new("a"))); 174 | let event = reader.read_event()?; 175 | assert_eq!( 176 | event, 177 | Event::Empty(BytesStart::from_content( 178 | r#"dtd attr = 'Message: &text;'"#, 179 | 3 180 | )) 181 | ); 182 | if let Event::Start(e) = event { 183 | let mut attrs = e.attributes(); 184 | 185 | let attr = attrs.next().unwrap()?; 186 | assert_eq!(attr.key, QName(b"attr")); 187 | assert_eq!( 188 | attr.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?, 189 | "Message: hello world" 190 | ); 191 | 192 | assert_eq!(attrs.next(), None); 193 | } 194 | assert_eq!(reader.read_event()?, Event::End(BytesEnd::new("a"))); 195 | //-------------------------------------------------------------------------- 196 | 197 | // This is decoded hexadecimal character reference ' 198 | assert_eq!( 199 | reader.read_event()?, 200 | Event::Text(BytesText::from_escaped("'")) 201 | ); 202 | 203 | assert_eq!(reader.read_event()?, Event::End(BytesEnd::new("test"))); 204 | assert_eq!(reader.read_event()?, Event::Eof); 205 | 206 | Ok(()) 207 | } 208 | -------------------------------------------------------------------------------- /examples/flattened_enum.rs: -------------------------------------------------------------------------------- 1 | //! This example demonstrates how to deserialize and serialize enum nodes using an intermediate 2 | //! custom deserializer and serializer. 3 | //! The `elem` node can either be a `Foo` or a `Bar` node, depending on the `type`. 4 | //! The `type` attribute is used to determine which variant to deserialize. 5 | //! This is a workaround for [serde's issue](https://github.com/serde-rs/serde/issues/1905) 6 | //! 7 | //!
note: to use serde, the feature needs to be enabled 8 | //! run example with: 9 | //! cargo run --example flattened_enum --features="serialize" 10 | 11 | use std::fmt; 12 | 13 | use quick_xml::de::from_str; 14 | use quick_xml::se::to_string_with_root; 15 | use serde::de::value::MapAccessDeserializer; 16 | use serde::de::{Error, MapAccess, Visitor}; 17 | use serde::ser::SerializeMap; 18 | use serde::{Deserialize, Serialize}; 19 | 20 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 21 | struct Model { 22 | elem: Vec, 23 | } 24 | 25 | #[derive(Debug, PartialEq)] 26 | enum Elem { 27 | Foo(Foo), 28 | Bar(Bar), 29 | } 30 | 31 | impl<'de> Deserialize<'de> for Elem { 32 | fn deserialize(deserializer: D) -> Result 33 | where 34 | D: serde::Deserializer<'de>, 35 | { 36 | struct ElemVisitor; 37 | 38 | impl<'de> Visitor<'de> for ElemVisitor { 39 | type Value = Elem; 40 | 41 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 42 | formatter.write_str("an object with a `type` field") 43 | } 44 | 45 | fn visit_map(self, mut map: A) -> Result 46 | where 47 | A: MapAccess<'de>, 48 | { 49 | if let Some((key, value)) = map.next_entry::()? 
{ 50 | return match key.as_str() { 51 | "@type" => match value.as_str() { 52 | "foo" => { 53 | let f = Foo::deserialize(MapAccessDeserializer::new(map))?; 54 | Ok(Elem::Foo(f)) 55 | } 56 | "bar" => { 57 | let f = Bar::deserialize(MapAccessDeserializer::new(map))?; 58 | Ok(Elem::Bar(f)) 59 | } 60 | t => Err(Error::custom(format!("unknown type attribute `{t}`"))), 61 | }, 62 | a => Err(Error::custom(format!( 63 | "expected attribute `type`, but found `{a}`" 64 | ))), 65 | }; 66 | } 67 | Err(Error::custom("expected `type` attribute")) 68 | } 69 | } 70 | deserializer.deserialize_map(ElemVisitor) 71 | } 72 | } 73 | 74 | impl Serialize for Elem { 75 | fn serialize(&self, serializer: S) -> Result 76 | where 77 | S: serde::Serializer, 78 | { 79 | match &self { 80 | Elem::Foo(f) => { 81 | let mut state = serializer.serialize_map(Some(3))?; 82 | state.serialize_entry("@type", "foo")?; 83 | state.serialize_entry("a", &f.a)?; 84 | state.serialize_entry("subfoo", &f.subfoo)?; 85 | state.end() 86 | } 87 | Elem::Bar(b) => { 88 | let mut state = serializer.serialize_map(Some(2))?; 89 | state.serialize_entry("@type", "bar")?; 90 | state.serialize_entry("b", &b.b)?; 91 | state.end() 92 | } 93 | } 94 | } 95 | } 96 | 97 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 98 | struct Foo { 99 | a: String, 100 | subfoo: SubFoo, 101 | } 102 | 103 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 104 | struct SubFoo { 105 | a1: String, 106 | a2: String, 107 | a3: String, 108 | } 109 | 110 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 111 | struct Bar { 112 | b: String, 113 | } 114 | 115 | fn main() { 116 | let x = r#" 117 | 118 | 119 | 1 120 | 121 | 2 122 | 42 123 | 1337 124 | 125 | 126 | 127 | 22 128 | 129 | 130 | "#; 131 | 132 | let model: Model = from_str(&x).unwrap(); 133 | println!("{:?}", model); 134 | // Model { elem: [Foo(Foo { a: "1", subfoo: SubFoo { a1: "2", a2: "42", a3: "1337" } }), Bar(Bar { b: "22" })] } 135 | 136 | let x = to_string_with_root("model", 
&model).unwrap(); 137 | println!("{}", x); 138 | // 1242133722 139 | } 140 | -------------------------------------------------------------------------------- /examples/nested_readers.rs: -------------------------------------------------------------------------------- 1 | use pretty_assertions::assert_eq; 2 | use quick_xml::events::Event; 3 | use quick_xml::reader::Reader; 4 | 5 | // a structure to capture the rows we've extracted 6 | // from a ECMA-376 table in document.xml 7 | #[derive(Debug, Clone)] 8 | struct TableStat { 9 | index: u8, 10 | rows: Vec>, 11 | } 12 | // demonstrate how to nest readers 13 | // This is useful for when you need to traverse 14 | // a few levels of a document to extract things. 15 | fn main() -> Result<(), quick_xml::Error> { 16 | let mut buf = Vec::new(); 17 | // buffer for nested reader 18 | let mut skip_buf = Vec::new(); 19 | let mut count = 0; 20 | let mut reader = Reader::from_file("tests/documents/document.xml")?; 21 | let mut found_tables = Vec::new(); 22 | loop { 23 | match reader.read_event_into(&mut buf)? { 24 | Event::Start(element) => { 25 | if let b"w:tbl" = element.name().as_ref() { 26 | count += 1; 27 | let mut stats = TableStat { 28 | index: count, 29 | rows: vec![], 30 | }; 31 | // must define stateful variables 32 | // outside the nested loop else they are overwritten 33 | let mut row_index = 0; 34 | loop { 35 | skip_buf.clear(); 36 | match reader.read_event_into(&mut skip_buf)? 
{ 37 | Event::Start(element) => match element.name().as_ref() { 38 | b"w:tr" => { 39 | stats.rows.push(vec![]); 40 | row_index = stats.rows.len() - 1; 41 | } 42 | b"w:tc" => { 43 | stats.rows[row_index].push( 44 | String::from_utf8(element.name().as_ref().to_vec()) 45 | .unwrap(), 46 | ); 47 | } 48 | _ => {} 49 | }, 50 | Event::End(element) => { 51 | if element.name().as_ref() == b"w:tbl" { 52 | found_tables.push(stats); 53 | break; 54 | } 55 | } 56 | _ => {} 57 | } 58 | } 59 | } 60 | } 61 | Event::Eof => break, 62 | _ => {} 63 | } 64 | buf.clear(); 65 | } 66 | assert_eq!(found_tables.len(), 2); 67 | // pretty print the table 68 | println!("{:#?}", found_tables); 69 | assert_eq!(found_tables[0].index, 2); 70 | assert_eq!(found_tables[0].rows.len(), 2); 71 | assert_eq!(found_tables[0].rows[0].len(), 4); 72 | assert_eq!(found_tables[0].rows[1].len(), 4); 73 | 74 | assert_eq!(found_tables[1].index, 2); 75 | assert_eq!(found_tables[1].rows.len(), 2); 76 | assert_eq!(found_tables[1].rows[0].len(), 4); 77 | assert_eq!(found_tables[1].rows[1].len(), 4); 78 | Ok(()) 79 | } 80 | -------------------------------------------------------------------------------- /examples/read_buffered.rs: -------------------------------------------------------------------------------- 1 | // This example demonstrates how a reader (for example when reading from a file) 2 | // can be buffered. In that case, data read from the file is written to a supplied 3 | // buffer and returned XML events borrow from that buffer. 4 | // That way, allocations can be kept to a minimum. 
5 | 6 | fn main() -> Result<(), quick_xml::Error> { 7 | use quick_xml::events::Event; 8 | use quick_xml::reader::Reader; 9 | 10 | let mut reader = Reader::from_file("tests/documents/document.xml")?; 11 | reader.config_mut().trim_text(true); 12 | 13 | let mut buf = Vec::new(); 14 | 15 | let mut count = 0; 16 | 17 | loop { 18 | match reader.read_event_into(&mut buf) { 19 | Ok(Event::Start(ref e)) => { 20 | let name = e.name(); 21 | let name = reader.decoder().decode(name.as_ref())?; 22 | println!("read start event {:?}", name.as_ref()); 23 | count += 1; 24 | } 25 | Ok(Event::Eof) => break, // exits the loop when reaching end of file 26 | Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), 27 | _ => (), // There are several other `Event`s we do not consider here 28 | } 29 | } 30 | 31 | println!("read {} start events in total", count); 32 | 33 | Ok(()) 34 | } 35 | -------------------------------------------------------------------------------- /examples/read_nodes.rs: -------------------------------------------------------------------------------- 1 | // example that separates logic for reading different top-level nodes of xml tree 2 | // Note: for this specific data set using serde feature would simplify 3 | // this simple data is purely to make it easier to understand the code 4 | 5 | use quick_xml::events::attributes::AttrError; 6 | use quick_xml::events::{BytesStart, Event}; 7 | use quick_xml::name::QName; 8 | use quick_xml::reader::Reader; 9 | use std::borrow::Cow; 10 | use std::collections::HashMap; 11 | use std::convert::Infallible; 12 | use std::str; 13 | 14 | const XML: &str = r#" 15 | 16 | 17 | 18 | 19 | こんにちは 20 | 21 | 22 | さようなら 23 | 24 | 25 | Hola 26 | 27 | 28 | Adiós 29 | 30 | 31 | "#; 32 | 33 | // Enum variants is not read in example, so suppress the warning 34 | #[allow(dead_code)] 35 | #[derive(Debug)] 36 | enum AppError { 37 | /// XML parsing error 38 | Xml(quick_xml::Error), 39 | /// The `Translation/Text` node is missed 40 | 
NoText(String), 41 | } 42 | 43 | impl From for AppError { 44 | fn from(error: quick_xml::Error) -> Self { 45 | Self::Xml(error) 46 | } 47 | } 48 | 49 | impl From for AppError { 50 | fn from(error: AttrError) -> Self { 51 | Self::Xml(quick_xml::Error::InvalidAttr(error)) 52 | } 53 | } 54 | 55 | #[derive(Debug)] 56 | struct Translation { 57 | tag: String, 58 | lang: String, 59 | text: String, 60 | } 61 | 62 | impl Translation { 63 | fn new_from_element( 64 | reader: &mut Reader<&[u8]>, 65 | element: BytesStart, 66 | ) -> Result { 67 | let mut tag = Cow::Borrowed(""); 68 | let mut lang = Cow::Borrowed(""); 69 | 70 | for attr_result in element.attributes() { 71 | let a = attr_result?; 72 | match a.key.as_ref() { 73 | b"Language" => lang = a.decode_and_unescape_value(reader.decoder())?, 74 | b"Tag" => tag = a.decode_and_unescape_value(reader.decoder())?, 75 | _ => (), 76 | } 77 | } 78 | let mut element_buf = Vec::new(); 79 | let event = reader.read_event_into(&mut element_buf)?; 80 | 81 | if let Event::Start(ref e) = event { 82 | let name = e.name(); 83 | if name == QName(b"Text") { 84 | // note: `read_text` does not support content as CDATA 85 | let text_content = reader.read_text(e.name())?; 86 | Ok(Translation { 87 | tag: tag.into(), 88 | lang: lang.into(), 89 | text: text_content.into(), 90 | }) 91 | } else { 92 | dbg!("Expected Event::Start for Text, got: {:?}", &event); 93 | let name_string = reader 94 | .decoder() 95 | .decode(name.as_ref()) 96 | .map_err(quick_xml::Error::Encoding)?; 97 | Err(AppError::NoText(name_string.into())) 98 | } 99 | } else { 100 | let event_string = format!("{:?}", event); 101 | Err(AppError::NoText(event_string)) 102 | } 103 | } 104 | } 105 | 106 | fn main() -> Result<(), AppError> { 107 | // In a real-world use case, Settings would likely be a struct 108 | // HashMap here is just to make the sample code short 109 | let mut settings: HashMap; 110 | let mut translations: Vec = Vec::new(); 111 | 112 | let mut reader = 
Reader::from_str(XML); 113 | let config = reader.config_mut(); 114 | 115 | config.trim_text(true); 116 | // == Handling empty elements == 117 | // To simplify our processing code 118 | // we want the same events for empty elements, like: 119 | // 120 | // 121 | config.expand_empty_elements = true; 122 | 123 | let mut buf = Vec::new(); 124 | 125 | loop { 126 | let event = reader.read_event_into(&mut buf)?; 127 | 128 | match event { 129 | Event::Start(element) => match element.name().as_ref() { 130 | b"DefaultSettings" => { 131 | // Note: real app would handle errors with good defaults or halt program with nice message 132 | // This illustrates decoding an attribute's key and value with error handling 133 | settings = element 134 | .attributes() 135 | .map(|attr_result| { 136 | match attr_result { 137 | Ok(a) => { 138 | let key = reader.decoder().decode(a.key.local_name().as_ref()) 139 | .or_else(|err| { 140 | dbg!("unable to read key in DefaultSettings attribute {:?}, utf8 error {:?}", &a, err); 141 | Ok::, Infallible>(std::borrow::Cow::from("")) 142 | }) 143 | .unwrap().to_string(); 144 | let value = a.decode_and_unescape_value(reader.decoder()).or_else(|err| { 145 | dbg!("unable to read key in DefaultSettings attribute {:?}, utf8 error {:?}", &a, err); 146 | Ok::, Infallible>(std::borrow::Cow::from("")) 147 | }).unwrap().to_string(); 148 | (key, value) 149 | }, 150 | Err(err) => { 151 | dbg!("unable to read key in DefaultSettings, err = {:?}", err); 152 | (String::new(), String::new()) 153 | } 154 | } 155 | }) 156 | .collect(); 157 | assert_eq!(settings["Language"], "es"); 158 | assert_eq!(settings["Greeting"], "HELLO"); 159 | reader.read_to_end(element.name())?; 160 | } 161 | b"Translation" => { 162 | translations.push(Translation::new_from_element(&mut reader, element)?); 163 | } 164 | _ => (), 165 | }, 166 | 167 | Event::Eof => break, // exits the loop when reaching end of file 168 | _ => (), // There are `Event` types not considered here 169 | } 170 | } 171 |
dbg!("{:?}", &translations); 172 | assert_eq!(translations.len(), 4); 173 | assert_eq!(translations[2].tag, "HELLO"); 174 | assert_eq!(translations[2].text, "Hola"); 175 | assert_eq!(translations[2].lang, "es"); 176 | 177 | Ok(()) 178 | } 179 | -------------------------------------------------------------------------------- /examples/read_nodes_serde.rs: -------------------------------------------------------------------------------- 1 | // note: to use serde, the feature needs to be enabled 2 | // run example with: 3 | // cargo run --example read_nodes_serde --features="serialize" 4 | 5 | use quick_xml::de::from_str; 6 | use serde::Deserialize; 7 | 8 | #[derive(Debug, PartialEq, Default, Deserialize)] 9 | #[serde(default)] 10 | struct Translation { 11 | #[serde(rename = "@Tag")] 12 | tag: String, 13 | #[serde(rename = "@Language")] 14 | lang: String, 15 | #[serde(rename = "$text")] 16 | text: String, 17 | } 18 | 19 | #[derive(Debug, PartialEq, Default, Deserialize)] 20 | #[serde(default)] 21 | struct DefaultSettings { 22 | #[serde(rename = "@Language")] 23 | language: String, 24 | #[serde(rename = "@Greeting")] 25 | greeting: String, 26 | } 27 | 28 | #[derive(Debug, PartialEq, Default, Deserialize)] 29 | #[serde(default, rename_all = "PascalCase")] 30 | struct Config { 31 | #[serde(rename = "DefaultSettings")] 32 | settings: DefaultSettings, 33 | localization: Localization, 34 | } 35 | #[derive(Debug, PartialEq, Default, Deserialize)] 36 | #[serde(rename_all = "PascalCase")] 37 | struct Localization { 38 | translation: Vec, 39 | } 40 | 41 | const XML: &str = r#" 42 | 43 | 44 | 45 | 46 | 47 | こんにちは 48 | 49 | 50 | さようなら 51 | 52 | 53 | Hola 54 | 55 | 56 | Adiós 57 | 58 | 59 | 60 | "#; 61 | 62 | const ONE_TRANSLATION_XML: &str = r#" 63 | 64 | こんにちは 65 | 66 | "#; 67 | 68 | fn main() -> Result<(), quick_xml::DeError> { 69 | let t: Translation = from_str(ONE_TRANSLATION_XML)?; 70 | assert_eq!(t.tag, "HELLO"); 71 | assert_eq!(t.lang, "ja"); 72 | assert_eq!(t.text, 
"こんにちは"); 73 | 74 | let config: Config = from_str(XML)?; 75 | dbg!("{:?}", &config); 76 | 77 | assert_eq!(config.settings.language, "es"); 78 | assert_eq!(config.settings.greeting, "HELLO"); 79 | 80 | let translations = config.localization.translation; 81 | assert_eq!(translations.len(), 4); 82 | assert_eq!(translations[2].tag, "HELLO"); 83 | assert_eq!(translations[2].text, "Hola"); 84 | assert_eq!(translations[2].lang, "es"); 85 | Ok(()) 86 | } 87 | -------------------------------------------------------------------------------- /examples/read_texts.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | use quick_xml::events::Event; 3 | use quick_xml::reader::Reader; 4 | 5 | let xml = "text1text2\ 6 | text3text4"; 7 | 8 | let mut reader = Reader::from_str(xml); 9 | reader.config_mut().trim_text(true); 10 | 11 | loop { 12 | match reader.read_event() { 13 | Ok(Event::Start(e)) if e.name().as_ref() == b"tag2" => { 14 | // read_text_into for buffered readers not implemented 15 | let txt = reader 16 | .read_text(e.name()) 17 | .expect("Cannot decode text value"); 18 | println!("{:?}", txt); 19 | } 20 | Ok(Event::Eof) => break, // exits the loop when reaching end of file 21 | Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), 22 | _ => (), // There are several other `Event`s we do not consider here 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | coverage 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "quick-xml-fuzz" 3 | version = "0.0.0" 4 | authors = ["Automatically generated"] 5 | publish = false 6 | edition = "2021" 7 | 8 | 
[package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | arbitrary = { version = "1.3", features = ["derive"] } 13 | libfuzzer-sys = "0.4" 14 | 15 | [dependencies.quick-xml] 16 | path = ".." 17 | features = ["arbitrary"] 18 | 19 | # Prevent this from interfering with workspaces 20 | [workspace] 21 | members = ["."] 22 | 23 | [[bin]] 24 | name = "fuzz_target_1" 25 | path = "fuzz_targets/fuzz_target_1.rs" 26 | test = false 27 | doc = false 28 | 29 | [[bin]] 30 | name = "structured_roundtrip" 31 | path = "fuzz_targets/structured_roundtrip.rs" 32 | test = false 33 | doc = false 34 | -------------------------------------------------------------------------------- /fuzz/README.md: -------------------------------------------------------------------------------- 1 | Run fuzzing with `-O` to avoid false positives at `debug_assert!`, e.g.: 2 | 3 | ```bash 4 | cargo fuzz run -O -j4 fuzz_target_1 5 | ``` 6 | 7 | See also: https://github.com/rust-fuzz/cargo-fuzz 8 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/fuzz_target_1.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use libfuzzer_sys::fuzz_target; 3 | use std::hint::black_box; 4 | 5 | use quick_xml::{events::Event, reader::Reader, writer::Writer}; 6 | use std::io::Cursor; 7 | 8 | macro_rules! 
debug_format { 9 | ($x:expr) => { 10 | let _unused = std::hint::black_box(format!("{:?}", $x)); 11 | }; 12 | } 13 | 14 | fn round_trip(reader: &mut Reader) -> () 15 | where 16 | R: std::io::BufRead, 17 | { 18 | let mut writer = Writer::new(Cursor::new(Vec::new())); 19 | let mut buf = vec![]; 20 | let config = reader.config_mut(); 21 | config.expand_empty_elements = true; 22 | config.trim_text(true); 23 | loop { 24 | let event_result = reader.read_event_into(&mut buf); 25 | if let Ok(ref event) = event_result { 26 | let _event = black_box(event.borrow()); 27 | let _event = black_box(event.as_ref()); 28 | debug_format!(event); 29 | debug_format!(writer.write_event(event.borrow())); 30 | } 31 | match event_result { 32 | Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { 33 | debug_format!(e); 34 | debug_format!(e.name()); 35 | for a in e.attributes() { 36 | debug_format!(a); 37 | if a.ok().map_or(false, |a| a.unescape_value().is_err()) { 38 | break; 39 | } 40 | } 41 | } 42 | Ok(Event::Text(ref e)) 43 | | Ok(Event::Comment(ref e)) 44 | | Ok(Event::DocType(ref e)) => { 45 | debug_format!(e); 46 | if let Err(err) = e.decode() { 47 | debug_format!(err); 48 | break; 49 | } 50 | } 51 | Ok(Event::CData(e)) => { 52 | if let Err(err) = e.escape() { 53 | let _displayed = black_box(format!("{}", err)); 54 | debug_format!(err); 55 | break; 56 | } 57 | } 58 | Ok(Event::GeneralRef(ref e)) => { 59 | debug_format!(e); 60 | debug_format!(e.is_char_ref()); 61 | debug_format!(e.resolve_char_ref()); 62 | } 63 | Ok(Event::PI(ref e)) => { 64 | debug_format!(e); 65 | } 66 | Ok(Event::Decl(ref e)) => { 67 | debug_format!(e); 68 | let _ = black_box(e.version()); 69 | let _ = black_box(e.encoding()); 70 | let _ = black_box(e.standalone()); 71 | } 72 | Ok(Event::End(e)) => { 73 | debug_format!(e.local_name()); 74 | let name = e.name(); 75 | debug_format!(name); 76 | debug_format!(name.prefix()); 77 | debug_format!(name.local_name()); 78 | debug_format!(name.decompose()); 79 | 
debug_format!(name.as_namespace_binding()); 80 | debug_format!(e); 81 | } 82 | Err(e) => { 83 | debug_format!(e); 84 | break; 85 | } 86 | Ok(Event::Eof) => break, 87 | } 88 | buf.clear(); 89 | } 90 | let _round_trip = std::hint::black_box(writer.into_inner().into_inner()); 91 | } 92 | 93 | fuzz_target!(|data: &[u8]| { 94 | // From reader 95 | let cursor = Cursor::new(data); 96 | let mut reader = Reader::from_reader(cursor); 97 | _ = std::hint::black_box(round_trip(&mut reader)); 98 | 99 | // From str 100 | if let Ok(s) = std::str::from_utf8(data) { 101 | let mut reader = Reader::from_str(s); 102 | _ = std::hint::black_box(round_trip(&mut reader)); 103 | } 104 | }); 105 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/structured_roundtrip.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use arbitrary::{Arbitrary, Unstructured}; 4 | use libfuzzer_sys::fuzz_target; 5 | use quick_xml::events::{BytesCData, BytesPI, BytesText, Event}; 6 | use quick_xml::reader::{Config, NsReader, Reader}; 7 | use quick_xml::writer::Writer; 8 | use std::{hint::black_box, io::Cursor}; 9 | 10 | #[derive(Debug, arbitrary::Arbitrary)] 11 | enum ElementWriterFunc<'a> { 12 | WriteTextContent(&'a str), 13 | WriteCDataContent(&'a str), 14 | WritePiContent(&'a str), 15 | WriteEmpty, 16 | // TODO: We can't automatically generate an arbitrary function 17 | // WriteInnerContent, 18 | } 19 | 20 | fn arbitrary_name(u: &mut Unstructured) -> arbitrary::Result { 21 | let s = String::arbitrary(u)?; 22 | if s.is_empty() || !s.chars().all(char::is_alphanumeric) { 23 | return Err(arbitrary::Error::IncorrectFormat); 24 | } 25 | return Ok(s); 26 | } 27 | 28 | #[derive(Debug, arbitrary::Arbitrary)] 29 | enum WriterFunc<'a> { 30 | WriteEvent(Event<'a>), 31 | WriteBom, 32 | WriteIndent, 33 | CreateElement { 34 | #[arbitrary(with = arbitrary_name)] 35 | name: String, 36 | func: ElementWriterFunc<'a>, 37 
| attributes: Vec<(&'a str, &'a str)>, 38 | }, 39 | } 40 | 41 | #[derive(Debug, arbitrary::Arbitrary)] 42 | struct Driver<'a> { 43 | writer_funcs: Vec>, 44 | reader_config: Config, 45 | } 46 | 47 | fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> { 48 | let mut writer = Writer::new(Cursor::new(Vec::new())); 49 | let writer_funcs = driver.writer_funcs; 50 | for writer_func in writer_funcs.iter() { 51 | // TODO: Handle error cases. 52 | use WriterFunc::*; 53 | match writer_func { 54 | WriteEvent(event) => writer.write_event(event.borrow())?, 55 | WriteBom => writer.write_bom()?, 56 | WriteIndent => writer.write_indent()?, 57 | CreateElement { 58 | name, 59 | func, 60 | attributes, 61 | } => { 62 | let element_writer = writer 63 | .create_element(name) 64 | .with_attributes(attributes.into_iter().copied()); 65 | use ElementWriterFunc::*; 66 | match func { 67 | WriteTextContent(text) => { 68 | element_writer.write_text_content(BytesText::from_escaped(*text))?; 69 | } 70 | WriteCDataContent(text) => { 71 | _ = element_writer.write_cdata_content(BytesCData::new(*text))?; 72 | } 73 | WritePiContent(text) => { 74 | _ = element_writer.write_pi_content(BytesPI::new(*text))?; 75 | } 76 | WriteEmpty => { 77 | _ = element_writer.write_empty()?; 78 | } 79 | } 80 | } 81 | } 82 | } 83 | let xml = writer.into_inner().into_inner(); 84 | // The str should be valid as we just generated it, unwrapping **should** be safe. 
85 | let mut reader = Reader::from_str(std::str::from_utf8(&xml).unwrap()); 86 | *reader.config_mut() = driver.reader_config.clone(); 87 | 88 | loop { 89 | let event = black_box(reader.read_event()?); 90 | if event == Event::Eof { 91 | break; 92 | } 93 | } 94 | 95 | let mut reader = NsReader::from_reader(&xml[..]); 96 | *reader.config_mut() = driver.reader_config; 97 | 98 | loop { 99 | let event = black_box(reader.read_event()?); 100 | if event == Event::Eof { 101 | break; 102 | } 103 | } 104 | Ok(()) 105 | } 106 | 107 | fuzz_target!(|driver: Driver| { 108 | if let Err(e) = fuzz_round_trip(driver) { 109 | black_box(format!("{e:?}")); 110 | } 111 | }); 112 | -------------------------------------------------------------------------------- /src/de/attributes.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of the deserializer from attributes 2 | 3 | use std::borrow::Cow; 4 | 5 | use serde::de::{DeserializeSeed, Deserializer, Error, IntoDeserializer, MapAccess, Visitor}; 6 | use serde::forward_to_deserialize_any; 7 | 8 | use crate::de::key::QNameDeserializer; 9 | use crate::de::SimpleTypeDeserializer; 10 | use crate::errors::serialize::DeError; 11 | use crate::events::attributes::Attributes; 12 | 13 | impl<'i> Attributes<'i> { 14 | /// Converts this iterator into a serde's [`MapAccess`] trait to use with serde. 15 | /// The returned object also implements the [`Deserializer`] trait. 16 | /// 17 | /// # Parameters 18 | /// - `prefix`: a prefix of the field names in structs that should be stripped 19 | /// to get the local attribute name. The [`crate::de::Deserializer`] uses `"@"` 20 | /// as a prefix, but [`Self::into_deserializer()`] uses an empty string, which means 21 | /// that we do not strip anything.
22 | /// 23 | /// # Example 24 | /// ``` 25 | /// # use pretty_assertions::assert_eq; 26 | /// use quick_xml::events::BytesStart; 27 | /// use serde::Deserialize; 28 | /// use serde::de::IntoDeserializer; 29 | /// 30 | /// #[derive(Debug, PartialEq, Deserialize)] 31 | /// struct MyData<'i> { 32 | /// question: &'i str, 33 | /// answer: u32, 34 | /// } 35 | /// 36 | /// #[derive(Debug, PartialEq, Deserialize)] 37 | /// struct MyDataPrefixed<'i> { 38 | /// #[serde(rename = "@question")] question: &'i str, 39 | /// #[serde(rename = "@answer")] answer: u32, 40 | /// } 41 | /// 42 | /// let tag = BytesStart::from_content( 43 | /// "tag 44 | /// question = 'The Ultimate Question of Life, the Universe, and Everything' 45 | /// answer = '42'", 46 | /// 3 47 | /// ); 48 | /// // Strip nothing from the field names 49 | /// let de = tag.attributes().clone().into_deserializer(); 50 | /// assert_eq!( 51 | /// MyData::deserialize(de).unwrap(), 52 | /// MyData { 53 | /// question: "The Ultimate Question of Life, the Universe, and Everything", 54 | /// answer: 42, 55 | /// } 56 | /// ); 57 | /// 58 | /// // Strip "@" from the field name 59 | /// let de = tag.attributes().into_map_access("@"); 60 | /// assert_eq!( 61 | /// MyDataPrefixed::deserialize(de).unwrap(), 62 | /// MyDataPrefixed { 63 | /// question: "The Ultimate Question of Life, the Universe, and Everything", 64 | /// answer: 42, 65 | /// } 66 | /// ); 67 | /// ``` 68 | #[inline] 69 | pub const fn into_map_access(self, prefix: &'static str) -> AttributesDeserializer<'i> { 70 | AttributesDeserializer { 71 | iter: self, 72 | value: None, 73 | prefix, 74 | key_buf: String::new(), 75 | } 76 | } 77 | } 78 | 79 | impl<'de> IntoDeserializer<'de, DeError> for Attributes<'de> { 80 | type Deserializer = AttributesDeserializer<'de>; 81 | 82 | #[inline] 83 | fn into_deserializer(self) -> Self::Deserializer { 84 | self.into_map_access("") 85 | } 86 | } 87 | 88 | 
//////////////////////////////////////////////////////////////////////////////////////////////////// 89 | 90 | /// A deserializer that makes it possible to pack all attributes into a struct. 91 | /// It is created by [`Attributes::into_map_access`] or [`Attributes::into_deserializer`] 92 | /// methods. 93 | /// 94 | /// This deserializer always calls [`Visitor::visit_map`] with self as [`MapAccess`]. 95 | /// 96 | /// # Lifetime 97 | /// 98 | /// `'i` is a lifetime of the original buffer from which attributes were parsed. 99 | /// In particular, when reader was created from a string, this is lifetime of the 100 | /// string. 101 | #[derive(Debug, Clone)] 102 | pub struct AttributesDeserializer<'i> { 103 | iter: Attributes<'i>, 104 | /// The value of the attribute, read in last call to `next_key_seed`. 105 | value: Option>, 106 | /// This prefix will be stripped from struct fields before match against attribute name. 107 | prefix: &'static str, 108 | /// Buffer to store attribute name as a field name exposed to serde consumers. 109 | /// Kept in the deserializer to avoid many small allocations 110 | key_buf: String, 111 | } 112 | 113 | impl<'de> Deserializer<'de> for AttributesDeserializer<'de> { 114 | type Error = DeError; 115 | 116 | #[inline] 117 | fn deserialize_any(self, visitor: V) -> Result 118 | where 119 | V: Visitor<'de>, 120 | { 121 | visitor.visit_map(self) 122 | } 123 | 124 | forward_to_deserialize_any!
{ 125 | bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string 126 | bytes byte_buf option unit unit_struct newtype_struct seq tuple 127 | tuple_struct map struct enum identifier ignored_any 128 | } 129 | } 130 | 131 | impl<'de> MapAccess<'de> for AttributesDeserializer<'de> { 132 | type Error = DeError; 133 | 134 | fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> 135 | where 136 | K: DeserializeSeed<'de>, 137 | { 138 | debug_assert_eq!(self.value, None); 139 | 140 | match self.iter.next() { 141 | None => Ok(None), 142 | Some(Ok(attr)) => { 143 | self.value = Some(attr.value); 144 | self.key_buf.clear(); 145 | self.key_buf.push_str(self.prefix); 146 | let de = 147 | QNameDeserializer::from_attr(attr.key, self.iter.decoder(), &mut self.key_buf)?; 148 | seed.deserialize(de).map(Some) 149 | } 150 | Some(Err(err)) => Err(Error::custom(err)), 151 | } 152 | } 153 | 154 | fn next_value_seed(&mut self, seed: V) -> Result 155 | where 156 | V: DeserializeSeed<'de>, 157 | { 158 | match self.value.take() { 159 | Some(value) => { 160 | let de = 161 | SimpleTypeDeserializer::from_part(&value, 0..value.len(), self.iter.decoder()); 162 | seed.deserialize(de) 163 | } 164 | None => Err(DeError::KeyNotRead), 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/de/resolver.rs: -------------------------------------------------------------------------------- 1 | //! 
Entity resolver module 2 | 3 | use std::convert::Infallible; 4 | use std::error::Error; 5 | 6 | use crate::escape::resolve_predefined_entity; 7 | use crate::events::BytesText; 8 | 9 | /// Used to resolve unknown entities while parsing 10 | /// 11 | /// # Example 12 | /// 13 | /// ``` 14 | /// # use serde::Deserialize; 15 | /// # use pretty_assertions::assert_eq; 16 | /// use regex::bytes::Regex; 17 | /// use std::collections::BTreeMap; 18 | /// use std::string::FromUtf8Error; 19 | /// use quick_xml::de::{Deserializer, EntityResolver}; 20 | /// use quick_xml::events::BytesText; 21 | /// 22 | /// struct DocTypeEntityResolver { 23 | /// re: Regex, 24 | /// map: BTreeMap, 25 | /// } 26 | /// 27 | /// impl Default for DocTypeEntityResolver { 28 | /// fn default() -> Self { 29 | /// Self { 30 | /// // We do not focus on true parsing in this example 31 | /// // You should use special libraries to parse DTD 32 | /// re: Regex::new(r#""#).unwrap(), 33 | /// map: BTreeMap::new(), 34 | /// } 35 | /// } 36 | /// } 37 | /// 38 | /// impl EntityResolver for DocTypeEntityResolver { 39 | /// type Error = FromUtf8Error; 40 | /// 41 | /// fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> { 42 | /// for cap in self.re.captures_iter(&doctype) { 43 | /// self.map.insert( 44 | /// String::from_utf8(cap[1].to_vec())?, 45 | /// String::from_utf8(cap[2].to_vec())?, 46 | /// ); 47 | /// } 48 | /// Ok(()) 49 | /// } 50 | /// 51 | /// fn resolve(&self, entity: &str) -> Option<&str> { 52 | /// self.map.get(entity).map(|s| s.as_str()) 53 | /// } 54 | /// } 55 | /// 56 | /// let xml_reader = br#" 57 | /// ]> 58 | /// 59 | /// &e1; 60 | /// 61 | /// "#.as_ref(); 62 | /// 63 | /// let mut de = Deserializer::with_resolver( 64 | /// xml_reader, 65 | /// DocTypeEntityResolver::default(), 66 | /// ); 67 | /// let data: BTreeMap = BTreeMap::deserialize(&mut de).unwrap(); 68 | /// 69 | /// assert_eq!(data.get("entity_one"), Some(&"entity 1".to_string())); 70 | /// ``` 71 | pub trait 
EntityResolver { 72 | /// The error type that represents DTD parse error 73 | type Error: Error; 74 | 75 | /// Called on contents of [`Event::DocType`] to capture declared entities. 76 | /// Can be called multiple times, for each parsed `<!DOCTYPE >` declaration. 77 | /// 78 | /// [`Event::DocType`]: crate::events::Event::DocType 79 | fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>; 80 | 81 | /// Called when an entity needs to be resolved. 82 | /// 83 | /// `None` is returned if a suitable value can not be found. 84 | /// In that case an [`EscapeError::UnrecognizedEntity`] will be returned by 85 | /// a deserializer. 86 | /// 87 | /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity 88 | fn resolve(&self, entity: &str) -> Option<&str>; 89 | } 90 | 91 | /// An [`EntityResolver`] that resolves only predefined entities: 92 | /// 93 | /// | Entity | Resolution 94 | /// |--------|------------ 95 | /// |`&lt;` | `<` 96 | /// |`&gt;` | `>` 97 | /// |`&amp;` | `&` 98 | /// |`&apos;`| `'` 99 | /// |`&quot;`| `"` 100 | #[derive(Default, Copy, Clone)] 101 | pub struct PredefinedEntityResolver; 102 | 103 | impl EntityResolver for PredefinedEntityResolver { 104 | type Error = Infallible; 105 | 106 | #[inline] 107 | fn capture(&mut self, _doctype: BytesText) -> Result<(), Self::Error> { 108 | Ok(()) 109 | } 110 | 111 | #[inline] 112 | fn resolve(&self, entity: &str) -> Option<&str> { 113 | resolve_predefined_entity(entity) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/de/text.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | de::simple_type::SimpleTypeDeserializer, 3 | de::{Text, TEXT_KEY}, 4 | errors::serialize::DeError, 5 | utils::CowRef, 6 | }; 7 | use serde::de::value::BorrowedStrDeserializer; 8 | use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor}; 9 | use serde::serde_if_integer128; 10 | use
std::borrow::Cow; 11 | 12 | /// A deserializer for a single text node of a mixed sequence of tags and text. 13 | /// 14 | /// This deserializer is very similar to a [`MapValueDeserializer`] (when it 15 | /// processes the [`DeEvent::Text`] event). The only difference is in the 16 | /// `deserialize_seq` method. This deserializer will perform deserialization 17 | /// from a textual content, whereas the [`MapValueDeserializer`] will iterate 18 | /// over tags / text within its parent tag. 19 | /// 20 | /// This deserializer processes items as follows: 21 | /// - numbers are parsed from a text content using [`FromStr`]; in case of error 22 | /// [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], or [`Visitor::visit_string`] 23 | /// is called; it is the responsibility of the type to return an error if it is 24 | /// not able to process the passed data; 25 | /// - booleans converted from the text according to the XML [specification]: 26 | /// - `"true"` and `"1"` converted to `true`; 27 | /// - `"false"` and `"0"` converted to `false`; 28 | /// - everything else calls [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], 29 | /// or [`Visitor::visit_string`]; it is the responsibility of the type to return 30 | /// an error if it is not able to process the passed data; 31 | /// - strings returned as is; 32 | /// - characters also returned as strings.
If string contain more than one character 33 | /// or is empty, it is the responsibility of the type to return an error; 34 | /// - `Option`: 35 | /// - empty text is deserialized as `None`; 36 | /// - everything else is deserialized as `Some` using the same deserializer; 37 | /// - units (`()`) and unit structs are always deserialized successfully, the content is ignored; 38 | /// - newtype structs forward deserialization to the inner type using the same 39 | /// deserializer; 40 | /// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`] 41 | /// (this is the difference): text content passed to the deserializer directly; 42 | /// - structs and maps call [`Visitor::visit_borrowed_str`] or [`Visitor::visit_string`], 43 | /// it is the responsibility of the type to return an error if it is not able to process 44 | /// this data; 45 | /// - enums: 46 | /// - the variant name is deserialized as `$text`; 47 | /// - the content is deserialized using the same deserializer: 48 | /// - unit variants: just return `()`; 49 | /// - newtype variants forward deserialization to the inner type using the 50 | /// same deserializer; 51 | /// - tuple and struct variants are deserialized using [`SimpleTypeDeserializer`]. 52 | /// 53 | /// [`MapValueDeserializer`]: ../map/struct.MapValueDeserializer.html 54 | /// [`DeEvent::Text`]: crate::de::DeEvent::Text 55 | /// [`FromStr`]: std::str::FromStr 56 | /// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean 57 | pub struct TextDeserializer<'de>(pub Text<'de>); 58 | 59 | impl<'de> TextDeserializer<'de> { 60 | /// Returns the next string as concatenated content of consecutive [`Text`] and 61 | /// [`CData`] events, used inside [`deserialize_primitives!()`].
62 | /// 63 | /// [`Text`]: crate::events::Event::Text 64 | /// [`CData`]: crate::events::Event::CData 65 | #[inline] 66 | fn read_string(self) -> Result, DeError> { 67 | Ok(self.0.text) 68 | } 69 | } 70 | 71 | impl<'de> Deserializer<'de> for TextDeserializer<'de> { 72 | type Error = DeError; 73 | 74 | deserialize_primitives!(); 75 | 76 | fn deserialize_unit(self, visitor: V) -> Result 77 | where 78 | V: Visitor<'de>, 79 | { 80 | visitor.visit_unit() 81 | } 82 | 83 | fn deserialize_option(self, visitor: V) -> Result 84 | where 85 | V: Visitor<'de>, 86 | { 87 | if self.0.is_empty() { 88 | visitor.visit_none() 89 | } else { 90 | visitor.visit_some(self) 91 | } 92 | } 93 | 94 | /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] 95 | /// with this deserializer. 96 | fn deserialize_newtype_struct( 97 | self, 98 | _name: &'static str, 99 | visitor: V, 100 | ) -> Result 101 | where 102 | V: Visitor<'de>, 103 | { 104 | visitor.visit_newtype_struct(self) 105 | } 106 | 107 | /// This method deserializes a sequence inside of element that itself is a 108 | /// sequence element: 109 | /// 110 | /// ```xml 111 | /// <> 112 | /// ... 113 | /// inner sequence as xs:list 114 | /// ... 115 | /// 116 | /// ``` 117 | fn deserialize_seq(self, visitor: V) -> Result 118 | where 119 | V: Visitor<'de>, 120 | { 121 | SimpleTypeDeserializer::from_text_content(self.0).deserialize_seq(visitor) 122 | } 123 | 124 | #[inline] 125 | fn deserialize_struct( 126 | self, 127 | _name: &'static str, 128 | _fields: &'static [&'static str], 129 | visitor: V, 130 | ) -> Result 131 | where 132 | V: Visitor<'de>, 133 | { 134 | // Deserializer methods are only hints, if deserializer could not satisfy 135 | // request, it should return the data that it has. 
It is responsibility 136 | // of a Visitor to return an error if it does not understand the data 137 | self.deserialize_str(visitor) 138 | } 139 | 140 | fn deserialize_enum( 141 | self, 142 | _name: &'static str, 143 | _variants: &'static [&'static str], 144 | visitor: V, 145 | ) -> Result 146 | where 147 | V: Visitor<'de>, 148 | { 149 | visitor.visit_enum(self) 150 | } 151 | 152 | #[inline] 153 | fn deserialize_any(self, visitor: V) -> Result 154 | where 155 | V: Visitor<'de>, 156 | { 157 | self.deserialize_str(visitor) 158 | } 159 | } 160 | 161 | impl<'de> EnumAccess<'de> for TextDeserializer<'de> { 162 | type Error = DeError; 163 | type Variant = Self; 164 | 165 | fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> 166 | where 167 | V: DeserializeSeed<'de>, 168 | { 169 | let name = seed.deserialize(BorrowedStrDeserializer::::new(TEXT_KEY))?; 170 | Ok((name, self)) 171 | } 172 | } 173 | 174 | impl<'de> VariantAccess<'de> for TextDeserializer<'de> { 175 | type Error = DeError; 176 | 177 | #[inline] 178 | fn unit_variant(self) -> Result<(), Self::Error> { 179 | Ok(()) 180 | } 181 | 182 | fn newtype_variant_seed(self, seed: T) -> Result 183 | where 184 | T: DeserializeSeed<'de>, 185 | { 186 | seed.deserialize(self) 187 | } 188 | 189 | #[inline] 190 | fn tuple_variant(self, len: usize, visitor: V) -> Result 191 | where 192 | V: Visitor<'de>, 193 | { 194 | self.deserialize_tuple(len, visitor) 195 | } 196 | 197 | #[inline] 198 | fn struct_variant( 199 | self, 200 | fields: &'static [&'static str], 201 | visitor: V, 202 | ) -> Result 203 | where 204 | V: Visitor<'de>, 205 | { 206 | self.deserialize_struct("", fields, visitor) 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/de/var.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | de::key::QNameDeserializer, 3 | de::map::ElementMapAccess, 4 | de::resolver::EntityResolver, 5 | 
de::simple_type::SimpleTypeDeserializer, 6 | de::{DeEvent, Deserializer, XmlRead, TEXT_KEY}, 7 | errors::serialize::DeError, 8 | }; 9 | use serde::de::value::BorrowedStrDeserializer; 10 | use serde::de::{self, DeserializeSeed, Deserializer as _, Visitor}; 11 | 12 | /// An enum access 13 | pub struct EnumAccess<'de, 'd, R, E> 14 | where 15 | R: XmlRead<'de>, 16 | E: EntityResolver, 17 | { 18 | de: &'d mut Deserializer<'de, R, E>, 19 | } 20 | 21 | impl<'de, 'd, R, E> EnumAccess<'de, 'd, R, E> 22 | where 23 | R: XmlRead<'de>, 24 | E: EntityResolver, 25 | { 26 | pub fn new(de: &'d mut Deserializer<'de, R, E>) -> Self { 27 | EnumAccess { de } 28 | } 29 | } 30 | 31 | impl<'de, 'd, R, E> de::EnumAccess<'de> for EnumAccess<'de, 'd, R, E> 32 | where 33 | R: XmlRead<'de>, 34 | E: EntityResolver, 35 | { 36 | type Error = DeError; 37 | type Variant = VariantAccess<'de, 'd, R, E>; 38 | 39 | fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> 40 | where 41 | V: DeserializeSeed<'de>, 42 | { 43 | let (name, is_text) = match self.de.peek()? 
{ 44 | DeEvent::Start(e) => (seed.deserialize(QNameDeserializer::from_elem(e)?)?, false), 45 | DeEvent::Text(_) => ( 46 | seed.deserialize(BorrowedStrDeserializer::::new(TEXT_KEY))?, 47 | true, 48 | ), 49 | // SAFETY: The reader guarantees that we don't have unmatched tags 50 | // If we are here, then our deserializer has a bug 51 | DeEvent::End(e) => unreachable!("{:?}", e), 52 | DeEvent::Eof => return Err(DeError::UnexpectedEof), 53 | }; 54 | Ok(( 55 | name, 56 | VariantAccess { 57 | de: self.de, 58 | is_text, 59 | }, 60 | )) 61 | } 62 | } 63 | 64 | pub struct VariantAccess<'de, 'd, R, E> 65 | where 66 | R: XmlRead<'de>, 67 | E: EntityResolver, 68 | { 69 | de: &'d mut Deserializer<'de, R, E>, 70 | /// `true` if variant should be deserialized from a textual content 71 | /// and `false` if from tag 72 | is_text: bool, 73 | } 74 | 75 | impl<'de, 'd, R, E> de::VariantAccess<'de> for VariantAccess<'de, 'd, R, E> 76 | where 77 | R: XmlRead<'de>, 78 | E: EntityResolver, 79 | { 80 | type Error = DeError; 81 | 82 | fn unit_variant(self) -> Result<(), Self::Error> { 83 | match self.de.next()? { 84 | // Consume subtree 85 | DeEvent::Start(e) => self.de.read_to_end(e.name()), 86 | // There is no need to deserialize using SimpleTypeDeserializer, because 87 | // it returns `()` when `deserialize_unit()` is requested 88 | DeEvent::Text(_) => Ok(()), 89 | // SAFETY: the other events are filtered in `variant_seed()` 90 | _ => unreachable!("Only `Start` or `Text` events are possible here"), 91 | } 92 | } 93 | 94 | fn newtype_variant_seed(self, seed: T) -> Result 95 | where 96 | T: DeserializeSeed<'de>, 97 | { 98 | if self.is_text { 99 | match self.de.next()?
{ 100 | DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)), 101 | // SAFETY: the other events are filtered in `variant_seed()` 102 | _ => unreachable!("Only `Text` events are possible here"), 103 | } 104 | } else { 105 | seed.deserialize(self.de) 106 | } 107 | } 108 | 109 | fn tuple_variant(self, len: usize, visitor: V) -> Result 110 | where 111 | V: Visitor<'de>, 112 | { 113 | if self.is_text { 114 | match self.de.next()? { 115 | DeEvent::Text(e) => { 116 | SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor) 117 | } 118 | // SAFETY: the other events are filtered in `variant_seed()` 119 | _ => unreachable!("Only `Text` events are possible here"), 120 | } 121 | } else { 122 | self.de.deserialize_tuple(len, visitor) 123 | } 124 | } 125 | 126 | fn struct_variant( 127 | self, 128 | fields: &'static [&'static str], 129 | visitor: V, 130 | ) -> Result 131 | where 132 | V: Visitor<'de>, 133 | { 134 | match self.de.next()? { 135 | DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.de, e, fields)?), 136 | DeEvent::Text(e) => { 137 | SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor) 138 | } 139 | // SAFETY: the other events are filtered in `variant_seed()` 140 | _ => unreachable!("Only `Start` or `Text` events are possible here"), 141 | } 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/encoding.rs: -------------------------------------------------------------------------------- 1 | //! A module for wrappers that encode / decode data. 2 | 3 | use std::borrow::Cow; 4 | use std::str::Utf8Error; 5 | 6 | #[cfg(feature = "encoding")] 7 | use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8}; 8 | 9 | /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-8. 
10 | /// See 11 | pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF]; 12 | /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-16 with little-endian byte order. 13 | /// See 14 | #[cfg(feature = "encoding")] 15 | pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE]; 16 | /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-16 with big-endian byte order. 17 | /// See 18 | #[cfg(feature = "encoding")] 19 | pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF]; 20 | 21 | /// An error when decoding or encoding 22 | /// 23 | /// If feature [`encoding`] is disabled, the [`EncodingError`] is always [`EncodingError::Utf8`] 24 | /// 25 | /// [`encoding`]: ../index.html#encoding 26 | #[derive(Clone, Debug, PartialEq, Eq)] 27 | #[non_exhaustive] 28 | pub enum EncodingError { 29 | /// Input was not valid UTF-8 30 | Utf8(Utf8Error), 31 | /// Input did not adhere to the given encoding 32 | #[cfg(feature = "encoding")] 33 | Other(&'static Encoding), 34 | } 35 | 36 | impl From for EncodingError { 37 | #[inline] 38 | fn from(e: Utf8Error) -> Self { 39 | Self::Utf8(e) 40 | } 41 | } 42 | 43 | impl std::error::Error for EncodingError { 44 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 45 | match self { 46 | Self::Utf8(e) => Some(e), 47 | #[cfg(feature = "encoding")] 48 | Self::Other(_) => None, 49 | } 50 | } 51 | } 52 | 53 | impl std::fmt::Display for EncodingError { 54 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 55 | match self { 56 | Self::Utf8(e) => write!(f, "cannot decode input using UTF-8: {}", e), 57 | #[cfg(feature = "encoding")] 58 | Self::Other(encoding) => write!(f, "cannot decode input using {}", encoding.name()), 59 | } 60 | } 61 | } 62 | 63 | /// Decoder of byte slices into strings. 64 | /// 65 | /// If feature [`encoding`] is enabled, this encoding taken from the `"encoding"` 66 | /// XML declaration or assumes UTF-8, if XML has no declaration, encoding 67 | /// key is not defined or contains unknown encoding. 
68 | /// 69 | /// The library supports any UTF-8 compatible encodings that the `encoding_rs` crate 70 | /// supports. [*UTF-16 and ISO-2022-JP are not supported at present*][utf16]. 71 | /// 72 | /// If feature [`encoding`] is disabled, the decoder is always a UTF-8 decoder: 73 | /// any XML declarations are ignored. 74 | /// 75 | /// [utf16]: https://github.com/tafia/quick-xml/issues/158 76 | /// [`encoding`]: ../index.html#encoding 77 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 78 | pub struct Decoder { 79 | #[cfg(feature = "encoding")] 80 | pub(crate) encoding: &'static Encoding, 81 | } 82 | 83 | impl Decoder { 84 | pub(crate) const fn utf8() -> Self { 85 | Decoder { 86 | #[cfg(feature = "encoding")] 87 | encoding: UTF_8, 88 | } 89 | } 90 | 91 | #[cfg(all(test, feature = "encoding", feature = "serialize"))] 92 | pub(crate) const fn utf16() -> Self { 93 | Decoder { encoding: UTF_16LE } 94 | } 95 | } 96 | 97 | impl Decoder { 98 | /// Returns the `Reader`'s encoding. 99 | /// 100 | /// This encoding will be used by [`decode`]. 101 | /// 102 | /// [`decode`]: Self::decode 103 | #[cfg(feature = "encoding")] 104 | pub const fn encoding(&self) -> &'static Encoding { 105 | self.encoding 106 | } 107 | 108 | /// ## Without `encoding` feature 109 | /// 110 | /// Decodes a UTF-8 slice regardless of XML declaration and ignoring BOM 111 | /// if it is present in the `bytes`. 112 | /// 113 | /// ## With `encoding` feature 114 | /// 115 | /// Decodes specified bytes using encoding, declared in the XML, if it was 116 | /// declared there, or UTF-8 otherwise, and ignoring BOM if it is present 117 | /// in the `bytes`. 118 | /// 119 | /// ---- 120 | /// Returns an error in case of malformed sequences in the `bytes`.
121 | pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result, EncodingError> { 122 | #[cfg(not(feature = "encoding"))] 123 | let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?)); 124 | 125 | #[cfg(feature = "encoding")] 126 | let decoded = decode(bytes, self.encoding); 127 | 128 | decoded 129 | } 130 | 131 | /// Like [`decode`][Self::decode] but using a pre-allocated buffer. 132 | pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<(), EncodingError> { 133 | #[cfg(not(feature = "encoding"))] 134 | buf.push_str(std::str::from_utf8(bytes)?); 135 | 136 | #[cfg(feature = "encoding")] 137 | decode_into(bytes, self.encoding, buf)?; 138 | 139 | Ok(()) 140 | } 141 | 142 | /// Decodes the `Cow` buffer, preserves the lifetime 143 | pub(crate) fn decode_cow<'b>( 144 | &self, 145 | bytes: &Cow<'b, [u8]>, 146 | ) -> Result, EncodingError> { 147 | match bytes { 148 | Cow::Borrowed(bytes) => self.decode(bytes), 149 | // Convert to owned, because otherwise Cow will be bound with wrong lifetime 150 | Cow::Owned(bytes) => Ok(self.decode(bytes)?.into_owned().into()), 151 | } 152 | } 153 | } 154 | 155 | /// Decodes the provided bytes using the specified encoding. 156 | /// 157 | /// Returns an error in case of malformed or non-representable sequences in the `bytes`. 158 | #[cfg(feature = "encoding")] 159 | pub fn decode<'b>( 160 | bytes: &'b [u8], 161 | encoding: &'static Encoding, 162 | ) -> Result, EncodingError> { 163 | encoding 164 | .decode_without_bom_handling_and_without_replacement(bytes) 165 | .ok_or(EncodingError::Other(encoding)) 166 | } 167 | 168 | /// Like [`decode`] but using a pre-allocated buffer. 
169 | #[cfg(feature = "encoding")] 170 | pub fn decode_into( 171 | bytes: &[u8], 172 | encoding: &'static Encoding, 173 | buf: &mut String, 174 | ) -> Result<(), EncodingError> { 175 | if encoding == UTF_8 { 176 | buf.push_str(std::str::from_utf8(bytes)?); 177 | return Ok(()); 178 | } 179 | 180 | let mut decoder = encoding.new_decoder_without_bom_handling(); 181 | buf.reserve( 182 | decoder 183 | .max_utf8_buffer_length_without_replacement(bytes.len()) 184 | // SAFETY: None can be returned only if required size will overflow usize, 185 | // but in that case String::reserve also panics 186 | .unwrap(), 187 | ); 188 | let (result, read) = decoder.decode_to_string_without_replacement(bytes, buf, true); 189 | match result { 190 | DecoderResult::InputEmpty => { 191 | debug_assert_eq!(read, bytes.len()); 192 | Ok(()) 193 | } 194 | DecoderResult::Malformed(_, _) => Err(EncodingError::Other(encoding)), 195 | // SAFETY: We allocate enough space above 196 | DecoderResult::OutputFull => unreachable!(), 197 | } 198 | } 199 | 200 | /// Automatic encoding detection of XML files using the 201 | /// [recommended algorithm](https://www.w3.org/TR/xml11/#sec-guessing). 202 | /// 203 | /// If encoding is detected, `Some` is returned with an encoding and size of BOM 204 | /// in bytes, if detection was performed using BOM, or zero, if detection was 205 | /// performed without BOM. 206 | /// 207 | /// If the encoding was not recognized, `None` is returned. 208 | /// 209 | /// Because the [`encoding_rs`] crate supports only a subset of those encodings, only 210 | /// the supported subset is detected, which is UTF-8, UTF-16 BE and UTF-16 LE.
211 | /// 212 | /// The algorithm suggests examining up to the first 4 bytes to determine encoding 213 | /// according to the following table: 214 | /// 215 | /// | Bytes |Detected encoding 216 | /// |-------------|------------------------------------------ 217 | /// | **BOM** 218 | /// |`FE FF ## ##`|UTF-16, big-endian 219 | /// |`FF FE ## ##`|UTF-16, little-endian 220 | /// |`EF BB BF` |UTF-8 221 | /// | **No BOM** 222 | /// |`00 3C 00 3F`|UTF-16 BE or ISO-10646-UCS-2 BE or similar 16-bit BE (use declared encoding to find the exact one) 223 | /// |`3C 00 3F 00`|UTF-16 LE or ISO-10646-UCS-2 LE or similar 16-bit LE (use declared encoding to find the exact one) 224 | /// |`3C 3F 78 6D`|UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns for the relevant ASCII characters, the encoding declaration itself may be read reliably 225 | #[cfg(feature = "encoding")] 226 | pub fn detect_encoding(bytes: &[u8]) -> Option<(&'static Encoding, usize)> { 227 | match bytes { 228 | // with BOM 229 | _ if bytes.starts_with(UTF16_BE_BOM) => Some((UTF_16BE, 2)), 230 | _ if bytes.starts_with(UTF16_LE_BOM) => Some((UTF_16LE, 2)), 231 | _ if bytes.starts_with(UTF8_BOM) => Some((UTF_8, 3)), 232 | 233 | // without BOM 234 | _ if bytes.starts_with(&[0x00, b'<', 0x00, b'?']) => Some((UTF_16BE, 0)), // Some BE encoding, for example, UTF-16 or ISO-10646-UCS-2 235 | _ if bytes.starts_with(&[b'<', 0x00, b'?', 0x00]) => Some((UTF_16LE, 0)), // Some LE encoding, for example, UTF-16 or ISO-10646-UCS-2 236 | _ if bytes.starts_with(&[b'<', b'?', b'x', b'm']) => Some((UTF_8, 0)), // Some ASCII compatible 237 | 238 | _ => None, 239 | } 240 | } 241 |
-------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! High performance XML reader/writer. 2 | //! 3 | //! # Description 4 | //! 5 | //! quick-xml contains two modes of operation: 6 | //! 7 | //! A streaming API based on the [StAX] model. This is suited for larger XML documents which 8 | //! cannot be completely read into memory at once. 9 | //! 10 | //! The user has to explicitly _ask_ for the next XML event, similar to a database cursor. 11 | //! This is achieved by the following two structs: 12 | //! 13 | //! - [`Reader`]: A low level XML pull-reader where buffer allocation/clearing is left to user. 14 | //! - [`Writer`]: An XML writer. Can be nested with readers if you want to transform XMLs. 15 | //! 16 | //! Especially for nested XML elements, the user must keep track _where_ (how deep) 17 | //! in the XML document the current event is located. 18 | //! 19 | //! quick-xml contains optional support of asynchronous reading and writing using [tokio]. 20 | //! To get it enable the [`async-tokio`](#async-tokio) feature. 21 | //! 22 | //! Furthermore, quick-xml also contains optional [Serde] support to directly 23 | //! serialize and deserialize from structs, without having to deal with the XML events. 24 | //! To get it enable the [`serialize`](#serialize) feature. Read more about mapping Rust types 25 | //! to XML in the documentation of [`de`] module. Also check [`serde_helpers`] 26 | //! module. 27 | //! 28 | //! # Examples 29 | //! 30 | //! - For a reading example see [`Reader`] 31 | //! - For a writing example see [`Writer`] 32 | //! 33 | //! # Features 34 | //! 35 | //! `quick-xml` supports the following features: 36 | //! 37 | //! [StAX]: https://en.wikipedia.org/wiki/StAX 38 | //! [tokio]: https://tokio.rs/ 39 | //! [Serde]: https://serde.rs/ 40 | //! 
[`de`]: ./de/index.html 41 | #![cfg_attr( 42 | feature = "document-features", 43 | cfg_attr(doc, doc = ::document_features::document_features!( 44 | feature_label = "{feature}" 45 | )) 46 | )] 47 | #![forbid(unsafe_code)] 48 | #![deny(missing_docs)] 49 | #![recursion_limit = "1024"] 50 | // Enable feature requirements in the docs from 1.57 51 | // See https://stackoverflow.com/questions/61417452 52 | // docs.rs defines `docsrs` when building documentation 53 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 54 | 55 | #[cfg(feature = "serialize")] 56 | pub mod de; 57 | pub mod encoding; 58 | pub mod errors; 59 | pub mod escape; 60 | pub mod events; 61 | pub mod name; 62 | pub mod parser; 63 | pub mod reader; 64 | #[cfg(feature = "serialize")] 65 | pub mod se; 66 | #[cfg(feature = "serde-types")] 67 | pub mod serde_helpers; 68 | /// Not an official API, public for integration tests 69 | #[doc(hidden)] 70 | pub mod utils; 71 | pub mod writer; 72 | 73 | // reexports 74 | pub use crate::encoding::Decoder; 75 | #[cfg(feature = "serialize")] 76 | pub use crate::errors::serialize::{DeError, SeError}; 77 | pub use crate::errors::{Error, Result}; 78 | pub use crate::reader::{NsReader, Reader}; 79 | pub use crate::writer::{ElementWriter, Writer}; 80 | -------------------------------------------------------------------------------- /src/parser/element.rs: -------------------------------------------------------------------------------- 1 | //! Contains a parser for an XML element. 2 | 3 | use crate::errors::SyntaxError; 4 | use crate::parser::Parser; 5 | 6 | /// A parser that search a `>` symbol in the slice outside of quoted regions. 7 | /// 8 | /// The parser considers two quoted regions: a double-quoted (`"..."`) and 9 | /// a single-quoted (`'...'`) region. Matches found inside those regions are not 10 | /// considered as results. 
Each region starts and ends by its quote symbol, 11 | /// which cannot be escaped (but can be encoded as XML character entity or named 12 | /// entity. Anyway, that encoding does not contain literal quotes). 13 | /// 14 | /// To use a parser create an instance of parser and [`feed`] data into it. 15 | /// After successful search the parser will return [`Some`] with position of 16 | /// found symbol. If search is unsuccessful, a [`None`] will be returned. You 17 | /// typically would expect positive result of search, so that you should feed 18 | /// new data until you get it. 19 | /// 20 | /// NOTE: after successful match the parser does not returned to the initial 21 | /// state and should not be used anymore. Create a new parser if you want to perform 22 | /// new search. 23 | /// 24 | /// # Example 25 | /// 26 | /// ``` 27 | /// # use pretty_assertions::assert_eq; 28 | /// use quick_xml::parser::{ElementParser, Parser}; 29 | /// 30 | /// let mut parser = ElementParser::default(); 31 | /// 32 | /// // Parse `and the text follow...` 33 | /// // splitted into three chunks 34 | /// assert_eq!(parser.feed(b"and the text follow..."), Some(8)); 39 | /// // ^ ^ 40 | /// // 0 8 41 | /// ``` 42 | /// 43 | /// [`feed`]: Self::feed() 44 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 45 | pub enum ElementParser { 46 | /// The initial state (inside element, but outside of attribute value). 47 | Outside, 48 | /// Inside a single-quoted region (`'...'`). 49 | SingleQ, 50 | /// Inside a double-quoted region (`"..."`). 51 | DoubleQ, 52 | } 53 | 54 | impl Parser for ElementParser { 55 | /// Returns number of consumed bytes or `None` if `>` was not found in `bytes`. 
56 | #[inline] 57 | fn feed(&mut self, bytes: &[u8]) -> Option { 58 | for i in memchr::memchr3_iter(b'>', b'\'', b'"', bytes) { 59 | *self = match (*self, bytes[i]) { 60 | // only allowed to match `>` while we are in state `Outside` 61 | (Self::Outside, b'>') => return Some(i), 62 | (Self::Outside, b'\'') => Self::SingleQ, 63 | (Self::Outside, b'\"') => Self::DoubleQ, 64 | 65 | // the only end_byte that gets us out if the same character 66 | (Self::SingleQ, b'\'') | (Self::DoubleQ, b'"') => Self::Outside, 67 | 68 | // all other bytes: no state change 69 | _ => continue, 70 | }; 71 | } 72 | None 73 | } 74 | 75 | #[inline] 76 | fn eof_error() -> SyntaxError { 77 | SyntaxError::UnclosedTag 78 | } 79 | } 80 | 81 | impl Default for ElementParser { 82 | #[inline] 83 | fn default() -> Self { 84 | Self::Outside 85 | } 86 | } 87 | 88 | #[test] 89 | fn parse() { 90 | use pretty_assertions::assert_eq; 91 | use ElementParser::*; 92 | 93 | /// Returns `Ok(pos)` with the position in the buffer where element is ended. 94 | /// 95 | /// Returns `Err(internal_state)` if parsing does not done yet. 
96 | fn parse_element(bytes: &[u8], mut parser: ElementParser) -> Result { 97 | match parser.feed(bytes) { 98 | Some(i) => Ok(i), 99 | None => Err(parser), 100 | } 101 | } 102 | 103 | assert_eq!(parse_element(b"", Outside), Err(Outside)); 104 | assert_eq!(parse_element(b"", SingleQ), Err(SingleQ)); 105 | assert_eq!(parse_element(b"", DoubleQ), Err(DoubleQ)); 106 | 107 | assert_eq!(parse_element(b"'", Outside), Err(SingleQ)); 108 | assert_eq!(parse_element(b"'", SingleQ), Err(Outside)); 109 | assert_eq!(parse_element(b"'", DoubleQ), Err(DoubleQ)); 110 | 111 | assert_eq!(parse_element(b"\"", Outside), Err(DoubleQ)); 112 | assert_eq!(parse_element(b"\"", SingleQ), Err(SingleQ)); 113 | assert_eq!(parse_element(b"\"", DoubleQ), Err(Outside)); 114 | 115 | assert_eq!(parse_element(b">", Outside), Ok(0)); 116 | assert_eq!(parse_element(b">", SingleQ), Err(SingleQ)); 117 | assert_eq!(parse_element(b">", DoubleQ), Err(DoubleQ)); 118 | 119 | assert_eq!(parse_element(b"''>", Outside), Ok(2)); 120 | assert_eq!(parse_element(b"''>", SingleQ), Err(SingleQ)); 121 | assert_eq!(parse_element(b"''>", DoubleQ), Err(DoubleQ)); 122 | } 123 | -------------------------------------------------------------------------------- /src/parser/mod.rs: -------------------------------------------------------------------------------- 1 | //! Contains low-level parsers of different XML pieces. 2 | 3 | use crate::errors::SyntaxError; 4 | 5 | mod element; 6 | mod pi; 7 | 8 | pub use element::ElementParser; 9 | pub use pi::PiParser; 10 | 11 | /// Used to decouple reading of data from data source and parsing XML structure from it. 12 | /// This is a state preserved between getting chunks of bytes from the reader. 13 | /// 14 | /// This trait is implemented for every parser that processes piece of XML grammar. 15 | pub trait Parser { 16 | /// Process new data and try to determine end of the parsed thing. 
17 | /// 18 | /// Returns position of the end of thing in `bytes` in case of successful search 19 | /// and `None` otherwise. 20 | /// 21 | /// # Parameters 22 | /// - `bytes`: a slice to find the end of a thing. 23 | /// Should contain text in ASCII-compatible encoding 24 | fn feed(&mut self, bytes: &[u8]) -> Option; 25 | 26 | /// Returns parse error produced by this parser in case of reaching end of 27 | /// input without finding the end of a parsed thing. 28 | fn eof_error() -> SyntaxError; 29 | } 30 | -------------------------------------------------------------------------------- /src/parser/pi.rs: -------------------------------------------------------------------------------- 1 | //! Contains a parser for an XML processing instruction. 2 | 3 | use crate::errors::SyntaxError; 4 | use crate::parser::Parser; 5 | 6 | /// A parser that search a `?>` sequence in the slice. 7 | /// 8 | /// To use a parser create an instance of parser and [`feed`] data into it. 9 | /// After successful search the parser will return [`Some`] with position where 10 | /// processing instruction is ended (the position after `?>`). If search was 11 | /// unsuccessful, a [`None`] will be returned. You typically would expect positive 12 | /// result of search, so that you should feed new data until you get it. 13 | /// 14 | /// NOTE: after successful match the parser does not returned to the initial 15 | /// state and should not be used anymore. Create a new parser if you want to perform 16 | /// new search. 17 | /// 18 | /// # Example 19 | /// 20 | /// ``` 21 | /// # use pretty_assertions::assert_eq; 22 | /// use quick_xml::parser::{Parser, PiParser}; 23 | /// 24 | /// let mut parser = PiParser::default(); 25 | /// 26 | /// // Parse ` and ?' 
inside?>and the text follow...` 27 | /// // splitted into three chunks 28 | /// assert_eq!(parser.feed(b" and ?"), None); 31 | /// // ...get another chunk of data 32 | /// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(9)); 33 | /// // ^ ^ 34 | /// // 0 9 35 | /// ``` 36 | /// 37 | /// [`feed`]: Self::feed() 38 | #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] 39 | pub struct PiParser( 40 | /// A flag that indicates was the `bytes` in the previous attempt to find the 41 | /// end ended with `?`. 42 | pub bool, 43 | ); 44 | 45 | impl Parser for PiParser { 46 | /// Determines the end position of a processing instruction in the provided slice. 47 | /// Processing instruction ends on the first occurrence of `?>` which cannot be 48 | /// escaped. 49 | /// 50 | /// Returns position after the `?>` or `None` if such sequence was not found. 51 | /// 52 | /// [Section 2.6]: Parameter entity references MUST NOT be recognized within 53 | /// processing instructions, so parser do not search for them. 54 | /// 55 | /// # Parameters 56 | /// - `bytes`: a slice to find the end of a processing instruction. 57 | /// Should contain text in ASCII-compatible encoding 58 | /// 59 | /// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi 60 | #[inline] 61 | fn feed(&mut self, bytes: &[u8]) -> Option { 62 | for i in memchr::memchr_iter(b'>', bytes) { 63 | match i { 64 | 0 if self.0 => return Some(0), 65 | // If the previous byte is `?`, then we found `?>` 66 | i if i > 0 && bytes[i - 1] == b'?' => return Some(i), 67 | _ => {} 68 | } 69 | } 70 | self.0 = bytes.last().copied() == Some(b'?'); 71 | None 72 | } 73 | 74 | #[inline] 75 | fn eof_error() -> SyntaxError { 76 | SyntaxError::UnclosedPIOrXmlDecl 77 | } 78 | } 79 | 80 | #[test] 81 | fn pi() { 82 | use pretty_assertions::assert_eq; 83 | 84 | /// Returns `Ok(pos)` with the position in the buffer where processing 85 | /// instruction is ended. 
86 | /// 87 | /// Returns `Err(internal_state)` if parsing is not done yet. 88 | fn parse_pi(bytes: &[u8], had_question_mark: bool) -> Result { 89 | let mut parser = PiParser(had_question_mark); 90 | match parser.feed(bytes) { 91 | Some(i) => Ok(i), 92 | None => Err(parser.0), 93 | } 94 | } 95 | 96 | // Comments shows which character was seen the last before calling `feed`. 97 | // `x` means any character, pipe denotes start of the buffer that passed to `feed` 98 | 99 | assert_eq!(parse_pi(b"", false), Err(false)); // x| 100 | assert_eq!(parse_pi(b"", true), Err(false)); // ?| 101 | 102 | assert_eq!(parse_pi(b"?", false), Err(true)); // x|? 103 | assert_eq!(parse_pi(b"?", true), Err(true)); // ?|? 104 | 105 | assert_eq!(parse_pi(b">", false), Err(false)); // x|> 106 | assert_eq!(parse_pi(b">", true), Ok(0)); // ?|> 107 | 108 | assert_eq!(parse_pi(b"?>", false), Ok(1)); // x|?> 109 | assert_eq!(parse_pi(b"?>", true), Ok(1)); // ?|?> 110 | 111 | assert_eq!(parse_pi(b">?>", false), Ok(2)); // x|>?> 112 | assert_eq!(parse_pi(b">?>", true), Ok(0)); // ?|>?> 113 | } 114 | -------------------------------------------------------------------------------- /src/se/text.rs: -------------------------------------------------------------------------------- 1 | //! Contains serializer for a special `&text` field 2 | 3 | use crate::de::TEXT_KEY; 4 | use crate::se::simple_type::{SimpleSeq, SimpleTypeSerializer}; 5 | use crate::se::SeError; 6 | use serde::ser::{Impossible, Serialize, Serializer}; 7 | use serde::serde_if_integer128; 8 | use std::fmt::Write; 9 | 10 | macro_rules! write_primitive { 11 | ($method:ident ( $ty:ty )) => { 12 | #[inline] 13 | fn $method(self, value: $ty) -> Result { 14 | self.0.$method(value) 15 | } 16 | }; 17 | } 18 | 19 | //////////////////////////////////////////////////////////////////////////////////////////////////// 20 | 21 | /// A serializer used to serialize a `$text` field of a struct or map. 
///
/// This serializer is very similar to [`SimpleTypeSerializer`], but differs
/// from it in how it processes unit enum variants. Unlike [`SimpleTypeSerializer`]
/// this serializer does not write anything for a unit variant whose name is
/// equal to [`TEXT_KEY`]; all other unit variants are forwarded to the wrapped
/// [`SimpleTypeSerializer`].
// NOTE(review): this copy of the file has no generic parameter lists (`W` is used
// below but never declared; `Result`, `Option` and `Impossible` have no arguments).
// Presumably they were lost during extraction -- confirm against the repository.
pub struct TextSerializer(pub SimpleTypeSerializer);

impl Serializer for TextSerializer {
    type Ok = W;
    type Error = SeError;

    // Sequences and tuples reuse the sequence serializer of `SimpleTypeSerializer`
    type SerializeSeq = SimpleSeq;
    type SerializeTuple = SimpleSeq;
    type SerializeTupleStruct = SimpleSeq;
    type SerializeTupleVariant = SimpleSeq;
    // Maps and structs are always rejected with `SeError::Unsupported`
    type SerializeMap = Impossible;
    type SerializeStruct = Impossible;
    type SerializeStructVariant = Impossible;

    // All primitives are forwarded to the wrapped serializer as is

    write_primitive!(serialize_bool(bool));

    write_primitive!(serialize_i8(i8));
    write_primitive!(serialize_i16(i16));
    write_primitive!(serialize_i32(i32));
    write_primitive!(serialize_i64(i64));

    write_primitive!(serialize_u8(u8));
    write_primitive!(serialize_u16(u16));
    write_primitive!(serialize_u32(u32));
    write_primitive!(serialize_u64(u64));

    serde_if_integer128! {
        write_primitive!(serialize_i128(i128));
        write_primitive!(serialize_u128(u128));
    }

    write_primitive!(serialize_f32(f32));
    write_primitive!(serialize_f64(f64));

    write_primitive!(serialize_char(char));
    write_primitive!(serialize_str(&str));
    write_primitive!(serialize_bytes(&[u8]));

    /// Forwards to the wrapped serializer.
    #[inline]
    fn serialize_none(self) -> Result {
        self.0.serialize_none()
    }

    /// Serializes the inner value with this serializer, as if it was not
    /// wrapped into `Some`.
    fn serialize_some(self, value: &T) -> Result {
        value.serialize(self)
    }

    /// Forwards to the wrapped serializer.
    #[inline]
    fn serialize_unit(self) -> Result {
        self.0.serialize_unit()
    }

    /// Forwards to the wrapped serializer.
    #[inline]
    fn serialize_unit_struct(self, name: &'static str) -> Result {
        self.0.serialize_unit_struct(name)
    }

    /// Writes nothing and returns the writer untouched if `variant` is equal to
    /// [`TEXT_KEY`], otherwise forwards to the wrapped serializer.
    #[inline]
    fn serialize_unit_variant(
        self,
        name: &'static str,
        variant_index: u32,
        variant: &'static str,
    ) -> Result {
        if variant == TEXT_KEY {
            Ok(self.0.writer)
        } else {
            self.0.serialize_unit_variant(name, variant_index, variant)
        }
    }

    /// Serializes the inner value with this serializer; the struct name is ignored.
    fn serialize_newtype_struct(
        self,
        _name: &'static str,
        value: &T,
    ) -> Result {
        value.serialize(self)
    }

    /// Always returns [`SeError::Unsupported`].
    #[inline]
    fn serialize_newtype_variant(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _value: &T,
    ) -> Result {
        Err(SeError::Unsupported(
            format!(
                "cannot serialize enum newtype variant `{}::{}` as text content value",
                name, variant
            )
            .into(),
        ))
    }

    /// Forwards to the wrapped serializer.
    #[inline]
    fn serialize_seq(self, len: Option) -> Result {
        self.0.serialize_seq(len)
    }

    /// Forwards to the wrapped serializer.
    #[inline]
    fn serialize_tuple(self, len: usize) -> Result {
        self.0.serialize_tuple(len)
    }

    /// Forwards to the wrapped serializer.
    #[inline]
    fn serialize_tuple_struct(
        self,
        name: &'static str,
        len: usize,
    ) -> Result {
        self.0.serialize_tuple_struct(name, len)
    }

    /// Always returns [`SeError::Unsupported`].
    #[inline]
    fn serialize_tuple_variant(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result {
        Err(SeError::Unsupported(
            format!(
                "cannot serialize enum tuple variant `{}::{}` as text content value",
                name, variant
            )
            .into(),
        ))
    }

    /// Always returns [`SeError::Unsupported`].
    #[inline]
    fn serialize_map(self, _len: Option) -> Result {
        Err(SeError::Unsupported(
            "cannot serialize map as text content value".into(),
        ))
    }

    /// Always returns [`SeError::Unsupported`].
    #[inline]
    fn serialize_struct(
        self,
        name: &'static str,
        _len: usize,
    ) -> Result {
        Err(SeError::Unsupported(
            format!("cannot serialize struct `{}` as text content value", name).into(),
        ))
    }

    /// Always returns [`SeError::Unsupported`].
    #[inline]
    fn serialize_struct_variant(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result {
        Err(SeError::Unsupported(
            format!(
                "cannot serialize enum struct variant `{}::{}` as text content value",
                name, variant
            )
            .into(),
        ))
    }
}
-------------------------------------------------------------------------------- /test-gen/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "test-gen" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | encoding_rs = "0.8" 10 | serde = { version = "1.0", features = ["derive"] } 11 | serde_json = "1.0" 12 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Document descriptions 2 | 3 | document.xml 4 | medium length, mostly empty tags, a few short attributes per
element, no escaping 5 | html5.html 6 | html5.txt 7 | libreoffice_document.fodt 8 | long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces 9 | linescore.xml 10 | medium length, lots of attributes, short attributes, few escapes 11 | opennews_all.rss 12 | players.xml 13 | long, lots of attributes, short attributes, no text, no escapes 14 | rpm_filelists.xml 15 | long, mostly medium-length text elements, not much escaping 16 | rpm_other.xml 17 | long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes 18 | rpm_primary.xml 19 | long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces 20 | rpm_primary2.xml 21 | long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces 22 | sample_1.xml 23 | short, mix of attributes and text, lots of escapes 24 | sample_ns.xml 25 | short, lots of namespaces, no escapes 26 | sample_rss.xml 27 | long, few attributes, mix of attribute lengths, escapes in text content 28 | test_writer_indent_cdata.xml 29 | test_writer_indent.xml 30 | medium length, lots of namespaces, no escaping 31 | test_writer.xml 32 | utf16be.xml 33 | utf16le.xml 34 | -------------------------------------------------------------------------------- /tests/async-tokio.rs: -------------------------------------------------------------------------------- 1 | use std::io::Cursor; 2 | use std::iter; 3 | 4 | use pretty_assertions::assert_eq; 5 | use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event::*}; 6 | use quick_xml::name::QName; 7 | use quick_xml::reader::Reader; 8 | use quick_xml::utils::Bytes; 9 | use tokio::io::{AsyncBufReadExt, AsyncReadExt, BufReader}; 10 | 11 | // Import `small_buffers_tests!` 12 | #[macro_use] 13 | mod helpers; 14 | 15 | small_buffers_tests!( 16 | #[tokio::test] 17 | read_event_into_async: tokio::io::BufReader<_>, 18 | async, await 19 | ); 20 | 21 | #[tokio::test] 22 | async fn 
test_sample() { 23 | let src = include_str!("documents/sample_rss.xml"); 24 | let mut reader = Reader::from_reader(src.as_bytes()); 25 | let mut buf = Vec::new(); 26 | let mut count = 0; 27 | // Expected number of iterations, to prevent infinity loops if refactoring breaks test 28 | let mut reads = 0; 29 | loop { 30 | reads += 1; 31 | assert!( 32 | reads <= 10000, 33 | "too many events, possible infinity loop: {reads}" 34 | ); 35 | match reader.read_event_into_async(&mut buf).await { 36 | Ok(Start(_)) => count += 1, 37 | Ok(Decl(e)) => assert_eq!(e.version().unwrap(), b"1.0".as_ref()), 38 | Ok(Eof) => break, 39 | Ok(_) => (), 40 | Err(e) => panic!("{} at {}", e, reader.error_position()), 41 | } 42 | buf.clear(); 43 | } 44 | assert_eq!((count, reads), (1247, 5457)); 45 | } 46 | 47 | /// This tests checks that read_to_end() correctly returns span even when 48 | /// text is trimmed from both sides 49 | mod read_to_end { 50 | use super::*; 51 | use pretty_assertions::assert_eq; 52 | 53 | #[tokio::test] 54 | async fn text() { 55 | let mut r = Reader::from_str(" text "); 56 | // ^0 ^5 ^11 57 | r.config_mut().trim_text(true); 58 | 59 | let mut buf = Vec::new(); 60 | assert_eq!( 61 | r.read_event_into_async(&mut buf).await.unwrap(), 62 | Start(BytesStart::new("tag")) 63 | ); 64 | assert_eq!( 65 | r.read_to_end_into_async(QName(b"tag"), &mut buf) 66 | .await 67 | .unwrap(), 68 | 5..11 69 | ); 70 | assert_eq!(r.read_event_into_async(&mut buf).await.unwrap(), Eof); 71 | } 72 | 73 | #[tokio::test] 74 | async fn tag() { 75 | let mut r = Reader::from_str(" "); 76 | // ^0 ^5 ^16 77 | r.config_mut().trim_text(true); 78 | 79 | let mut buf = Vec::new(); 80 | assert_eq!( 81 | r.read_event_into_async(&mut buf).await.unwrap(), 82 | Start(BytesStart::new("tag")) 83 | ); 84 | assert_eq!( 85 | r.read_to_end_into_async(QName(b"tag"), &mut buf) 86 | .await 87 | .unwrap(), 88 | 5..16 89 | ); 90 | assert_eq!(r.read_event_into_async(&mut buf).await.unwrap(), Eof); 91 | } 92 | } 93 | 94 | 
#[tokio::test] 95 | async fn issue623() { 96 | let mut buf = Vec::new(); 97 | let mut reader = Reader::from_reader(Cursor::new( 98 | b" 99 | 100 | _binary << data&> 101 | 102 | ", 103 | )); 104 | reader.config_mut().trim_text(true); 105 | 106 | assert_eq!( 107 | ( 108 | reader.read_event_into_async(&mut buf).await.unwrap(), 109 | reader.buffer_position() 110 | ), 111 | (Start(BytesStart::new("AppendedData")), 23) 112 | ); 113 | 114 | let mut inner = reader.stream(); 115 | // Read to start of data marker 116 | inner.read_until(b'_', &mut buf).await.unwrap(); 117 | 118 | // Read binary data. We must know its size 119 | let mut binary = [0u8; 16]; 120 | inner.read_exact(&mut binary).await.unwrap(); 121 | assert_eq!(Bytes(&binary), Bytes(b"binary << data&>")); 122 | assert_eq!(inner.offset(), 53); 123 | assert_eq!(reader.buffer_position(), 53); 124 | 125 | assert_eq!( 126 | ( 127 | reader.read_event_into_async(&mut buf).await.unwrap(), 128 | reader.buffer_position() 129 | ), 130 | (End(BytesEnd::new("AppendedData")), 77) 131 | ); 132 | 133 | assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Eof); 134 | } 135 | 136 | /// Regression test for https://github.com/tafia/quick-xml/issues/751 137 | /// 138 | /// Actually, that error was not found in async reader, but we would to test it as well. 
139 | #[tokio::test] 140 | async fn issue751() { 141 | let mut text = Vec::new(); 142 | let mut chunk = Vec::new(); 143 | chunk.extend_from_slice(b""); 144 | for data in iter::repeat(b"some text inside").take(1000) { 145 | chunk.extend_from_slice(data); 146 | text.extend_from_slice(data); 147 | } 148 | chunk.extend_from_slice(b""); 149 | 150 | let mut reader = Reader::from_reader(quick_xml::utils::Fountain { 151 | chunk: &chunk, 152 | consumed: 0, 153 | overall_read: 0, 154 | }); 155 | let mut buf = Vec::new(); 156 | let mut starts = 0u64; 157 | let mut ends = 0u64; 158 | let mut texts = 0u64; 159 | loop { 160 | buf.clear(); 161 | match reader.read_event_into_async(&mut buf).await { 162 | Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), 163 | Ok(Eof) => break, 164 | 165 | Ok(Start(e)) => { 166 | starts += 1; 167 | assert_eq!( 168 | e.name(), 169 | QName(b"content"), 170 | "starts: {starts}, ends: {ends}, texts: {texts}" 171 | ); 172 | } 173 | Ok(End(e)) => { 174 | ends += 1; 175 | assert_eq!( 176 | e.name(), 177 | QName(b"content"), 178 | "starts: {starts}, ends: {ends}, texts: {texts}" 179 | ); 180 | } 181 | Ok(Text(e)) => { 182 | texts += 1; 183 | assert_eq!( 184 | e.as_ref(), 185 | text, 186 | "starts: {starts}, ends: {ends}, texts: {texts}" 187 | ); 188 | } 189 | _ => (), 190 | } 191 | // If we successfully read more than `u32::MAX`, the test is passed 192 | if reader.get_ref().overall_read >= u32::MAX as u64 { 193 | break; 194 | } 195 | } 196 | } 197 | 198 | /// Regression test for https://github.com/tafia/quick-xml/issues/774 199 | /// 200 | /// Capacity of the buffer selected in that way, that "text" will be read into 201 | /// one internal buffer of `BufReader` in one `fill_buf()` call and `<` of the 202 | /// closing tag in the next call. 
203 | #[tokio::test] 204 | async fn issue774() { 205 | let xml = BufReader::with_capacity(9, b"text" as &[u8]); 206 | // ^0 ^9 207 | let mut reader = Reader::from_reader(xml); 208 | let mut buf = Vec::new(); 209 | 210 | assert_eq!( 211 | reader.read_event_into_async(&mut buf).await.unwrap(), 212 | Start(BytesStart::new("tag")) 213 | ); 214 | assert_eq!( 215 | reader.read_event_into_async(&mut buf).await.unwrap(), 216 | Text(BytesText::new("text")) 217 | ); 218 | assert_eq!( 219 | reader.read_event_into_async(&mut buf).await.unwrap(), 220 | End(BytesEnd::new("tag")) 221 | ); 222 | } 223 | -------------------------------------------------------------------------------- /tests/documents/encoding/Big5.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/Big5.xml -------------------------------------------------------------------------------- /tests/documents/encoding/EUC-JP.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/EUC-JP.xml -------------------------------------------------------------------------------- /tests/documents/encoding/EUC-KR.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/EUC-KR.xml -------------------------------------------------------------------------------- /tests/documents/encoding/GBK.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/GBK.xml -------------------------------------------------------------------------------- 
/tests/documents/encoding/IBM866.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/IBM866.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-10.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-10.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-13.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-13.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-14.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-14.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-15.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-15.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-16.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-16.xml 
-------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-2.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-2.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-3.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-3.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-4.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-4.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-5.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-5.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-6.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-6.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-7.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-7.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-8-I.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-8-I.xml -------------------------------------------------------------------------------- /tests/documents/encoding/ISO-8859-8.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-8.xml -------------------------------------------------------------------------------- /tests/documents/encoding/KOI8-R.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/KOI8-R.xml -------------------------------------------------------------------------------- /tests/documents/encoding/KOI8-U.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/KOI8-U.xml -------------------------------------------------------------------------------- /tests/documents/encoding/Shift_JIS.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/Shift_JIS.xml -------------------------------------------------------------------------------- /tests/documents/encoding/gb18030.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/gb18030.xml -------------------------------------------------------------------------------- /tests/documents/encoding/macintosh.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/macintosh.xml -------------------------------------------------------------------------------- /tests/documents/encoding/utf16be-bom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/utf16be-bom.xml -------------------------------------------------------------------------------- /tests/documents/encoding/utf16be.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <project name="project-name"> 3 | </project> 4 | -------------------------------------------------------------------------------- /tests/documents/encoding/utf16le-bom.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/utf16le-bom.xml -------------------------------------------------------------------------------- /tests/documents/encoding/utf16le.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <project name="project-name"> 3 | </project> 4 | -------------------------------------------------------------------------------- /tests/documents/encoding/utf8-bom.xml: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- 
/tests/documents/encoding/utf8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /tests/documents/encoding/windows-1250.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1250.xml -------------------------------------------------------------------------------- /tests/documents/encoding/windows-1251.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1251.xml -------------------------------------------------------------------------------- /tests/documents/encoding/windows-1252.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1252.xml -------------------------------------------------------------------------------- /tests/documents/encoding/windows-1253.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1253.xml -------------------------------------------------------------------------------- /tests/documents/encoding/windows-1254.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1254.xml -------------------------------------------------------------------------------- /tests/documents/encoding/windows-1255.xml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1255.xml -------------------------------------------------------------------------------- /tests/documents/encoding/windows-1256.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1256.xml -------------------------------------------------------------------------------- /tests/documents/encoding/windows-1257.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1257.xml -------------------------------------------------------------------------------- /tests/documents/encoding/windows-1258.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1258.xml -------------------------------------------------------------------------------- /tests/documents/encoding/windows-874.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-874.xml -------------------------------------------------------------------------------- /tests/documents/encoding/x-mac-cyrillic.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/x-mac-cyrillic.xml -------------------------------------------------------------------------------- 
/tests/documents/encoding/x-user-defined.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/x-user-defined.xml -------------------------------------------------------------------------------- /tests/documents/html5.html: -------------------------------------------------------------------------------- 1 | 2 | Hey 3 |   4 | -------------------------------------------------------------------------------- /tests/documents/html5.txt: -------------------------------------------------------------------------------- 1 | DocType(html) 2 | Characters( 3 | ) 4 | StartElement(a, attr-error: position 7: attribute value must be enclosed in `"` or `'`) 5 | Characters(Hey) 6 | EndElement(a) 7 | Characters( 8 | ) 9 | Reference(nbsp) 10 | Characters( 11 | ) 12 | EndDocument 13 | -------------------------------------------------------------------------------- /tests/documents/linescore.xml: -------------------------------------------------------------------------------- 1 | 2 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 89 | 98 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /tests/documents/opennews_all.rss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/opennews_all.rss -------------------------------------------------------------------------------- /tests/documents/rpm_primary2.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | complex-package 6 | x86_64 7 | 8 | bbb7b0e9350a0f75b923bdd0ef4f9af39765c668a3e70bfd3486ea9f0f618aaf 9 | A package for exercising many different features of RPM metadata 10 | Complex package 11 | Michael Bluth 12 | http://bobloblaw.com 13 | 65 | 66 | rpm-empty 67 | x86_64 
68 | 69 | 90fbba546300f507473547f33e229ee7bad94bbbe6e84b21d485e8e43b5f1132 70 | "" 71 | 72 | 73 | 74 | 90 | 91 | rpm-with-invalid-chars 92 | noarch 93 | 94 | 64f1444f8e86a9ae6accdc2c4b12cb4a87fb2414c0998df461a8623a52eb3cc4 95 | An RPM file with invalid characters in its description. 96 | This RPM that contains XML-illegal characters such as ampersand & and less-than < greater-than > in its </description>. 97 | These must be escaped in the final XML metadata. The XML spec does not strictly require escaping 'single' or "double" quotes 98 | within text content, and not all XML libraries do so. However, it is generally recommended. 99 | 100 | https://github.com/dralley/rpmrepo_rs/ 101 | 116 | 117 | rpm-with-non-ascii 118 | noarch 119 | 120 | 957de8a966af8fe8e55102489099d8b20bbecc23954c8c2bd88fb59625260393 121 | An RPM file with non-ascii characters in its metadata. 122 | This file contains unicode characters and should be encoded as UTF-8. The 123 | following code points are all outside the "Basic Latin (ASCII)" code point 124 | block: 125 | 126 | * U+0080: € 127 | * U+0100: Ā 128 | * U+0180: ƀ 129 | * U+0250: ɐ 130 | * U+02B0: ʰ 131 | * U+0041 0x0300: À 132 | * U+0370: Ͱ 133 | 134 | See: http://www.unicode.org/charts/ 135 | 136 | https://github.com/dralley/rpmrepo_rs/ 137 | 152 | 153 | -------------------------------------------------------------------------------- /tests/documents/sample_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Some <java> class 11 | 12 | 13 | Another "java" class 14 | 15 | 16 | Weird 'XML' config 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | JavaScript & program 27 | 28 | 29 | Cascading style sheet: © - ҉ 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /tests/documents/sample_ns.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | H 6 | N 7 | 8 | 9 | 
Name 10 | Another name 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tests/documents/test_writer.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/documents/test_writer_indent.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | This is a simple test document to demonstrate the DocumentLoader example! 41 | This is a simple test document to demonstrate the DocumentLoader example! 42 | This is a simple test document to demonstrate the DocumentLoader example! 43 | This is a simple test document to demonstrate the DocumentLoader example! 44 | This is a simple test document to demonstrate the DocumentLoader example! 45 | 46 | 47 | -------------------------------------------------------------------------------- /tests/documents/test_writer_indent_cdata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /tests/escape.rs: -------------------------------------------------------------------------------- 1 | use pretty_assertions::assert_eq; 2 | use quick_xml::escape::{self, EscapeError, ParseCharRefError}; 3 | use std::borrow::Cow; 4 | use std::num::IntErrorKind; 5 | 6 | #[test] 7 | fn escape() { 8 | let unchanged = escape::escape("test"); 9 | // assert_eq does not check that Cow is borrowed, but we explicitly use Cow 10 | // because it influences diff 11 | // TODO: use assert_matches! 
when stabilized and other features will bump MSRV 12 | assert_eq!(unchanged, Cow::Borrowed("test")); 13 | assert!(matches!(unchanged, Cow::Borrowed(_))); 14 | 15 | assert_eq!(escape::escape("<&\"'>"), "<&"'>"); 16 | assert_eq!(escape::escape(""), "<test>"); 17 | assert_eq!(escape::escape("\"a\"bc"), ""a"bc"); 18 | assert_eq!(escape::escape("\"a\"b&c"), ""a"b&c"); 19 | assert_eq!( 20 | escape::escape("prefix_\"a\"b&<>c"), 21 | "prefix_"a"b&<>c" 22 | ); 23 | } 24 | 25 | #[test] 26 | fn partial_escape() { 27 | let unchanged = escape::partial_escape("test"); 28 | // assert_eq does not check that Cow is borrowed, but we explicitly use Cow 29 | // because it influences diff 30 | // TODO: use assert_matches! when stabilized and other features will bump MSRV 31 | assert_eq!(unchanged, Cow::Borrowed("test")); 32 | assert!(matches!(unchanged, Cow::Borrowed(_))); 33 | 34 | assert_eq!(escape::partial_escape("<&\"'>"), "<&\"'>"); 35 | assert_eq!(escape::partial_escape(""), "<test>"); 36 | assert_eq!(escape::partial_escape("\"a\"bc"), "\"a\"bc"); 37 | assert_eq!(escape::partial_escape("\"a\"b&c"), "\"a\"b&c"); 38 | assert_eq!( 39 | escape::partial_escape("prefix_\"a\"b&<>c"), 40 | "prefix_\"a\"b&<>c" 41 | ); 42 | } 43 | 44 | #[test] 45 | fn minimal_escape() { 46 | assert_eq!(escape::minimal_escape("test"), Cow::Borrowed("test")); 47 | assert_eq!(escape::minimal_escape("<&\"'>"), "<&\"'>"); 48 | assert_eq!(escape::minimal_escape(""), "<test>"); 49 | assert_eq!(escape::minimal_escape("\"a\"bc"), "\"a\"bc"); 50 | assert_eq!(escape::minimal_escape("\"a\"b&c"), "\"a\"b&c"); 51 | assert_eq!( 52 | escape::minimal_escape("prefix_\"a\"b&<>c"), 53 | "prefix_\"a\"b&<>c" 54 | ); 55 | } 56 | 57 | #[test] 58 | fn unescape() { 59 | let unchanged = escape::unescape("test"); 60 | // assert_eq does not check that Cow is borrowed, but we explicitly use Cow 61 | // because it influences diff 62 | // TODO: use assert_matches! 
when stabilized and other features will bump MSRV 63 | assert_eq!(unchanged, Ok(Cow::Borrowed("test"))); 64 | assert!(matches!(unchanged, Ok(Cow::Borrowed(_)))); 65 | 66 | assert_eq!( 67 | escape::unescape("<&test'">"), 68 | Ok("<&test'\">".into()) 69 | ); 70 | assert_eq!(escape::unescape("0"), Ok("0".into())); 71 | assert_eq!(escape::unescape("0"), Ok("0".into())); 72 | assert_eq!( 73 | escape::unescape("&foo;"), 74 | Err(EscapeError::UnrecognizedEntity(1..4, "foo".into())) 75 | ); 76 | } 77 | 78 | /// XML allows any number of leading zeroes. That is not explicitly mentioned 79 | /// in the specification, but enforced by the conformance test suite 80 | /// (https://www.w3.org/XML/Test/) 81 | /// 100 digits should be enough to ensure that any artificial restrictions 82 | /// (such as maximal string of u128 representation) does not applied 83 | #[test] 84 | fn unescape_long() { 85 | assert_eq!( 86 | escape::unescape("0"), 87 | Ok("0".into()), 88 | ); 89 | assert_eq!( 90 | escape::unescape("0"), 91 | Ok("0".into()), 92 | ); 93 | 94 | // Too big numbers for u32 should produce errors 95 | match escape::unescape(&format!("&#{};", u32::MAX as u64 + 1)) { 96 | Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => { 97 | assert_eq!(err.kind(), &IntErrorKind::PosOverflow) 98 | } 99 | x => panic!( 100 | "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}", 101 | x 102 | ), 103 | } 104 | match escape::unescape(&format!("&#x{:x};", u32::MAX as u64 + 1)) { 105 | Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => { 106 | assert_eq!(err.kind(), &IntErrorKind::PosOverflow) 107 | } 108 | x => panic!( 109 | "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}", 110 | x 111 | ), 112 | } 113 | } 114 | 115 | #[test] 116 | fn unescape_sign() { 117 | assert_eq!( 118 | escape::unescape("&#+48;"), 119 | Err(EscapeError::InvalidCharRef( 120 | ParseCharRefError::UnexpectedSign 121 | )), 122 | ); 123 | 
assert_eq!( 124 | escape::unescape("&#x+30;"), 125 | Err(EscapeError::InvalidCharRef( 126 | ParseCharRefError::UnexpectedSign 127 | )), 128 | ); 129 | 130 | assert_eq!( 131 | escape::unescape("&#-48;"), 132 | Err(EscapeError::InvalidCharRef( 133 | ParseCharRefError::UnexpectedSign 134 | )), 135 | ); 136 | assert_eq!( 137 | escape::unescape("&#x-30;"), 138 | Err(EscapeError::InvalidCharRef( 139 | ParseCharRefError::UnexpectedSign 140 | )), 141 | ); 142 | } 143 | 144 | #[test] 145 | fn unescape_with() { 146 | let custom_entities = |ent: &str| match ent { 147 | "foo" => Some("BAR"), 148 | _ => None, 149 | }; 150 | 151 | let unchanged = escape::unescape_with("test", custom_entities); 152 | // assert_eq does not check that Cow is borrowed, but we explicitly use Cow 153 | // because it influences diff 154 | // TODO: use assert_matches! when stabilized and other features will bump MSRV 155 | assert_eq!(unchanged, Ok(Cow::Borrowed("test"))); 156 | assert!(matches!(unchanged, Ok(Cow::Borrowed(_)))); 157 | 158 | assert_eq!( 159 | escape::unescape_with("<", custom_entities), 160 | Err(EscapeError::UnrecognizedEntity(1..3, "lt".into())), 161 | ); 162 | assert_eq!( 163 | escape::unescape_with("0", custom_entities), 164 | Ok("0".into()) 165 | ); 166 | assert_eq!( 167 | escape::unescape_with("0", custom_entities), 168 | Ok("0".into()) 169 | ); 170 | assert_eq!( 171 | escape::unescape_with("&foo;", custom_entities), 172 | Ok("BAR".into()) 173 | ); 174 | assert_eq!( 175 | escape::unescape_with("&fop;", custom_entities), 176 | Err(EscapeError::UnrecognizedEntity(1..4, "fop".into())) 177 | ); 178 | } 179 | 180 | /// XML allows any number of leading zeroes. 
That is not explicitly mentioned 181 | /// in the specification, but enforced by the conformance test suite 182 | /// (https://www.w3.org/XML/Test/) 183 | /// 100 digits should be enough to ensure that any artificial restrictions 184 | /// (such as maximal string of u128 representation) does not applied 185 | #[test] 186 | fn unescape_with_long() { 187 | assert_eq!( 188 | escape::unescape_with("0", |_| None), 189 | Ok("0".into()), 190 | ); 191 | assert_eq!( 192 | escape::unescape_with("0", |_| None), 193 | Ok("0".into()), 194 | ); 195 | 196 | // Too big numbers for u32 should produce errors 197 | match escape::unescape_with(&format!("&#{};", u32::MAX as u64 + 1), |_| None) { 198 | Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => { 199 | assert_eq!(err.kind(), &IntErrorKind::PosOverflow) 200 | } 201 | x => panic!( 202 | "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}", 203 | x 204 | ), 205 | } 206 | match escape::unescape_with(&format!("&#x{:x};", u32::MAX as u64 + 1), |_| None) { 207 | Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => { 208 | assert_eq!(err.kind(), &IntErrorKind::PosOverflow) 209 | } 210 | x => panic!( 211 | "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}", 212 | x 213 | ), 214 | } 215 | } 216 | 217 | #[test] 218 | fn unescape_with_sign() { 219 | assert_eq!( 220 | escape::unescape_with("&#+48;", |_| None), 221 | Err(EscapeError::InvalidCharRef( 222 | ParseCharRefError::UnexpectedSign 223 | )), 224 | ); 225 | assert_eq!( 226 | escape::unescape_with("&#x+30;", |_| None), 227 | Err(EscapeError::InvalidCharRef( 228 | ParseCharRefError::UnexpectedSign 229 | )), 230 | ); 231 | 232 | assert_eq!( 233 | escape::unescape_with("&#-48;", |_| None), 234 | Err(EscapeError::InvalidCharRef( 235 | ParseCharRefError::UnexpectedSign 236 | )), 237 | ); 238 | assert_eq!( 239 | escape::unescape_with("&#x-30;", |_| None), 240 | Err(EscapeError::InvalidCharRef( 241 | 
ParseCharRefError::UnexpectedSign 242 | )), 243 | ); 244 | } 245 | -------------------------------------------------------------------------------- /tests/fuzzing.rs: -------------------------------------------------------------------------------- 1 | //! Cases that was found by fuzzing 2 | 3 | use quick_xml::errors::{Error, IllFormedError}; 4 | use quick_xml::events::Event; 5 | use quick_xml::reader::Reader; 6 | 7 | #[test] 8 | fn fuzz_53() { 9 | let data: &[u8] = b"\xe9\x00\x00\x00\x00\x00\x00\x00\x00\ 10 | \x00\x00\x00\x00\n(\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\ 11 | \x00<>\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00<<\x00\x00\x00"; 12 | let mut reader = Reader::from_reader(data); 13 | let mut buf = vec![]; 14 | loop { 15 | match reader.read_event_into(&mut buf) { 16 | Ok(Event::Eof) | Err(..) => break, 17 | _ => buf.clear(), 18 | } 19 | } 20 | } 21 | 22 | #[test] 23 | fn fuzz_101() { 24 | let data: &[u8] = b"\x00\x00<\x00\x00\x0a>�?#\x0a413518\ 25 | #\x0a\x0a\x0a;<:<)(<:\x0a\x0a\x0a\x0a;<:\x0a\x0a\ 26 | <:\x0a\x0a\x0a\x0a\x0a<\x00*\x00\x00\x00\x00"; 27 | let mut reader = Reader::from_reader(data); 28 | let mut buf = vec![]; 29 | loop { 30 | match reader.read_event_into(&mut buf) { 31 | Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { 32 | for a in e.attributes() { 33 | if a.ok().map_or(true, |a| { 34 | a.decode_and_unescape_value(reader.decoder()).is_err() 35 | }) { 36 | break; 37 | } 38 | } 39 | } 40 | Ok(Event::Text(e)) => { 41 | if e.decode().is_err() { 42 | break; 43 | } 44 | } 45 | Ok(Event::Eof) | Err(..) 
=> break, 46 | _ => (), 47 | } 48 | buf.clear(); 49 | } 50 | } 51 | 52 | #[test] 53 | fn fuzz_empty_doctype() { 54 | let data: &[u8] = b""; 55 | let mut reader = Reader::from_reader(data); 56 | let mut buf = Vec::new(); 57 | assert!(matches!( 58 | reader.read_event_into(&mut buf).unwrap_err(), 59 | Error::IllFormed(IllFormedError::MissingDoctypeName) 60 | )); 61 | assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); 62 | } 63 | -------------------------------------------------------------------------------- /tests/helpers/mod.rs: -------------------------------------------------------------------------------- 1 | //! Utility functions for integration tests 2 | 3 | /// Tests for https://github.com/tafia/quick-xml/issues/469 4 | /// Exported to reuse in `async-tokio` tests. 5 | #[macro_export] 6 | macro_rules! small_buffers_tests { 7 | ( 8 | #[$test:meta] 9 | $read_event:ident: $BufReader:ty 10 | $(, $async:ident, $await:ident)? 11 | ) => { 12 | mod small_buffers { 13 | use quick_xml::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event}; 14 | use quick_xml::reader::Reader; 15 | use pretty_assertions::assert_eq; 16 | 17 | #[$test] 18 | $($async)? fn decl() { 19 | let xml = ""; 20 | // ^^^^^^^ data that fit into buffer 21 | let size = xml.match_indices("?>").next().unwrap().0 + 1; 22 | let br = <$BufReader>::with_capacity(size, xml.as_bytes()); 23 | let mut reader = Reader::from_reader(br); 24 | let mut buf = Vec::new(); 25 | 26 | assert_eq!( 27 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 28 | Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) 29 | ); 30 | assert_eq!( 31 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 32 | Event::Eof 33 | ); 34 | } 35 | 36 | #[$test] 37 | $($async)? 
fn pi() { 38 | let xml = ""; 39 | // ^^^^^ data that fit into buffer 40 | let size = xml.match_indices("?>").next().unwrap().0 + 1; 41 | let br = <$BufReader>::with_capacity(size, xml.as_bytes()); 42 | let mut reader = Reader::from_reader(br); 43 | let mut buf = Vec::new(); 44 | 45 | assert_eq!( 46 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 47 | Event::PI(BytesPI::new("pi")) 48 | ); 49 | assert_eq!( 50 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 51 | Event::Eof 52 | ); 53 | } 54 | 55 | #[$test] 56 | $($async)? fn empty() { 57 | let xml = ""; 58 | // ^^^^^^^ data that fit into buffer 59 | let size = xml.match_indices("/>").next().unwrap().0 + 1; 60 | let br = <$BufReader>::with_capacity(size, xml.as_bytes()); 61 | let mut reader = Reader::from_reader(br); 62 | let mut buf = Vec::new(); 63 | 64 | assert_eq!( 65 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 66 | Event::Empty(BytesStart::new("empty")) 67 | ); 68 | assert_eq!( 69 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 70 | Event::Eof 71 | ); 72 | } 73 | 74 | #[$test] 75 | $($async)? fn cdata1() { 76 | let xml = ""; 77 | // ^^^^^^^^^^^^^^^ data that fit into buffer 78 | let size = xml.match_indices("]]>").next().unwrap().0 + 1; 79 | let br = <$BufReader>::with_capacity(size, xml.as_bytes()); 80 | let mut reader = Reader::from_reader(br); 81 | let mut buf = Vec::new(); 82 | 83 | assert_eq!( 84 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 85 | Event::CData(BytesCData::new("cdata")) 86 | ); 87 | assert_eq!( 88 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 89 | Event::Eof 90 | ); 91 | } 92 | 93 | #[$test] 94 | $($async)? 
fn cdata2() { 95 | let xml = ""; 96 | // ^^^^^^^^^^^^^^^^ data that fit into buffer 97 | let size = xml.match_indices("]]>").next().unwrap().0 + 2; 98 | let br = <$BufReader>::with_capacity(size, xml.as_bytes()); 99 | let mut reader = Reader::from_reader(br); 100 | let mut buf = Vec::new(); 101 | 102 | assert_eq!( 103 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 104 | Event::CData(BytesCData::new("cdata")) 105 | ); 106 | assert_eq!( 107 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 108 | Event::Eof 109 | ); 110 | } 111 | 112 | #[$test] 113 | $($async)? fn comment1() { 114 | let xml = ""; 115 | // ^^^^^^^^^^^^ data that fit into buffer 116 | let size = xml.match_indices("-->").next().unwrap().0 + 1; 117 | let br = <$BufReader>::with_capacity(size, xml.as_bytes()); 118 | let mut reader = Reader::from_reader(br); 119 | let mut buf = Vec::new(); 120 | 121 | assert_eq!( 122 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 123 | Event::Comment(BytesText::new("comment")) 124 | ); 125 | assert_eq!( 126 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 127 | Event::Eof 128 | ); 129 | } 130 | 131 | #[$test] 132 | $($async)? fn comment2() { 133 | let xml = ""; 134 | // ^^^^^^^^^^^^^ data that fit into buffer 135 | let size = xml.match_indices("-->").next().unwrap().0 + 2; 136 | let br = <$BufReader>::with_capacity(size, xml.as_bytes()); 137 | let mut reader = Reader::from_reader(br); 138 | let mut buf = Vec::new(); 139 | 140 | assert_eq!( 141 | reader.$read_event(&mut buf) $(.$await)? .unwrap(), 142 | Event::Comment(BytesText::new("comment")) 143 | ); 144 | assert_eq!( 145 | reader.$read_event(&mut buf) $(.$await)? 
.unwrap(), 146 | Event::Eof 147 | ); 148 | } 149 | } 150 | }; 151 | } 152 | -------------------------------------------------------------------------------- /tests/html.rs: -------------------------------------------------------------------------------- 1 | use pretty_assertions::assert_eq; 2 | use quick_xml::encoding::Decoder; 3 | use quick_xml::escape::unescape; 4 | use quick_xml::events::{BytesStart, Event}; 5 | use quick_xml::name::{QName, ResolveResult}; 6 | use quick_xml::reader::NsReader; 7 | use std::str::from_utf8; 8 | 9 | #[test] 10 | fn html5() { 11 | test( 12 | include_str!("documents/html5.html"), 13 | include_str!("documents/html5.txt"), 14 | false, 15 | ); 16 | } 17 | 18 | #[test] 19 | fn escaped_characters_html() { 20 | test( 21 | r#"╔╗╔╗╔╗"#, 22 | r#" 23 | |StartElement(e [attr="ℏÈℓ𝕝⨀"]) 24 | |Reference(boxDR) 25 | |Reference(boxDL) 26 | |Reference(#x02554) 27 | |Reference(#x02557) 28 | |Reference(#9556) 29 | |Reference(#9559) 30 | |EndElement(e) 31 | |EndDocument 32 | "#, 33 | true, 34 | ) 35 | } 36 | 37 | #[track_caller] 38 | fn test(input: &str, output: &str, trim: bool) { 39 | test_bytes(input.as_bytes(), output.as_bytes(), trim); 40 | } 41 | 42 | #[track_caller] 43 | fn test_bytes(input: &[u8], output: &[u8], trim: bool) { 44 | let mut reader = NsReader::from_reader(input); 45 | let config = reader.config_mut(); 46 | config.trim_text(trim); 47 | config.check_comments = true; 48 | 49 | let mut spec_lines = SpecIter(output).enumerate(); 50 | 51 | let mut decoder = reader.decoder(); 52 | loop { 53 | let line = match reader.read_resolved_event() { 54 | Ok((_, Event::Decl(e))) => { 55 | // Declaration could change decoder 56 | decoder = reader.decoder(); 57 | 58 | let version_cow = e.version().unwrap(); 59 | let version = decoder.decode(version_cow.as_ref()).unwrap(); 60 | let encoding_cow = e.encoding().unwrap().unwrap(); 61 | let encoding = decoder.decode(encoding_cow.as_ref()).unwrap(); 62 | format!("StartDocument({}, {})", version, encoding) 63 
| } 64 | Ok((_, Event::PI(e))) => { 65 | format!("ProcessingInstruction(PI={})", decoder.decode(&e).unwrap()) 66 | } 67 | Ok((_, Event::DocType(e))) => format!("DocType({})", decoder.decode(&e).unwrap()), 68 | Ok((n, Event::Start(e))) => { 69 | let name = namespace_name(n, e.name(), decoder); 70 | match make_attrs(&e, decoder) { 71 | Ok(attrs) if attrs.is_empty() => format!("StartElement({})", &name), 72 | Ok(attrs) => format!("StartElement({} [{}])", &name, &attrs), 73 | Err(e) => format!("StartElement({}, attr-error: {})", &name, &e), 74 | } 75 | } 76 | Ok((n, Event::Empty(e))) => { 77 | let name = namespace_name(n, e.name(), decoder); 78 | match make_attrs(&e, decoder) { 79 | Ok(attrs) if attrs.is_empty() => format!("EmptyElement({})", &name), 80 | Ok(attrs) => format!("EmptyElement({} [{}])", &name, &attrs), 81 | Err(e) => format!("EmptyElement({}, attr-error: {})", &name, &e), 82 | } 83 | } 84 | Ok((n, Event::End(e))) => { 85 | let name = namespace_name(n, e.name(), decoder); 86 | format!("EndElement({})", name) 87 | } 88 | Ok((_, Event::Comment(e))) => format!("Comment({})", decoder.decode(&e).unwrap()), 89 | Ok((_, Event::CData(e))) => format!("CData({})", decoder.decode(&e).unwrap()), 90 | Ok((_, Event::Text(e))) => match unescape(&decoder.decode(&e).unwrap()) { 91 | Ok(c) => format!("Characters({})", &c), 92 | Err(err) => format!("FailedUnescape({:?}; {})", e.as_ref(), err), 93 | }, 94 | Ok((_, Event::GeneralRef(e))) => match unescape(&decoder.decode(&e).unwrap()) { 95 | Ok(c) => format!("Reference({})", &c), 96 | Err(err) => format!("FailedUnescape({:?}; {})", e.as_ref(), err), 97 | }, 98 | Ok((_, Event::Eof)) => "EndDocument".to_string(), 99 | Err(e) => format!("Error: {}", e), 100 | }; 101 | if let Some((n, spec)) = spec_lines.next() { 102 | if spec.trim() == "EndDocument" { 103 | break; 104 | } 105 | assert_eq!( 106 | line.trim(), 107 | spec.trim(), 108 | "Unexpected event at line {}", 109 | n + 1 110 | ); 111 | } else { 112 | if line == "EndDocument" 
{ 113 | break; 114 | } 115 | panic!("Unexpected event: {}", line); 116 | } 117 | } 118 | } 119 | 120 | fn namespace_name(n: ResolveResult, name: QName, decoder: Decoder) -> String { 121 | let name = decoder.decode(name.as_ref()).unwrap(); 122 | match n { 123 | // Produces string '{namespace}prefixed_name' 124 | ResolveResult::Bound(n) => format!("{{{}}}{}", decoder.decode(n.as_ref()).unwrap(), name), 125 | _ => name.to_string(), 126 | } 127 | } 128 | 129 | fn make_attrs(e: &BytesStart, decoder: Decoder) -> ::std::result::Result { 130 | let mut atts = Vec::new(); 131 | for a in e.attributes() { 132 | match a { 133 | Ok(a) => { 134 | if a.key.as_namespace_binding().is_none() { 135 | let key = decoder.decode(a.key.as_ref()).unwrap(); 136 | let value = decoder.decode(a.value.as_ref()).unwrap(); 137 | let unescaped_value = unescape(&value).unwrap(); 138 | atts.push(format!( 139 | "{}=\"{}\"", 140 | key, 141 | // unescape does not change validity of an UTF-8 string 142 | &unescaped_value 143 | )); 144 | } 145 | } 146 | Err(e) => return Err(e.to_string()), 147 | } 148 | } 149 | Ok(atts.join(", ")) 150 | } 151 | 152 | struct SpecIter<'a>(&'a [u8]); 153 | 154 | impl<'a> Iterator for SpecIter<'a> { 155 | type Item = &'a str; 156 | fn next(&mut self) -> Option<&'a str> { 157 | let start = self 158 | .0 159 | .iter() 160 | .position(|b| !matches!(*b, b' ' | b'\r' | b'\n' | b'\t' | b'|' | b':' | b'0'..=b'9')) 161 | .unwrap_or(0); 162 | 163 | if let Some(p) = self.0.windows(3).position(|w| w == b")\r\n") { 164 | let (prev, next) = self.0.split_at(p + 1); 165 | self.0 = &next[1..]; 166 | Some(from_utf8(&prev[start..]).expect("Error decoding to uft8")) 167 | } else if let Some(p) = self.0.windows(2).position(|w| w == b")\n") { 168 | let (prev, next) = self.0.split_at(p + 1); 169 | self.0 = next; 170 | Some(from_utf8(&prev[start..]).expect("Error decoding to uft8")) 171 | } else if self.0.is_empty() { 172 | None 173 | } else { 174 | let p = self.0; 175 | self.0 = &[]; 176 | 
Some(from_utf8(&p[start..]).unwrap()) 177 | } 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /tests/reader-attributes.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | use quick_xml::events::attributes::Attribute; 4 | use quick_xml::events::{BytesEnd, Event::*}; 5 | use quick_xml::name::QName; 6 | use quick_xml::reader::Reader; 7 | 8 | use pretty_assertions::assert_eq; 9 | 10 | #[test] 11 | fn single_gt() { 12 | let mut reader = Reader::from_str(""); 13 | match reader.read_event() { 14 | Ok(Start(e)) => { 15 | let mut attrs = e.attributes(); 16 | assert_eq!( 17 | attrs.next(), 18 | Some(Ok(Attribute { 19 | key: QName(b"attr"), 20 | value: Cow::Borrowed(b">"), 21 | })) 22 | ); 23 | assert_eq!( 24 | attrs.next(), 25 | Some(Ok(Attribute { 26 | key: QName(b"check"), 27 | value: Cow::Borrowed(b"2"), 28 | })) 29 | ); 30 | assert_eq!(attrs.next(), None); 31 | } 32 | x => panic!("expected , got {:?}", x), 33 | } 34 | assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); 35 | } 36 | 37 | #[test] 38 | fn single_gt_quot() { 39 | let mut reader = Reader::from_str(r#""#); 40 | match reader.read_event() { 41 | Ok(Start(e)) => { 42 | let mut attrs = e.attributes(); 43 | assert_eq!( 44 | attrs.next(), 45 | Some(Ok(Attribute { 46 | key: QName(b"attr"), 47 | value: Cow::Borrowed(br#"">""#), 48 | })) 49 | ); 50 | assert_eq!( 51 | attrs.next(), 52 | Some(Ok(Attribute { 53 | key: QName(b"check"), 54 | value: Cow::Borrowed(br#""2""#), 55 | })) 56 | ); 57 | assert_eq!(attrs.next(), None); 58 | } 59 | x => panic!("expected , got {:?}", x), 60 | } 61 | assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); 62 | } 63 | 64 | #[test] 65 | fn double_gt() { 66 | let mut reader = Reader::from_str(r#""#); 67 | match reader.read_event() { 68 | Ok(Start(e)) => { 69 | let mut attrs = e.attributes(); 70 | assert_eq!( 71 | attrs.next(), 72 | 
Some(Ok(Attribute { 73 | key: QName(b"attr"), 74 | value: Cow::Borrowed(b">"), 75 | })) 76 | ); 77 | assert_eq!( 78 | attrs.next(), 79 | Some(Ok(Attribute { 80 | key: QName(b"check"), 81 | value: Cow::Borrowed(b"2"), 82 | })) 83 | ); 84 | assert_eq!(attrs.next(), None); 85 | } 86 | x => panic!("expected , got {:?}", x), 87 | } 88 | assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); 89 | } 90 | 91 | #[test] 92 | fn double_gt_apos() { 93 | let mut reader = Reader::from_str(r#""#); 94 | match reader.read_event() { 95 | Ok(Start(e)) => { 96 | let mut attrs = e.attributes(); 97 | assert_eq!( 98 | attrs.next(), 99 | Some(Ok(Attribute { 100 | key: QName(b"attr"), 101 | value: Cow::Borrowed(b"'>'"), 102 | })) 103 | ); 104 | assert_eq!( 105 | attrs.next(), 106 | Some(Ok(Attribute { 107 | key: QName(b"check"), 108 | value: Cow::Borrowed(b"'2'"), 109 | })) 110 | ); 111 | assert_eq!(attrs.next(), None); 112 | } 113 | x => panic!("expected , got {:?}", x), 114 | } 115 | assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); 116 | } 117 | 118 | #[test] 119 | fn empty_tag() { 120 | let mut reader = Reader::from_str(""); 121 | match reader.read_event() { 122 | Ok(Empty(e)) => { 123 | let mut attrs = e.attributes(); 124 | assert_eq!( 125 | attrs.next(), 126 | Some(Ok(Attribute { 127 | key: QName(b"att1"), 128 | value: Cow::Borrowed(b"a"), 129 | })) 130 | ); 131 | assert_eq!( 132 | attrs.next(), 133 | Some(Ok(Attribute { 134 | key: QName(b"att2"), 135 | value: Cow::Borrowed(b"b"), 136 | })) 137 | ); 138 | assert_eq!(attrs.next(), None); 139 | } 140 | e => panic!("Expecting Empty event, got {:?}", e), 141 | } 142 | } 143 | 144 | #[test] 145 | fn equal_sign_in_value() { 146 | let mut reader = Reader::from_str(""); 147 | match reader.read_event() { 148 | Ok(Empty(e)) => { 149 | let mut attrs = e.attributes(); 150 | assert_eq!( 151 | attrs.next(), 152 | Some(Ok(Attribute { 153 | key: QName(b"att1"), 154 | value: Cow::Borrowed(b"a=b"), 155 | })) 156 | ); 157 | 
assert_eq!(attrs.next(), None); 158 | } 159 | e => panic!("Expecting Empty event, got {:?}", e), 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /tests/roundtrip.rs: -------------------------------------------------------------------------------- 1 | //! Contains tests that checks that writing events from a reader produces the same documents. 2 | 3 | use quick_xml::events::attributes::AttrError; 4 | use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event::*}; 5 | use quick_xml::reader::Reader; 6 | use quick_xml::writer::Writer; 7 | 8 | use pretty_assertions::assert_eq; 9 | 10 | mod events { 11 | use super::*; 12 | use pretty_assertions::assert_eq; 13 | 14 | /// Test start and end together because reading only end event requires special 15 | /// setting on the reader 16 | #[test] 17 | fn start_end() { 18 | let input = r#""#; 19 | let mut reader = Reader::from_str(input); 20 | let mut writer = Writer::new(Vec::new()); 21 | loop { 22 | match reader.read_event().unwrap() { 23 | Eof => break, 24 | e => assert!(writer.write_event(e).is_ok()), 25 | } 26 | } 27 | 28 | let result = writer.into_inner(); 29 | assert_eq!(String::from_utf8(result).unwrap(), input); 30 | } 31 | 32 | #[test] 33 | fn empty() { 34 | let input = r#""#; 35 | let mut reader = Reader::from_str(input); 36 | let mut writer = Writer::new(Vec::new()); 37 | loop { 38 | match reader.read_event().unwrap() { 39 | Eof => break, 40 | e => assert!(writer.write_event(e).is_ok()), 41 | } 42 | } 43 | 44 | let result = writer.into_inner(); 45 | assert_eq!(String::from_utf8(result).unwrap(), input); 46 | } 47 | 48 | #[test] 49 | fn text() { 50 | let input = "it is just arbitrary text & some character reference"; 51 | let mut reader = Reader::from_str(input); 52 | let mut writer = Writer::new(Vec::new()); 53 | loop { 54 | match reader.read_event().unwrap() { 55 | Eof => break, 56 | e => assert!(writer.write_event(e).is_ok()), 57 | } 58 | } 59 | 60 | let 
result = writer.into_inner(); 61 | assert_eq!(String::from_utf8(result).unwrap(), input); 62 | } 63 | 64 | #[test] 65 | fn cdata() { 66 | let input = ""; 67 | let mut reader = Reader::from_str(input); 68 | let mut writer = Writer::new(Vec::new()); 69 | loop { 70 | match reader.read_event().unwrap() { 71 | Eof => break, 72 | e => assert!(writer.write_event(e).is_ok()), 73 | } 74 | } 75 | 76 | let result = writer.into_inner(); 77 | assert_eq!(String::from_utf8(result).unwrap(), input); 78 | } 79 | 80 | #[test] 81 | fn pi() { 82 | let input = ""; 83 | let mut reader = Reader::from_str(input); 84 | let mut writer = Writer::new(Vec::new()); 85 | loop { 86 | match reader.read_event().unwrap() { 87 | Eof => break, 88 | e => assert!(writer.write_event(e).is_ok()), 89 | } 90 | } 91 | 92 | let result = writer.into_inner(); 93 | assert_eq!(String::from_utf8(result).unwrap(), input); 94 | } 95 | 96 | #[test] 97 | fn decl() { 98 | let input = ""; 99 | let mut reader = Reader::from_str(input); 100 | let mut writer = Writer::new(Vec::new()); 101 | loop { 102 | match reader.read_event().unwrap() { 103 | Eof => break, 104 | e => assert!(writer.write_event(e).is_ok()), 105 | } 106 | } 107 | 108 | let result = writer.into_inner(); 109 | assert_eq!(String::from_utf8(result).unwrap(), input); 110 | } 111 | 112 | #[test] 113 | fn comment() { 114 | let input = ""; 115 | let mut reader = Reader::from_str(input); 116 | let mut writer = Writer::new(Vec::new()); 117 | loop { 118 | match reader.read_event().unwrap() { 119 | Eof => break, 120 | e => assert!(writer.write_event(e).is_ok()), 121 | } 122 | } 123 | 124 | let result = writer.into_inner(); 125 | assert_eq!(String::from_utf8(result).unwrap(), input); 126 | } 127 | } 128 | 129 | /// Indent of the last tag mismatched intentionally 130 | const XML: &str = r#" 131 | 132 |
133 |
134 |
135 |
data <escaped>
136 |
137 | "#; 138 | 139 | /// Directly write event from reader without any processing. 140 | #[test] 141 | fn simple() { 142 | let mut reader = Reader::from_str(XML); 143 | let mut writer = Writer::new(Vec::new()); 144 | loop { 145 | match reader.read_event().unwrap() { 146 | Eof => break, 147 | e => assert!(writer.write_event(e).is_ok()), 148 | } 149 | } 150 | 151 | let result = writer.into_inner(); 152 | assert_eq!(String::from_utf8(result).unwrap(), XML); 153 | } 154 | 155 | /// Directly write event from reader without processing (except auto-trimming text). 156 | #[test] 157 | fn with_trim() { 158 | let input = include_str!("documents/test_writer.xml").trim(); 159 | let mut reader = Reader::from_str(input); 160 | reader.config_mut().trim_text(true); 161 | let mut writer = Writer::new(Vec::new()); 162 | loop { 163 | match reader.read_event().unwrap() { 164 | Eof => break, 165 | e => assert!(writer.write_event(e).is_ok()), 166 | } 167 | } 168 | 169 | let result = writer.into_inner(); 170 | assert_eq!(String::from_utf8(result).unwrap(), input); 171 | } 172 | 173 | /// Directly write reference to event from reader without processing (except auto-trimming text). 174 | #[test] 175 | fn with_trim_ref() { 176 | let input = include_str!("documents/test_writer.xml").trim(); 177 | let mut reader = Reader::from_str(input); 178 | reader.config_mut().trim_text(true); 179 | let mut writer = Writer::new(Vec::new()); 180 | loop { 181 | match reader.read_event().unwrap() { 182 | Eof => break, 183 | e => assert!(writer.write_event(e.borrow()).is_ok()), // either `e` or `&e` 184 | } 185 | } 186 | 187 | let result = writer.into_inner(); 188 | assert_eq!(String::from_utf8(result).unwrap(), input); 189 | } 190 | 191 | /// Directly write event from reader without processing (except auto-trimming text) 192 | /// with the same indentation settings as in the original document. 
193 | #[test] 194 | fn with_indent() { 195 | let input = include_str!("documents/test_writer_indent.xml"); 196 | let mut reader = Reader::from_str(input); 197 | reader.config_mut().trim_text(true); 198 | let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); 199 | loop { 200 | match reader.read_event().unwrap() { 201 | Eof => break, 202 | e => assert!(writer.write_event(e).is_ok()), 203 | } 204 | } 205 | 206 | let result = writer.into_inner(); 207 | assert_eq!(String::from_utf8(result).unwrap(), input); 208 | } 209 | 210 | /// Directly write event from reader without processing (except auto-trimming text) 211 | /// with the same indentation settings as in the original document. 212 | /// Document contains CDATA section. 213 | #[test] 214 | fn with_indent_cdata() { 215 | let input = include_str!("documents/test_writer_indent_cdata.xml"); 216 | let mut reader = Reader::from_str(input); 217 | reader.config_mut().trim_text(true); 218 | let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4); 219 | loop { 220 | match reader.read_event().unwrap() { 221 | Eof => break, 222 | e => assert!(writer.write_event(e).is_ok()), 223 | } 224 | } 225 | 226 | let result = writer.into_inner(); 227 | assert_eq!(String::from_utf8(result).unwrap(), input); 228 | } 229 | 230 | /// Directly write event from reader with unescaping and re-escaping content of the `Text` events. 
231 | #[test] 232 | fn reescape_text() { 233 | let mut reader = Reader::from_str(XML); 234 | let mut writer = Writer::new(Vec::new()); 235 | loop { 236 | match reader.read_event().unwrap() { 237 | Eof => break, 238 | Text(e) => { 239 | let t = e.decode().unwrap(); 240 | assert!(writer.write_event(Text(BytesText::new(&t))).is_ok()); 241 | } 242 | e => assert!(writer.write_event(e).is_ok()), 243 | } 244 | } 245 | 246 | let result = writer.into_inner(); 247 | assert_eq!(String::from_utf8(result).unwrap(), XML); 248 | } 249 | 250 | /// Rewrite some events during processing 251 | #[test] 252 | fn partial_rewrite() { 253 | type AttrResult = std::result::Result; 254 | 255 | let str_from = r#""#; 256 | let expected = r#""#; 257 | let mut reader = Reader::from_str(str_from); 258 | let mut writer = Writer::new(Vec::new()); 259 | loop { 260 | let event = match reader.read_event().unwrap() { 261 | Eof => break, 262 | Start(elem) => { 263 | let mut attrs = elem.attributes().collect::>>().unwrap(); 264 | attrs.extend_from_slice(&[("a", "b").into(), ("c", "d").into()]); 265 | let mut elem = BytesStart::new("copy"); 266 | elem.extend_attributes(attrs); 267 | elem.push_attribute(("x", "y\"z")); 268 | Start(elem) 269 | } 270 | End(_) => End(BytesEnd::new("copy")), 271 | e => e, 272 | }; 273 | assert!(writer.write_event(event).is_ok()); 274 | } 275 | 276 | let result = writer.into_inner(); 277 | assert_eq!(String::from_utf8(result).unwrap(), expected); 278 | } 279 | -------------------------------------------------------------------------------- /tests/serde-migrated.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use quick_xml::de::from_str; 4 | use serde::{de, ser}; 5 | use serde::{Deserialize, Serialize}; 6 | 7 | use pretty_assertions::assert_eq; 8 | 9 | #[derive(PartialEq, Debug, Serialize, Deserialize)] 10 | struct Simple { 11 | a: (), 12 | b: usize, 13 | c: String, 14 | d: Option, 15 | } 16 | 17 | 
#[track_caller] 18 | fn test_parse_ok<'a, T: std::fmt::Debug>(errors: &[(&'a str, T)]) 19 | where 20 | T: PartialEq + Debug + ser::Serialize + for<'de> de::Deserialize<'de>, 21 | { 22 | for (i, &(s, ref value)) in errors.iter().enumerate() { 23 | match from_str::(s) { 24 | Ok(v) => assert_eq!( 25 | v, *value, 26 | "{} error, expected: {:?}, found: {:?}", 27 | i, value, v 28 | ), 29 | Err(e) => panic!("{} error, expected {:?}, found error {}", i, value, e), 30 | } 31 | 32 | // // Make sure we can deserialize into an `Element`. 33 | // let xml_value: Element = from_str(s).unwrap(); 34 | 35 | // // Make sure we can deserialize from an `Element`. 36 | // let v: T = from_value(xml_value.clone()).unwrap(); 37 | // assert_eq!(v, *value); 38 | } 39 | } 40 | 41 | #[track_caller] 42 | fn test_parse_err<'a, T>(errors: &[&'a str]) 43 | where 44 | T: PartialEq + Debug + ser::Serialize + for<'de> de::Deserialize<'de>, 45 | { 46 | for &s in errors { 47 | assert!(from_str::(s).is_err()); 48 | } 49 | } 50 | 51 | #[test] 52 | fn test_namespaces() { 53 | #[derive(PartialEq, Serialize, Deserialize, Debug)] 54 | struct Envelope { 55 | subject: String, 56 | } 57 | let s = r#" 58 | 59 | 60 | Reference rates 61 | "#; 62 | test_parse_ok(&[( 63 | s, 64 | Envelope { 65 | subject: "Reference rates".to_string(), 66 | }, 67 | )]); 68 | } 69 | 70 | #[test] 71 | #[ignore] // FIXME 72 | fn test_forwarded_namespace() { 73 | #[derive(PartialEq, Serialize, Deserialize, Debug)] 74 | struct Graphml { 75 | #[serde(rename = "xsi:schemaLocation")] 76 | schema_location: String, 77 | } 78 | let s = r#" 79 | 80 | 84 | "#; 85 | test_parse_ok(&[( 86 | s, 87 | Graphml { 88 | schema_location: "http://graphml.graphdrawing.org/xmlns 89 | http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd" 90 | .to_string(), 91 | }, 92 | )]); 93 | } 94 | 95 | #[test] 96 | fn test_parse_string() { 97 | test_parse_ok(&[ 98 | ( 99 | "This is a String", 100 | "This is a String".to_string(), 101 | ), 102 | ("", "".to_string()), 103 | 
(" ", "".to_string()), 104 | ("<boom/>", "".to_string()), 105 | ("", "♫".to_string()), 106 | ("", "♫".to_string()), 107 | //( 108 | // "]]>♫", 109 | // "♫♫".to_string(), 110 | //), 111 | ]); 112 | } 113 | 114 | #[test] 115 | #[ignore] // FIXME 116 | fn test_parse_string_not_trim() { 117 | test_parse_ok(&[(" ", " ".to_string())]); 118 | } 119 | 120 | #[test] 121 | fn test_option() { 122 | test_parse_ok(&[ 123 | ("
", Some("".to_string())), 124 | ("", Some("".to_string())), 125 | (" ", Some("".to_string())), 126 | ("42", Some("42".to_string())), 127 | ]); 128 | } 129 | 130 | #[test] 131 | #[ignore] // FIXME 132 | fn test_option_not_trim() { 133 | test_parse_ok(&[(" ", Some(" ".to_string()))]); 134 | } 135 | 136 | #[test] 137 | fn test_parse_unfinished() { 138 | test_parse_err::(&[" 139 | abc 140 | 141 | 2 142 | "]); 143 | } 144 | 145 | #[test] 146 | fn test_things_qc_found() { 147 | test_parse_err::(&["<\u{0}:/"]); 148 | } 149 | -------------------------------------------------------------------------------- /tests/serde_helpers/mod.rs: -------------------------------------------------------------------------------- 1 | //! Utility functions for serde integration tests 2 | 3 | use quick_xml::de::Deserializer; 4 | use quick_xml::DeError; 5 | use serde::Deserialize; 6 | 7 | /// Deserialize an instance of type T from a string of XML text. 8 | /// If deserialization was succeeded checks that all XML events was consumed 9 | pub fn from_str<'de, T>(source: &'de str) -> Result 10 | where 11 | T: Deserialize<'de>, 12 | { 13 | // Log XML that we try to deserialize to see it in the failed tests output 14 | dbg!(source); 15 | let mut de = Deserializer::from_str(source); 16 | let result = T::deserialize(&mut de); 17 | 18 | // If type was deserialized, the whole XML document should be consumed 19 | if let Ok(_) = result { 20 | assert!(de.is_empty(), "the whole XML document should be consumed"); 21 | } 22 | 23 | result 24 | } 25 | -------------------------------------------------------------------------------- /tests/serde_roundtrip.rs: -------------------------------------------------------------------------------- 1 | use quick_xml::{de::from_str, se::to_string}; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | use pretty_assertions::assert_eq; 5 | 6 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 7 | enum Node { 8 | Boolean(bool), 9 | Identifier { value: String, index: u32 }, 10 | 
EOF, 11 | } 12 | 13 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 14 | struct Nodes { 15 | #[serde(rename = "$value")] 16 | items: Vec, 17 | } 18 | 19 | #[test] 20 | #[ignore] 21 | fn round_trip_list_of_enums() { 22 | // Construct some inputs 23 | let nodes = Nodes { 24 | items: vec![ 25 | Node::Boolean(true), 26 | Node::Identifier { 27 | value: "foo".to_string(), 28 | index: 5, 29 | }, 30 | Node::EOF, 31 | ], 32 | }; 33 | 34 | let should_be = r#" 35 | 36 | 37 | true 38 | 39 | 40 | foo 41 | 5 42 | 43 | 44 | "#; 45 | 46 | let serialized_nodes = to_string(&nodes).unwrap(); 47 | assert_eq!(serialized_nodes, should_be); 48 | 49 | // Then turn it back into a `Nodes` struct and make sure it's the same 50 | // as the original 51 | let deserialized_nodes: Nodes = from_str(serialized_nodes.as_str()).unwrap(); 52 | assert_eq!(deserialized_nodes, nodes); 53 | } 54 | -------------------------------------------------------------------------------- /tests/writer.rs: -------------------------------------------------------------------------------- 1 | use quick_xml::events::{ 2 | BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event::*, 3 | }; 4 | use quick_xml::writer::Writer; 5 | 6 | use pretty_assertions::assert_eq; 7 | 8 | mod declaration { 9 | use super::*; 10 | use pretty_assertions::assert_eq; 11 | 12 | /// Written: version, encoding, standalone 13 | #[test] 14 | fn full() { 15 | let mut writer = Writer::new(Vec::new()); 16 | writer 17 | .write_event(Decl(BytesDecl::new("1.2", Some("utf-X"), Some("yo")))) 18 | .expect("writing xml decl should succeed"); 19 | 20 | let result = writer.into_inner(); 21 | assert_eq!( 22 | String::from_utf8(result).expect("utf-8 output"), 23 | "", 24 | "writer output (LHS)" 25 | ); 26 | } 27 | 28 | /// Written: version, standalone 29 | #[test] 30 | fn standalone() { 31 | let mut writer = Writer::new(Vec::new()); 32 | writer 33 | .write_event(Decl(BytesDecl::new("1.2", None, Some("yo")))) 34 | .expect("writing xml decl 
should succeed"); 35 | 36 | let result = writer.into_inner(); 37 | assert_eq!( 38 | String::from_utf8(result).expect("utf-8 output"), 39 | "", 40 | "writer output (LHS)" 41 | ); 42 | } 43 | 44 | /// Written: version, encoding 45 | #[test] 46 | fn encoding() { 47 | let mut writer = Writer::new(Vec::new()); 48 | writer 49 | .write_event(Decl(BytesDecl::new("1.2", Some("utf-X"), None))) 50 | .expect("writing xml decl should succeed"); 51 | 52 | let result = writer.into_inner(); 53 | assert_eq!( 54 | String::from_utf8(result).expect("utf-8 output"), 55 | "", 56 | "writer output (LHS)" 57 | ); 58 | } 59 | 60 | /// Written: version 61 | #[test] 62 | fn version() { 63 | let mut writer = Writer::new(Vec::new()); 64 | writer 65 | .write_event(Decl(BytesDecl::new("1.2", None, None))) 66 | .expect("writing xml decl should succeed"); 67 | 68 | let result = writer.into_inner(); 69 | assert_eq!( 70 | String::from_utf8(result).expect("utf-8 output"), 71 | "", 72 | "writer output (LHS)" 73 | ); 74 | } 75 | 76 | /// This test ensures that empty XML declaration attribute values are not a problem. 77 | #[test] 78 | fn empty() { 79 | let mut writer = Writer::new(Vec::new()); 80 | // An empty version should arguably be an error, but we don't expect anyone to actually supply 81 | // an empty version. 
82 | writer 83 | .write_event(Decl(BytesDecl::new("", Some(""), Some("")))) 84 | .expect("writing xml decl should succeed"); 85 | 86 | let result = writer.into_inner(); 87 | assert_eq!( 88 | String::from_utf8(result).expect("utf-8 output"), 89 | "", 90 | "writer output (LHS)" 91 | ); 92 | } 93 | } 94 | 95 | #[test] 96 | fn pi() { 97 | let mut writer = Writer::new(Vec::new()); 98 | writer 99 | .write_event(PI(BytesPI::new("xml-stylesheet href='theme.xls' "))) 100 | .expect("writing processing instruction should succeed"); 101 | 102 | let result = writer.into_inner(); 103 | assert_eq!( 104 | String::from_utf8(result).expect("utf-8 output"), 105 | "", 106 | "writer output (LHS)" 107 | ); 108 | } 109 | 110 | #[test] 111 | fn empty() { 112 | let mut writer = Writer::new(Vec::new()); 113 | writer 114 | .write_event(Empty( 115 | BytesStart::new("game").with_attributes([("publisher", "Blizzard")]), 116 | )) 117 | .expect("writing empty tag should succeed"); 118 | 119 | let result = writer.into_inner(); 120 | assert_eq!( 121 | String::from_utf8(result).expect("utf-8 output"), 122 | r#""#, 123 | "writer output (LHS)" 124 | ); 125 | } 126 | 127 | #[test] 128 | fn start() { 129 | let mut writer = Writer::new(Vec::new()); 130 | writer 131 | .write_event(Start( 132 | BytesStart::new("info").with_attributes([("genre", "RTS")]), 133 | )) 134 | .expect("writing start tag should succeed"); 135 | 136 | let result = writer.into_inner(); 137 | assert_eq!( 138 | String::from_utf8(result).expect("utf-8 output"), 139 | r#""#, 140 | "writer output (LHS)" 141 | ); 142 | } 143 | 144 | #[test] 145 | fn end() { 146 | let mut writer = Writer::new(Vec::new()); 147 | writer 148 | .write_event(End(BytesEnd::new("info"))) 149 | .expect("writing end tag should succeed"); 150 | 151 | let result = writer.into_inner(); 152 | assert_eq!( 153 | String::from_utf8(result).expect("utf-8 output"), 154 | "", 155 | "writer output (LHS)" 156 | ); 157 | } 158 | 159 | #[test] 160 | fn text() { 161 | let mut 
writer = Writer::new(Vec::new()); 162 | writer 163 | .write_event(Text(BytesText::new( 164 | "Kerrigan & Raynor: The Z[erg] programming language", 165 | ))) 166 | .expect("writing text should succeed"); 167 | 168 | let result = writer.into_inner(); 169 | assert_eq!( 170 | String::from_utf8(result).expect("utf-8 output"), 171 | "Kerrigan & Raynor: The Z[erg] programming language", 172 | "writer output (LHS)" 173 | ); 174 | } 175 | 176 | #[test] 177 | fn cdata() { 178 | let mut writer = Writer::new(Vec::new()); 179 | writer 180 | .write_event(CData(BytesCData::new( 181 | "Kerrigan & Raynor: The Z[erg] programming language", 182 | ))) 183 | .expect("writing CDATA section should succeed"); 184 | 185 | let result = writer.into_inner(); 186 | assert_eq!( 187 | String::from_utf8(result).expect("utf-8 output"), 188 | "", 189 | "writer output (LHS)" 190 | ); 191 | } 192 | 193 | #[test] 194 | fn comment() { 195 | let mut writer = Writer::new(Vec::new()); 196 | writer 197 | .write_event(Comment(BytesText::from_escaped( 198 | "Kerrigan & Raynor: The Z[erg] programming language", 199 | ))) 200 | .expect("writing comment should succeed"); 201 | 202 | let result = writer.into_inner(); 203 | assert_eq!( 204 | String::from_utf8(result).expect("utf-8 output"), 205 | "", 206 | "writer output (LHS)" 207 | ); 208 | } 209 | 210 | #[test] 211 | fn doctype() { 212 | let mut writer = Writer::new(Vec::new()); 213 | writer 214 | .write_event(DocType(BytesText::new("some DTD here..."))) 215 | .expect("writing DTD should succeed"); 216 | 217 | let result = writer.into_inner(); 218 | assert_eq!( 219 | String::from_utf8(result).expect("utf-8 output"), 220 | "", 221 | "writer output (LHS)" 222 | ); 223 | } 224 | 225 | #[test] 226 | fn eof() { 227 | let mut writer = Writer::new(Vec::new()); 228 | writer.write_event(Eof).expect("writing EOF should succeed"); 229 | 230 | let result = writer.into_inner(); 231 | assert_eq!( 232 | String::from_utf8(result).expect("utf-8 output"), 233 | "", 234 | 
"writer output (LHS)" 235 | ); 236 | } 237 | --------------------------------------------------------------------------------