├── .gitattributes
├── .github
    └── workflows
    │   ├── cifuzz.yml
    │   └── rust.yml
├── .gitignore
├── .gitmodules
├── .rustfmt.toml
├── Cargo.toml
├── Changelog.md
├── LICENSE-MIT.md
├── README.md
├── benches
    ├── macrobenches.rs
    └── microbenches.rs
├── compare
    ├── Cargo.toml
    └── benches
    │   └── bench.rs
├── examples
    ├── custom_entities.rs
    ├── flattened_enum.rs
    ├── nested_readers.rs
    ├── read_buffered.rs
    ├── read_nodes.rs
    ├── read_nodes_serde.rs
    └── read_texts.rs
├── fuzz
    ├── .gitignore
    ├── Cargo.toml
    ├── README.md
    └── fuzz_targets
    │   ├── fuzz_target_1.rs
    │   └── structured_roundtrip.rs
├── src
    ├── de
    │   ├── attributes.rs
    │   ├── key.rs
    │   ├── map.rs
    │   ├── mod.rs
    │   ├── resolver.rs
    │   ├── simple_type.rs
    │   ├── text.rs
    │   └── var.rs
    ├── encoding.rs
    ├── errors.rs
    ├── escape.rs
    ├── events
    │   ├── attributes.rs
    │   └── mod.rs
    ├── lib.rs
    ├── name.rs
    ├── parser
    │   ├── element.rs
    │   ├── mod.rs
    │   └── pi.rs
    ├── reader
    │   ├── async_tokio.rs
    │   ├── buffered_reader.rs
    │   ├── mod.rs
    │   ├── ns_reader.rs
    │   ├── slice_reader.rs
    │   └── state.rs
    ├── se
    │   ├── content.rs
    │   ├── element.rs
    │   ├── key.rs
    │   ├── mod.rs
    │   ├── simple_type.rs
    │   └── text.rs
    ├── serde_helpers.rs
    ├── utils.rs
    ├── writer.rs
    └── writer
    │   └── async_tokio.rs
├── test-gen
    ├── Cargo.toml
    └── src
    │   └── main.rs
└── tests
    ├── README.md
    ├── async-tokio.rs
    ├── documents
        ├── document.xml
        ├── encoding
        │   ├── Big5.xml
        │   ├── EUC-JP.xml
        │   ├── EUC-KR.xml
        │   ├── GBK.xml
        │   ├── IBM866.xml
        │   ├── ISO-2022-JP.xml
        │   ├── ISO-8859-10.xml
        │   ├── ISO-8859-13.xml
        │   ├── ISO-8859-14.xml
        │   ├── ISO-8859-15.xml
        │   ├── ISO-8859-16.xml
        │   ├── ISO-8859-2.xml
        │   ├── ISO-8859-3.xml
        │   ├── ISO-8859-4.xml
        │   ├── ISO-8859-5.xml
        │   ├── ISO-8859-6.xml
        │   ├── ISO-8859-7.xml
        │   ├── ISO-8859-8-I.xml
        │   ├── ISO-8859-8.xml
        │   ├── KOI8-R.xml
        │   ├── KOI8-U.xml
        │   ├── Shift_JIS.xml
        │   ├── gb18030.xml
        │   ├── macintosh.xml
        │   ├── utf16be-bom.xml
        │   ├── utf16be.xml
        │   ├── utf16le-bom.xml
        │   ├── utf16le.xml
        │   ├── utf8-bom.xml
        │   ├── utf8.xml
        │   ├── windows-1250.xml
        │   ├── windows-1251.xml
        │   ├── windows-1252.xml
        │   ├── windows-1253.xml
        │   ├── windows-1254.xml
        │   ├── windows-1255.xml
        │   ├── windows-1256.xml
        │   ├── windows-1257.xml
        │   ├── windows-1258.xml
        │   ├── windows-874.xml
        │   ├── x-mac-cyrillic.xml
        │   └── x-user-defined.xml
        ├── html5.html
        ├── html5.txt
        ├── libreoffice_document.fodt
        ├── linescore.xml
        ├── opennews_all.rss
        ├── players.xml
        ├── rpm_filelists.xml
        ├── rpm_other.xml
        ├── rpm_primary.xml
        ├── rpm_primary2.xml
        ├── sample_1.xml
        ├── sample_ns.xml
        ├── sample_rss.xml
        ├── test_writer.xml
        ├── test_writer_indent.xml
        └── test_writer_indent_cdata.xml
    ├── encodings.rs
    ├── escape.rs
    ├── fuzzing.rs
    ├── helpers
        └── mod.rs
    ├── html.rs
    ├── issues.rs
    ├── reader-attributes.rs
    ├── reader-config.rs
    ├── reader-errors.rs
    ├── reader-namespaces.rs
    ├── reader-references.rs
    ├── reader.rs
    ├── roundtrip.rs
    ├── serde-de-enum.rs
    ├── serde-de-seq.rs
    ├── serde-de-xsi.rs
    ├── serde-de.rs
    ├── serde-issues.rs
    ├── serde-migrated.rs
    ├── serde-se.rs
    ├── serde_helpers
        └── mod.rs
    ├── serde_roundtrip.rs
    ├── writer-indentation.rs
    └── writer.rs


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Unit tests assume that all xml files have unix style line endings
 2 | /tests/documents/* text eol=lf
 3 | /tests/documents/encoding/* text eol=lf
 4 | 
 5 | /tests/documents/encoding/utf16be.xml binary
 6 | /tests/documents/encoding/utf16le.xml binary
 7 | /tests/documents/encoding/utf16be-bom.xml binary
 8 | /tests/documents/encoding/utf16le-bom.xml binary
 9 | /tests/documents/sample_5_utf16bom.xml binary
10 | 


--------------------------------------------------------------------------------
/.github/workflows/cifuzz.yml:
--------------------------------------------------------------------------------
 1 | name: CIFuzz
 2 | on: [pull_request]
 3 | jobs:
 4 |  Fuzzing:
 5 |    runs-on: ubuntu-latest
 6 |    steps:
 7 |    - name: Build Fuzzers
 8 |      id: build
 9 |      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
10 |      with:
11 |        oss-fuzz-project-name: 'quick-xml'
12 |        language: rust
13 |    - name: Run Fuzzers
14 |      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
15 |      with:
16 |        oss-fuzz-project-name: 'quick-xml'
17 |        language: rust
18 |        fuzz-seconds: 600
19 |    - name: Upload Crash
20 |      uses: actions/upload-artifact@v4
21 |      if: failure() && steps.build.outcome == 'success'
22 |      with:
23 |        name: artifacts
24 |        path: ./out/artifacts
25 | 
26 | 


--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
  1 | name: Rust
  2 | 
  3 | on: [push, pull_request]
  4 | 
  5 | jobs:
  6 |   lint:
  7 |     runs-on: ubuntu-latest
  8 |     steps:
  9 |     - uses: actions/checkout@v4
 10 |     - name: Check fmt
 11 |       run: cargo fmt -- --check
 12 | 
 13 |   msrv:
 14 |     runs-on: ubuntu-latest
 15 |     steps:
 16 |     - uses: actions/checkout@v4
 17 |     - uses: dtolnay/rust-toolchain@1.56.0
 18 |     - run: cargo check
 19 | 
 20 |   minimal-versions:
 21 |     runs-on: ubuntu-latest
 22 |     steps:
 23 |     - uses: actions/checkout@v4
 24 |     - name: Install tools
 25 |       run: cargo install cargo-hack cargo-minimal-versions
 26 |     - name: Install nightly rust
 27 |       uses: dtolnay/rust-toolchain@nightly
 28 |     - name: Check with minimal versions
 29 |       run: cargo minimal-versions check
 30 |     - name: Check with minimal versions (serialize)
 31 |       run: cargo minimal-versions check --features serialize
 32 |     - name: Check with minimal versions (encoding)
 33 |       run: cargo minimal-versions check --features encoding
 34 |     - name: Check with minimal versions (async-tokio)
 35 |       run: cargo minimal-versions check --features async-tokio
 36 | 
 37 |   test:
 38 |     strategy:
 39 |       matrix:
 40 |         platform: [ubuntu-latest, windows-latest]
 41 | 
 42 |     runs-on: ${{ matrix.platform }}
 43 | 
 44 |     # Set variable to enable coverage
 45 |     env:
 46 |       RUSTFLAGS: -C instrument-coverage
 47 | 
 48 |     steps:
 49 |     - uses: actions/checkout@v4
 50 |     - name: Install coverage reporter (llvm-tools-preview)
 51 |       if: runner.os == 'Linux'
 52 |       run: rustup component add llvm-tools-preview
 53 |     - name: Install coverage reporter (grcov)
 54 |       if: runner.os == 'Linux'
 55 |       run: cargo install grcov
 56 | 
 57 |     - name: Build
 58 |       run: cargo build
 59 |     - name: Build benchmarks
 60 |       run: cargo bench --no-run
 61 |     - name: Build benchmarks (compare)
 62 |       working-directory: compare
 63 |       run: cargo bench --no-run
 64 |     - name: Run tests + benchmarks
 65 |       run: cargo test --all-features --benches --tests
 66 | 
 67 |     - name: Run tests (no features)
 68 |       env:
 69 |         LLVM_PROFILE_FILE: coverage/no-features-%p-%m.profraw
 70 |       run: cargo test --no-default-features
 71 |     - name: Run tests (serialize)
 72 |       env:
 73 |         LLVM_PROFILE_FILE: coverage/serialize-%p-%m.profraw
 74 |       run: cargo test --features serialize
 75 |     - name: Run tests (serialize+encoding)
 76 |       env:
 77 |         LLVM_PROFILE_FILE: coverage/serialize-encoding-%p-%m.profraw
 78 |       run: cargo test --features serialize,encoding
 79 |     - name: Run tests (serialize+escape-html)
 80 |       env:
 81 |         LLVM_PROFILE_FILE: coverage/serialize-escape-html-%p-%m.profraw
 82 |       run: cargo test --features serialize,escape-html
 83 |     - name: Run tests (all features)
 84 |       env:
 85 |         LLVM_PROFILE_FILE: coverage/all-features-%p-%m.profraw
 86 |       run: cargo test --all-features
 87 |     - name: Prepare coverage information for upload
 88 |       if: runner.os == 'Linux'
 89 |       run: |
 90 |         grcov ./coverage \
 91 |               -s . \
 92 |               --binary-path ./target/debug/ \
 93 |               --branch \
 94 |               --ignore-not-existing \
 95 |               --ignore 'tests/*' \
 96 |               -o ./coverage.lcov
 97 |     - name: Upload coverage to codecov.io
 98 |       if: runner.os == 'Linux'
 99 |       uses: codecov/codecov-action@v4
100 |       with:
101 |         files: ./coverage.lcov
102 |         flags: unittests
103 |         verbose: true
104 |       continue-on-error: true
105 | 
106 |   # Check that tests that are sensitive to the target platform pass
107 |   x86:
108 |     runs-on: ubuntu-latest
109 |     steps:
110 |     - uses: actions/checkout@v4
111 |     - name: Install 32-bit target
112 |       run: rustup target add i686-unknown-linux-gnu
113 |     - name: Install 32-bit libs (for criterion)
114 |       # Criterion wants to compile something.
115 |       # Cargo builds criterion even when it is not required for those tests.
116 |       # Without those libs compilation failed with:
117 |       # error: linking with `cc` failed: exit status: 1
118 |       #   |
119 |       #   = note: LC_ALL="C" PATH="..." ...
120 |       #   = note: /usr/bin/ld: cannot find Scrt1.o: No such file or directory
121 |       #           /usr/bin/ld: cannot find crti.o: No such file or directory
122 |       #           /usr/bin/ld: skipping incompatible /usr/lib/gcc/x86_64-linux-gnu/11/libgcc.a when searching for -lgcc
123 |       #           /usr/bin/ld: cannot find -lgcc: No such file or directory
124 |       #           collect2: error: ld returned 1 exit status
125 |       # Fixed as suggested in this answer:
126 |       # https://stackoverflow.com/a/16016792/7518605
127 |       run: sudo apt install gcc-multilib
128 |     - name: Run some tests on 32-bit target
129 |       run: cargo test --target i686-unknown-linux-gnu --test issues
130 |     - name: Run some tests on 32-bit target (async-tokio)
131 |       run: cargo test --target i686-unknown-linux-gnu --features async-tokio --test async-tokio
132 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | .project
3 | Cargo.lock
4 | # macOS hidden files
5 | .DS_Store
6 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "encoding"]
2 | 	path = test-gen/encoding
3 | 	url = https://github.com/whatwg/encoding.git
4 | 	shallow = true
5 | 


--------------------------------------------------------------------------------
/.rustfmt.toml:
--------------------------------------------------------------------------------
1 | edition = "2021"
2 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
  1 | [package]
  2 | name = "quick-xml"
  3 | version = "0.37.5"
  4 | description = "High performance xml reader and writer"
  5 | edition = "2021"
  6 | 
  7 | documentation = "https://docs.rs/quick-xml"
  8 | repository = "https://github.com/tafia/quick-xml"
  9 | 
 10 | keywords = ["xml", "serde", "parser", "writer", "html"]
 11 | categories = ["asynchronous", "encoding", "parsing", "parser-implementations"]
 12 | license = "MIT"
 13 | rust-version = "1.56"
 14 | # We exclude tests & examples & benches to reduce the size of a package.
 15 | # Unfortunately, this is a source of warnings in the latest cargo when packaging:
 16 | # > warning: ignoring {context} `{name}` as `{path}` is not included in the published package
 17 | # That may become unnecessary once https://github.com/rust-lang/cargo/issues/13491
 18 | # will be resolved
 19 | include = ["src/*", "LICENSE-MIT.md", "README.md"]
 20 | 
 21 | [dependencies]
 22 | arbitrary = { version = "1", features = ["derive"], optional = true }
 23 | document-features = { version = "0.2", optional = true }
 24 | encoding_rs = { version = "0.8", optional = true }
 25 | serde = { version = ">=1.0.139", optional = true }
 26 | tokio = { version = "1.10", optional = true, default-features = false, features = ["io-util"] }
 27 | memchr = "2.1"
 28 | 
 29 | [dev-dependencies]
 30 | criterion = "0.4"
 31 | pretty_assertions = "1.4"
 32 | regex = "1"
 33 | # https://github.com/serde-rs/serde/issues/1904 is fixed since 1.0.206
 34 | # serde does not follow semver in its numbering and its dependencies, so we specify the patch version here
 35 | serde_derive = { version = "1.0.206" }
 36 | serde-value = "0.7"
 37 | tokio = { version = "1.21", default-features = false, features = ["macros", "rt"] }
 38 | tokio-test = "0.4"
 39 | 
 40 | [lib]
 41 | bench = false
 42 | 
 43 | [[bench]]
 44 | name = "microbenches"
 45 | harness = false
 46 | path = "benches/microbenches.rs"
 47 | 
 48 | [[bench]]
 49 | name = "macrobenches"
 50 | harness = false
 51 | path = "benches/macrobenches.rs"
 52 | 
 53 | [features]
 54 | default = []
 55 | 
 56 | ## Enables support for asynchronous reading and writing from `tokio`'s IO-Traits by enabling
 57 | ## [reading events] from types implementing [`tokio::io::AsyncBufRead`].
 58 | ##
 59 | ## [reading events]: crate::reader::Reader::read_event_into_async
 60 | async-tokio = ["tokio"]
 61 | 
 62 | ## Enables support of non-UTF-8 encoded documents. Encoding will be inferred from
 63 | ## the XML declaration if it is found, otherwise UTF-8 is assumed.
 64 | ##
 65 | ## Currently, only ASCII-compatible encodings are supported. For example,
 66 | ## UTF-16 will not work (therefore, `quick-xml` is not [standard compliant]).
 67 | ##
 68 | ## Thus, quick-xml supports all encodings of [`encoding_rs`] except these:
 69 | ## - [UTF-16BE]
 70 | ## - [UTF-16LE]
 71 | ## - [ISO-2022-JP]
 72 | ##
 73 | ## You should stop processing a document when one of these encodings is detected,
 74 | ## because generated events can be wrong and do not reflect a real document structure!
 75 | ##
 76 | ## Because these are the only supported encodings that are not ASCII compatible, you can
 77 | ## check for them:
 78 | ##
 79 | ## ```
 80 | ## use quick_xml::events::Event;
 81 | ## use quick_xml::reader::Reader;
 82 | ##
 83 | ## # fn to_utf16le_with_bom(string: &str) -> Vec<u8> {
 84 | ## #     let mut bytes = Vec::new();
 85 | ## #     bytes.extend_from_slice(&[0xFF, 0xFE]); // UTF-16 LE BOM
 86 | ## #     for ch in string.encode_utf16() {
 87 | ## #         bytes.extend_from_slice(&ch.to_le_bytes());
 88 | ## #     }
 89 | ## #     bytes
 90 | ## # }
 91 | ## let xml = to_utf16le_with_bom(r#"<?xml encoding='UTF-16'><element/>"#);
 92 | ## let mut reader = Reader::from_reader(xml.as_ref());
 93 | ## reader.config_mut().trim_text(true);
 94 | ##
 95 | ## let mut buf = Vec::new();
 96 | ## let mut unsupported = false;
 97 | ## loop {
 98 | ##     if !reader.decoder().encoding().is_ascii_compatible() {
 99 | ##         unsupported = true;
100 | ##         break;
101 | ##     }
102 | ##     buf.clear();
103 | ##     match reader.read_event_into(&mut buf).unwrap() {
104 | ##         Event::Eof => break,
105 | ##         _ => {}
106 | ##     }
107 | ## }
108 | ## assert_eq!(unsupported, true);
109 | ## ```
110 | ## This restriction will be eliminated once issue [#158] is resolved.
111 | ##
112 | ## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding
113 | ## [UTF-16BE]: encoding_rs::UTF_16BE
114 | ## [UTF-16LE]: encoding_rs::UTF_16LE
115 | ## [ISO-2022-JP]: encoding_rs::ISO_2022_JP
116 | ## [#158]: https://github.com/tafia/quick-xml/issues/158
117 | encoding = ["encoding_rs"]
118 | 
119 | ## Enables support for recognizing all [HTML 5 entities] in [`unescape`]
120 | ## function. The full list of entities also can be found in
121 | ## <https://html.spec.whatwg.org/entities.json>.
122 | ##
123 | ## [HTML 5 entities]: https://dev.w3.org/html5/html-author/charref
124 | ## [`unescape`]: crate::escape::unescape
125 | escape-html = []
126 | 
127 | ## This feature is for the Serde deserializer that enables support for deserializing
128 | ## lists where tags are overlapped with tags that do not correspond to the list.
129 | ##
130 | ## When this feature is enabled, the XML:
131 | ## ```xml
132 | ## <any-name>
133 | ##   <item/>
134 | ##   <another-item/>
135 | ##   <item/>
136 | ##   <item/>
137 | ## </any-name>
138 | ## ```
139 | ## could be deserialized to a struct:
140 | ## ```no_run
141 | ## # use serde::Deserialize;
142 | ## #[derive(Deserialize)]
143 | ## #[serde(rename_all = "kebab-case")]
144 | ## struct AnyName {
145 | ##   item: Vec<()>,
146 | ##   another_item: (),
147 | ## }
148 | ## ```
149 | ##
150 | ## When this feature is not enabled (default), only the first element will be
151 | ## associated with the field, and the deserialized type will report an error
152 | ## (duplicated field) when the deserializer encounters a second `<item/>`.
153 | ##
154 | ## Note, that enabling this feature can lead to high and even unlimited memory
155 | ## consumption, because deserializer needs to check all events up to the end of a
156 | ## container tag (`</any-name>` in this example) to figure out that there are no
157 | ## more items for a field. If `</any-name>` or even EOF is not encountered, the
158 | ## parsing will never end which can lead to a denial-of-service (DoS) scenario.
159 | ##
160 | ## Having several lists and overlapped elements for them in XML could also lead
161 | ## to quadratic parsing time, because the deserializer must check the list of
162 | ## events as many times as the number of sequence fields present in the schema.
163 | ##
164 | ## To reduce negative consequences, always [limit] the maximum number of events
165 | ## that [`Deserializer`] will buffer.
166 | ##
167 | ## This feature works only with `serialize` feature and has no effect if `serialize`
168 | ## is not enabled.
169 | ##
170 | ## [limit]: crate::de::Deserializer::event_buffer_size
171 | ## [`Deserializer`]: crate::de::Deserializer
172 | overlapped-lists = []
173 | 
174 | ## Enables serialization of some quick-xml types using [`serde`]. This feature
175 | ## is rarely needed.
176 | ##
177 | ## This feature does NOT provide XML serializer or deserializer. You should use
178 | ## the `serialize` feature for that instead.
179 | # Cannot name "serde" to avoid clash with dependency.
180 | # "dep:" prefix is only available from Rust 1.60
181 | serde-types = ["serde/derive"]
182 | 
183 | ## Enables support for [`serde`] serialization and deserialization. When this
184 | ## feature is enabled, quick-xml provides serializer and deserializer for XML.
185 | ##
186 | ## This feature does NOT enable serialization of the types inside quick-xml.
187 | ## If you need that, use the `serde-types` feature.
188 | serialize = ["serde"] # "dep:" prefix is only available from Rust 1.60
189 | 
190 | [package.metadata.docs.rs]
191 | # document all features
192 | all-features = true
193 | 
194 | # Tests, benchmarks and examples are not included in the package on crates.io,
195 | # so we need to specify a path, otherwise `cargo package` complains
196 | # That may become unnecessary once https://github.com/rust-lang/cargo/issues/13491
197 | # will be resolved
198 | 
199 | [[test]]
200 | name = "async-tokio"
201 | required-features = ["async-tokio"]
202 | path = "tests/async-tokio.rs"
203 | 
204 | [[test]]
205 | name = "encodings"
206 | required-features = ["encoding"]
207 | path = "tests/encodings.rs"
208 | 
209 | [[test]]
210 | name = "html"
211 | required-features = ["escape-html"]
212 | path = "tests/html.rs"
213 | 
214 | [[test]]
215 | name = "serde_roundtrip"
216 | required-features = ["serialize"]
217 | path = "tests/serde_roundtrip.rs"
218 | 
219 | [[test]]
220 | name = "serde-de"
221 | required-features = ["serialize"]
222 | path = "tests/serde-de.rs"
223 | 
224 | [[test]]
225 | name = "serde-de-enum"
226 | required-features = ["serialize"]
227 | path = "tests/serde-de-enum.rs"
228 | 
229 | [[test]]
230 | name = "serde-de-seq"
231 | required-features = ["serialize"]
232 | path = "tests/serde-de-seq.rs"
233 | 
234 | [[test]]
235 | name = "serde-de-xsi"
236 | required-features = ["serialize"]
237 | path = "tests/serde-de-xsi.rs"
238 | 
239 | [[test]]
240 | name = "serde-se"
241 | required-features = ["serialize"]
242 | path = "tests/serde-se.rs"
243 | 
244 | [[test]]
245 | name = "serde-migrated"
246 | required-features = ["serialize"]
247 | path = "tests/serde-migrated.rs"
248 | 
249 | [[test]]
250 | name = "serde-issues"
251 | required-features = ["serialize"]
252 | path = "tests/serde-issues.rs"
253 | 
254 | [[example]]
255 | name = "read_nodes_serde"
256 | required-features = ["serialize"]
257 | path = "examples/read_nodes_serde.rs"
258 | 
259 | [[example]]
260 | name = "flattened_enum"
261 | required-features = ["serialize"]
262 | path = "examples/flattened_enum.rs"
263 | 


--------------------------------------------------------------------------------
/LICENSE-MIT.md:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Johann Tuffe
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | 
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 | 
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # quick-xml
  2 | 
  3 | ![status](https://github.com/tafia/quick-xml/actions/workflows/rust.yml/badge.svg)
  4 | [![Crate](https://img.shields.io/crates/v/quick-xml.svg)](https://crates.io/crates/quick-xml)
  5 | [![docs.rs](https://docs.rs/quick-xml/badge.svg)](https://docs.rs/quick-xml)
  6 | [![codecov](https://img.shields.io/codecov/c/github/tafia/quick-xml)](https://codecov.io/gh/tafia/quick-xml)
  7 | [![MSRV](https://img.shields.io/badge/rustc-1.56.0+-ab6000.svg)](https://blog.rust-lang.org/2021/10/21/Rust-1.56.0.html)
  8 | 
  9 | High performance xml pull reader/writer.
 10 | 
 11 | The reader:
 12 | - is almost zero-copy (use of `Cow` whenever possible)
 13 | - is easy on memory allocation (the API provides a way to reuse buffers)
 14 | - supports various encodings (with the `encoding` feature), namespace resolution, and special characters.
 15 | 
 16 | Syntax is inspired by [xml-rs](https://github.com/netvl/xml-rs).
 17 | 
 18 | ## Example
 19 | 
 20 | ### Reader
 21 | 
 22 | ```rust
 23 | use quick_xml::events::Event;
 24 | use quick_xml::reader::Reader;
 25 | 
 26 | let xml = r#"<tag1 att1 = "test">
 27 |                 <tag2><!--Test comment-->Test</tag2>
 28 |                 <tag2>Test 2</tag2>
 29 |              </tag1>"#;
 30 | let mut reader = Reader::from_str(xml);
 31 | reader.config_mut().trim_text(true);
 32 | 
 33 | let mut count = 0;
 34 | let mut txt = Vec::new();
 35 | let mut buf = Vec::new();
 36 | 
 37 | // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
 38 | loop {
 39 |     // NOTE: this is the generic case when we don't know about the input BufRead.
 40 |     // when the input is a &str or a &[u8], we don't actually need to use another
 41 |     // buffer, we could directly call `reader.read_event()`
 42 |     match reader.read_event_into(&mut buf) {
 43 |         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
 44 |         // exits the loop when reaching end of file
 45 |         Ok(Event::Eof) => break,
 46 | 
 47 |         Ok(Event::Start(e)) => {
 48 |             match e.name().as_ref() {
 49 |                 b"tag1" => println!("attributes values: {:?}",
 50 |                                     e.attributes().map(|a| a.unwrap().value)
 51 |                                     .collect::<Vec<_>>()),
 52 |                 b"tag2" => count += 1,
 53 |                 _ => (),
 54 |             }
 55 |         }
 56 |         Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
 57 | 
 58 |         // There are several other `Event`s we do not consider here
 59 |         _ => (),
 60 |     }
 61 |     // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
 62 |     buf.clear();
 63 | }
 64 | ```
 65 | 
 66 | ### Writer
 67 | 
 68 | ```rust
 69 | use quick_xml::events::{Event, BytesEnd, BytesStart};
 70 | use quick_xml::reader::Reader;
 71 | use quick_xml::writer::Writer;
 72 | use std::io::Cursor;
 73 | 
 74 | let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
 75 | let mut reader = Reader::from_str(xml);
 76 | reader.config_mut().trim_text(true);
 77 | let mut writer = Writer::new(Cursor::new(Vec::new()));
 78 | loop {
 79 |     match reader.read_event() {
 80 |         Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {
 81 | 
 82 |             // creates a new element ... alternatively we could reuse `e` by calling
 83 |             // `e.into_owned()`
 84 |             let mut elem = BytesStart::new("my_elem");
 85 | 
 86 |             // collect existing attributes
 87 |             elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
 88 | 
 89 |             // in addition to the copied attributes, add a new my-key="some value" attribute
 90 |             elem.push_attribute(("my-key", "some value"));
 91 | 
 92 |             // writes the event to the writer
 93 |             assert!(writer.write_event(Event::Start(elem)).is_ok());
 94 |         },
 95 |         Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
 96 |             assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok());
 97 |         },
 98 |         Ok(Event::Eof) => break,
 99 |         // we can either move or borrow the event to write, depending on your use-case
100 |         Ok(e) => assert!(writer.write_event(e).is_ok()),
101 |         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
102 |     }
103 | }
104 | 
105 | let result = writer.into_inner().into_inner();
106 | let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#;
107 | assert_eq!(result, expected.as_bytes());
108 | ```
109 | 
110 | ## Serde
111 | 
112 | When using the `serialize` feature, quick-xml can be used with serde's `Serialize`/`Deserialize` traits.
113 | The mapping between XML and Rust types, and in particular the syntax that allows you to specify the
114 | distinction between *elements* and *attributes*, is described in detail in the documentation
115 | for [deserialization](https://docs.rs/quick-xml/latest/quick_xml/de/).
116 | 
117 | ### Credits
118 | 
119 | This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs).
120 | quick-xml follows its convention for deserialization, including the
121 | [`$value`](https://github.com/RReverser/serde-xml-rs#parsing-the-value-of-a-tag) special name.
122 | 
123 | ### Parsing the "value" of a tag
124 | 
125 | If you have an input of the form `<foo abc="xyz">bar</foo>`, and you want to get at the `bar`,
126 | you can use either the special name `$text`, or the special name `$value`:
127 | 
128 | ```rust,ignore
129 | struct Foo {
130 |     #[serde(rename = "@abc")]
131 |     pub abc: String,
132 |     #[serde(rename = "$text")]
133 |     pub body: String,
134 | }
135 | ```
136 | 
137 | Read about the difference in the [documentation](https://docs.rs/quick-xml/latest/quick_xml/de/index.html#difference-between-text-and-value-special-names).
138 | 
139 | ### Performance
140 | 
141 | Note that despite not focusing on performance (there are several unnecessary copies), it remains about 10x faster than serde-xml-rs.
142 | 
143 | # Features
144 | 
145 | - `encoding`: support for non-UTF-8 XML documents
146 | - `serialize`: support serde `Serialize`/`Deserialize`
147 | 
148 | ## Performance
149 | 
150 | Benchmarking is hard and the results depend on your input file and your machine.
151 | 
152 | Here on my particular file, quick-xml is around **50 times faster** than [xml-rs](https://crates.io/crates/xml-rs) crate.
153 | 
154 | ```
155 | // quick-xml benches
156 | test bench_quick_xml            ... bench:     198,866 ns/iter (+/- 9,663)
157 | test bench_quick_xml_escaped    ... bench:     282,740 ns/iter (+/- 61,625)
158 | test bench_quick_xml_namespaced ... bench:     389,977 ns/iter (+/- 32,045)
159 | 
160 | // same bench with xml-rs
161 | test bench_xml_rs               ... bench:  14,468,930 ns/iter (+/- 321,171)
162 | 
163 | // serde-xml-rs vs serialize feature
164 | test bench_serde_quick_xml      ... bench:   1,181,198 ns/iter (+/- 138,290)
165 | test bench_serde_xml_rs         ... bench:  15,039,564 ns/iter (+/- 783,485)
166 | ```
167 | 
168 | For a feature and performance comparison, you can also have a look at RazrFalcon's [parser comparison table](https://github.com/RazrFalcon/roxmltree#parsing).
169 | 
170 | ## Contribute
171 | 
172 | Any PR is welcome!
173 | 
174 | ## License
175 | 
176 | MIT
177 | 


--------------------------------------------------------------------------------
/benches/macrobenches.rs:
--------------------------------------------------------------------------------
  1 | use criterion::{self, criterion_group, criterion_main, Criterion, Throughput};
  2 | use quick_xml::events::Event;
  3 | use quick_xml::reader::{NsReader, Reader};
  4 | use quick_xml::Result as XmlResult;
  5 | 
// Test documents embedded into the benchmark binary at compile time with
// `include_str!`; they are paired with display names in `INPUTS` below.
static RPM_PRIMARY: &str = include_str!("../tests/documents/rpm_primary.xml");
static RPM_PRIMARY2: &str = include_str!("../tests/documents/rpm_primary2.xml");
static RPM_FILELISTS: &str = include_str!("../tests/documents/rpm_filelists.xml");
static RPM_OTHER: &str = include_str!("../tests/documents/rpm_other.xml");
static LIBREOFFICE_DOCUMENT: &str = include_str!("../tests/documents/libreoffice_document.fodt");
static DOCUMENT: &str = include_str!("../tests/documents/document.xml");
static TEST_WRITER_INDENT: &str = include_str!("../tests/documents/test_writer_indent.xml");
static SAMPLE_1: &str = include_str!("../tests/documents/sample_1.xml");
static LINESCORE: &str = include_str!("../tests/documents/linescore.xml");
static SAMPLE_RSS: &str = include_str!("../tests/documents/sample_rss.xml");
static SAMPLE_NS: &str = include_str!("../tests/documents/sample_ns.xml");
static PLAYERS: &str = include_str!("../tests/documents/players.xml");
 18 | 
 19 | static INPUTS: &[(&str, &str)] = &[
 20 |     // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
 21 |     ("rpm_primary.xml", RPM_PRIMARY),
 22 |     // long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
 23 |     ("rpm_primary2.xml", RPM_PRIMARY2),
 24 |     // long, mostly medium-length text elements, not much escaping
 25 |     ("rpm_filelists.xml", RPM_FILELISTS),
 26 |     // long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes
 27 |     ("rpm_other.xml", RPM_OTHER),
 28 |     // long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces
 29 |     ("libreoffice_document.fodt", LIBREOFFICE_DOCUMENT),
 30 |     // medium length, mostly empty tags, a few short attributes per element, no escaping
 31 |     ("document.xml", DOCUMENT),
 32 |     // medium length, lots of namespaces, no escaping
 33 |     ("test_writer_ident.xml", TEST_WRITER_INDENT),
 34 |     // short, mix of attributes and text, lots of escapes
 35 |     ("sample_1.xml", SAMPLE_1),
 36 |     // medium length, lots of attributes, short attributes, few escapes
 37 |     ("linescore.xml", LINESCORE),
 38 |     // short, lots of namespaces, no escapes
 39 |     ("sample_ns.xml", SAMPLE_NS),
 40 |     // long, few attributes, mix of attribute lengths, escapes in text content
 41 |     ("sample_rss.xml", SAMPLE_RSS),
 42 |     // long, lots of attributes, short attributes, no text, no escapes
 43 |     ("players.xml", PLAYERS),
 44 | ];
 45 | 
 46 | // TODO: use fully normalized attribute values
 47 | fn parse_document_from_str(doc: &str) -> XmlResult<()> {
 48 |     let mut r = Reader::from_str(doc);
 49 |     loop {
 50 |         match criterion::black_box(r.read_event()?) {
 51 |             Event::Start(e) | Event::Empty(e) => {
 52 |                 for attr in e.attributes() {
 53 |                     criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?);
 54 |                 }
 55 |             }
 56 |             Event::Text(e) => {
 57 |                 criterion::black_box(e.decode()?);
 58 |             }
 59 |             Event::CData(e) => {
 60 |                 criterion::black_box(e.into_inner());
 61 |             }
 62 |             Event::End(_) => (),
 63 |             Event::Eof => break,
 64 |             _ => (),
 65 |         }
 66 |     }
 67 |     Ok(())
 68 | }
 69 | 
 70 | // TODO: use fully normalized attribute values
 71 | fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
 72 |     let mut r = Reader::from_reader(doc);
 73 |     let mut buf = Vec::new();
 74 |     loop {
 75 |         match criterion::black_box(r.read_event_into(&mut buf)?) {
 76 |             Event::Start(e) | Event::Empty(e) => {
 77 |                 for attr in e.attributes() {
 78 |                     criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?);
 79 |                 }
 80 |             }
 81 |             Event::Text(e) => {
 82 |                 criterion::black_box(e.decode()?);
 83 |             }
 84 |             Event::CData(e) => {
 85 |                 criterion::black_box(e.into_inner());
 86 |             }
 87 |             Event::End(_) => (),
 88 |             Event::Eof => break,
 89 |             _ => (),
 90 |         }
 91 |         buf.clear();
 92 |     }
 93 |     Ok(())
 94 | }
 95 | 
 96 | // TODO: use fully normalized attribute values
 97 | fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
 98 |     let mut r = NsReader::from_str(doc);
 99 |     loop {
100 |         match criterion::black_box(r.read_resolved_event()?) {
101 |             (resolved_ns, Event::Start(e) | Event::Empty(e)) => {
102 |                 criterion::black_box(resolved_ns);
103 |                 for attr in e.attributes() {
104 |                     criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?);
105 |                 }
106 |             }
107 |             (resolved_ns, Event::Text(e)) => {
108 |                 criterion::black_box(e.decode()?);
109 |                 criterion::black_box(resolved_ns);
110 |             }
111 |             (resolved_ns, Event::CData(e)) => {
112 |                 criterion::black_box(e.into_inner());
113 |                 criterion::black_box(resolved_ns);
114 |             }
115 |             (_, Event::End(_)) => (),
116 |             (_, Event::Eof) => break,
117 |             _ => (),
118 |         }
119 |     }
120 |     Ok(())
121 | }
122 | 
123 | // TODO: use fully normalized attribute values
124 | fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
125 |     let mut r = NsReader::from_reader(doc);
126 |     let mut buf = Vec::new();
127 |     loop {
128 |         match criterion::black_box(r.read_resolved_event_into(&mut buf)?) {
129 |             (resolved_ns, Event::Start(e) | Event::Empty(e)) => {
130 |                 criterion::black_box(resolved_ns);
131 |                 for attr in e.attributes() {
132 |                     criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?);
133 |                 }
134 |             }
135 |             (resolved_ns, Event::Text(e)) => {
136 |                 criterion::black_box(e.decode()?);
137 |                 criterion::black_box(resolved_ns);
138 |             }
139 |             (resolved_ns, Event::CData(e)) => {
140 |                 criterion::black_box(e.into_inner());
141 |                 criterion::black_box(resolved_ns);
142 |             }
143 |             (_, Event::End(_)) => (),
144 |             (_, Event::Eof) => break,
145 |             _ => (),
146 |         }
147 |         buf.clear();
148 |     }
149 |     Ok(())
150 | }
151 | 
152 | /// Just parse - no decoding overhead
153 | pub fn bench_parse_document_nocopy(c: &mut Criterion) {
154 |     let mut group = c.benchmark_group("parse_document_nocopy");
155 | 
156 |     for (id, data) in INPUTS.iter() {
157 |         group.throughput(Throughput::Bytes(data.len() as u64));
158 |         group.bench_with_input(*id, *data, |b, input| {
159 |             b.iter(|| parse_document_from_str(input).unwrap())
160 |         });
161 |     }
162 | 
163 |     group.finish();
164 | }
165 | 
166 | /// Decode into a buffer, then parse
167 | pub fn bench_decode_and_parse_document(c: &mut Criterion) {
168 |     let mut group = c.benchmark_group("decode_and_parse_document");
169 | 
170 |     for (id, data) in INPUTS.iter() {
171 |         group.throughput(Throughput::Bytes(data.len() as u64));
172 |         group.bench_with_input(*id, *data, |b, input| {
173 |             b.iter(|| parse_document_from_bytes(input.as_bytes()).unwrap())
174 |         });
175 |     }
176 | 
177 |     group.finish();
178 | }
179 | 
180 | /// Just parse - no decoding overhead - including namespaces
181 | pub fn bench_parse_document_nocopy_with_namespaces(c: &mut Criterion) {
182 |     let mut group = c.benchmark_group("parse_document_nocopy_with_namespaces");
183 | 
184 |     for (id, data) in INPUTS.iter() {
185 |         group.throughput(Throughput::Bytes(data.len() as u64));
186 |         group.bench_with_input(*id, *data, |b, input| {
187 |             b.iter(|| parse_document_from_str_with_namespaces(input).unwrap())
188 |         });
189 |     }
190 | 
191 |     group.finish();
192 | }
193 | 
194 | /// Decode into a buffer, then parse - including namespaces
195 | pub fn bench_decode_and_parse_document_with_namespaces(c: &mut Criterion) {
196 |     let mut group = c.benchmark_group("decode_and_parse_document_with_namespaces");
197 | 
198 |     for (id, data) in INPUTS.iter() {
199 |         group.throughput(Throughput::Bytes(data.len() as u64));
200 |         group.bench_with_input(*id, *data, |b, input| {
201 |             b.iter(|| parse_document_from_bytes_with_namespaces(input.as_bytes()).unwrap())
202 |         });
203 |     }
204 | 
205 |     group.finish();
206 | }
207 | 
// Collect the four benchmark functions into a group named `benches` and hand
// that group to `criterion_main!`, which provides the benchmark entry point.
criterion_group!(
    benches,
    bench_parse_document_nocopy,
    bench_decode_and_parse_document,
    bench_parse_document_nocopy_with_namespaces,
    bench_decode_and_parse_document_with_namespaces,
);
criterion_main!(benches);
216 | 


--------------------------------------------------------------------------------
/compare/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "compare"
 3 | version = "0.1.0"
 4 | authors = ["Johann Tuffe <tafia973@gmail.com>"]
 5 | publish = false
 6 | edition = "2021"
 7 | 
 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 9 | 
10 | [dev-dependencies]
11 | criterion = { version = "0.5", features = ["html_reports"] }
12 | maybe_xml = "0.10.1"
13 | quick-xml = { path = "..", features = ["serialize"] }
14 | rapid-xml = "0.2"
15 | rusty_xml = { version = "0.3", package = "RustyXML" }
16 | xml_oxide = "0.3"
17 | xml-rs = "0.8"
18 | xml5ever = "0.17"
19 | xmlparser = "0.13"
20 | serde-xml-rs = "0.6"
21 | # Do not use the "derive" feature, because it slows down compilation
22 | # See https://github.com/serde-rs/serde/pull/2588
23 | serde = "1.0"
24 | serde_derive = "1.0"
25 | pretty_assertions = "1.4"
26 | 
27 | [[bench]]
28 | name = "bench"
29 | harness = false
30 | 


--------------------------------------------------------------------------------
/examples/custom_entities.rs:
--------------------------------------------------------------------------------
  1 | //! This example demonstrates how custom entities can be extracted from the DOCTYPE,
  2 | //! and later used to:
  3 | //! - insert new pieces of document (particular case - insert only textual content)
  4 | //! - decode attribute values
  5 | //!
  6 | //! NB: this example is deliberately kept simple:
  7 | //! * it assumes that the XML file is UTF-8 encoded (custom_entities must only contain UTF-8 data)
  8 | //! * it only handles internal entities;
  9 | //! * the regex in this example is simple but brittle;
 10 | //! * it does not support the use of entities in entity declaration.
 11 | 
 12 | use std::borrow::Cow;
 13 | use std::collections::{HashMap, VecDeque};
 14 | use std::str::from_utf8;
 15 | 
 16 | use quick_xml::encoding::Decoder;
 17 | use quick_xml::errors::Error;
 18 | use quick_xml::escape::EscapeError;
 19 | use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
 20 | use quick_xml::name::QName;
 21 | use quick_xml::reader::Reader;
 22 | use regex::bytes::Regex;
 23 | 
 24 | use pretty_assertions::assert_eq;
 25 | 
/// Reader that resolves entity references by reading each entity's replacement
/// text with a nested [`Reader`] kept on a stack.
struct MyReader<'i> {
    /// Stack of readers, the first element is the initial reader, the other are
    /// readers created for each resolved entity
    readers: VecDeque<Reader<&'i [u8]>>,
    /// Map of captured internal _parsed general entities_. _Parsed_ means that
    /// value of the entity is parsed by XML reader
    entities: HashMap<&'i [u8], &'i [u8]>,
    /// In this example we use simple regular expression to capture entities from DTD.
    /// In real application you should use DTD parser.
    entity_re: Regex,
}
 37 | impl<'i> MyReader<'i> {
 38 |     fn new(input: &'i str) -> Result<Self, regex::Error> {
 39 |         let mut reader = Reader::from_str(input);
 40 |         reader.config_mut().trim_text(true);
 41 | 
 42 |         let mut readers = VecDeque::new();
 43 |         readers.push_back(reader);
 44 | 
 45 |         // Capture "name" and "content" from such string:
 46 |         // <!ENTITY name "content" >
 47 |         let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
 48 |         Ok(Self {
 49 |             readers,
 50 |             entities: HashMap::new(),
 51 |             entity_re,
 52 |         })
 53 |     }
 54 |     fn read_event(&mut self) -> Result<Event<'i>, Error> {
 55 |         loop {
 56 |             if let Some(mut reader) = self.readers.pop_back() {
 57 |                 match dbg!(reader.read_event())? {
 58 |                     // Capture defined entities from the DTD inside document and skip that event
 59 |                     Event::DocType(e) => {
 60 |                         self.readers.push_back(reader);
 61 |                         self.capture(e);
 62 |                         continue;
 63 |                     }
 64 |                     // When entity is referenced, create new reader with the same settings as
 65 |                     // the current reader have and push it to the top of stack. Then try to
 66 |                     // read next event from it (on next iteration)
 67 |                     Event::GeneralRef(e) => {
 68 |                         if let Some(ch) = e.resolve_char_ref()? {
 69 |                             self.readers.push_back(reader);
 70 |                             return Ok(Event::Text(BytesText::from_escaped(ch.to_string())));
 71 |                         }
 72 |                         let mut r = Reader::from_reader(self.resolve(&e)?);
 73 |                         *r.config_mut() = reader.config().clone();
 74 | 
 75 |                         self.readers.push_back(reader);
 76 |                         self.readers.push_back(r);
 77 |                         continue;
 78 |                     }
 79 |                     // When reader is exhausted, do not return it to the stack
 80 |                     Event::Eof => continue,
 81 | 
 82 |                     // Return all other events to caller
 83 |                     e => {
 84 |                         self.readers.push_back(reader);
 85 |                         return Ok(e);
 86 |                     }
 87 |                 }
 88 |             }
 89 |             return Ok(Event::Eof);
 90 |         }
 91 |     }
 92 | 
 93 |     /// In this example we use simple regular expression to capture entities from DTD.
 94 |     /// In real application you should use DTD parser
 95 |     fn capture(&mut self, doctype: BytesText<'i>) {
 96 |         let doctype = match doctype.into_inner() {
 97 |             Cow::Borrowed(doctype) => doctype,
 98 |             Cow::Owned(_) => unreachable!("We are sure that event will be borrowed"),
 99 |         };
100 |         for cap in self.entity_re.captures_iter(doctype) {
101 |             self.entities.insert(
102 |                 cap.get(1).unwrap().as_bytes(),
103 |                 cap.get(2).unwrap().as_bytes(),
104 |             );
105 |         }
106 |     }
107 | 
108 |     fn resolve(&self, entity: &[u8]) -> Result<&'i [u8], EscapeError> {
109 |         match self.entities.get(entity) {
110 |             Some(replacement) => Ok(replacement),
111 |             None => Err(EscapeError::UnrecognizedEntity(
112 |                 0..0,
113 |                 String::from_utf8_lossy(entity).into_owned(),
114 |             )),
115 |         }
116 |     }
117 | 
118 |     fn get_entity(&self, entity: &str) -> Option<&'i str> {
119 |         self.entities
120 |             .get(entity.as_bytes())
121 |             // SAFETY: We are sure that slices are correct UTF-8 because we get
122 |             // them from rust string
123 |             .map(|value| from_utf8(value).unwrap())
124 |     }
125 | 
126 |     fn decoder(&self) -> Decoder {
127 |         self.readers.back().unwrap().decoder()
128 |     }
129 | }
130 | 
131 | fn main() -> Result<(), Box<dyn std::error::Error>> {
132 |     let mut reader = MyReader::new(
133 |         r#"
134 |         <!DOCTYPE test [
135 |         <!ENTITY text "hello world" >
136 |         <!ENTITY element1 "<dtd attr = 'Message: &text;'/>" >
137 |         <!ENTITY element2 "<a>&element1;</a>" >
138 |         ]>
139 |         <test label="Message: &text;">&#39;&element2;&#x27;</test>
140 |         "#,
141 |     )?;
142 | 
143 |     let event = reader.read_event()?;
144 |     assert_eq!(
145 |         event,
146 |         Event::Start(BytesStart::from_content(
147 |             r#"test label="Message: &text;""#,
148 |             4
149 |         ))
150 |     );
151 |     if let Event::Start(e) = event {
152 |         let mut attrs = e.attributes();
153 | 
154 |         let label = attrs.next().unwrap()?;
155 |         assert_eq!(label.key, QName(b"label"));
156 |         assert_eq!(
157 |             label.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?,
158 |             "Message: hello world"
159 |         );
160 | 
161 |         assert_eq!(attrs.next(), None);
162 |     }
163 | 
164 |     // This is decoded decimal character reference &#39;
165 |     assert_eq!(
166 |         reader.read_event()?,
167 |         Event::Text(BytesText::from_escaped("'"))
168 |     );
169 | 
170 |     //--------------------------------------------------------------------------
171 |     // This part was inserted into original document from entity defined in DTD
172 | 
173 |     assert_eq!(reader.read_event()?, Event::Start(BytesStart::new("a")));
174 |     let event = reader.read_event()?;
175 |     assert_eq!(
176 |         event,
177 |         Event::Empty(BytesStart::from_content(
178 |             r#"dtd attr = 'Message: &text;'"#,
179 |             3
180 |         ))
181 |     );
182 |     if let Event::Start(e) = event {
183 |         let mut attrs = e.attributes();
184 | 
185 |         let attr = attrs.next().unwrap()?;
186 |         assert_eq!(attr.key, QName(b"attr"));
187 |         assert_eq!(
188 |             attr.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?,
189 |             "Message: hello world"
190 |         );
191 | 
192 |         assert_eq!(attrs.next(), None);
193 |     }
194 |     assert_eq!(reader.read_event()?, Event::End(BytesEnd::new("a")));
195 |     //--------------------------------------------------------------------------
196 | 
197 |     // This is decoded hexadecimal character reference &#x27;
198 |     assert_eq!(
199 |         reader.read_event()?,
200 |         Event::Text(BytesText::from_escaped("'"))
201 |     );
202 | 
203 |     assert_eq!(reader.read_event()?, Event::End(BytesEnd::new("test")));
204 |     assert_eq!(reader.read_event()?, Event::Eof);
205 | 
206 |     Ok(())
207 | }
208 | 


--------------------------------------------------------------------------------
/examples/flattened_enum.rs:
--------------------------------------------------------------------------------
  1 | //! This example demonstrates how to deserialize and serialize enum nodes using an intermediate
  2 | //! custom deserializer and serializer.
  3 | //! The `elem` node can either be a `Foo` or a `Bar` node, depending on the `type`.
  4 | //! The `type` attribute is used to determine which variant to deserialize.
  5 | //! This is a workaround for [serde's issue](https://github.com/serde-rs/serde/issues/1905)
  6 | //!
  7 | //! note: to use serde, the feature needs to be enabled
  8 | //! run example with:
  9 | //!    cargo run --example flattened_enum --features="serialize"
 10 | 
 11 | use std::fmt;
 12 | 
 13 | use quick_xml::de::from_str;
 14 | use quick_xml::se::to_string_with_root;
 15 | use serde::de::value::MapAccessDeserializer;
 16 | use serde::de::{Error, MapAccess, Visitor};
 17 | use serde::ser::SerializeMap;
 18 | use serde::{Deserialize, Serialize};
 19 | 
/// Root of the example document: the sequence of `elem` nodes it contains.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Model {
    /// One entry per `elem` node
    elem: Vec<Elem>,
}
 24 | 
/// An `elem` node, whose variant is selected by the value of its `type` attribute.
#[derive(Debug, PartialEq)]
enum Elem {
    /// Used when `type` is `foo`
    Foo(Foo),
    /// Used when `type` is `bar`
    Bar(Bar),
}
 30 | 
 31 | impl<'de> Deserialize<'de> for Elem {
 32 |     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
 33 |     where
 34 |         D: serde::Deserializer<'de>,
 35 |     {
 36 |         struct ElemVisitor;
 37 | 
 38 |         impl<'de> Visitor<'de> for ElemVisitor {
 39 |             type Value = Elem;
 40 | 
 41 |             fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
 42 |                 formatter.write_str("an object with a `type` field")
 43 |             }
 44 | 
 45 |             fn visit_map<A>(self, mut map: A) -> Result<Elem, A::Error>
 46 |             where
 47 |                 A: MapAccess<'de>,
 48 |             {
 49 |                 if let Some((key, value)) = map.next_entry::<String, String>()? {
 50 |                     return match key.as_str() {
 51 |                         "@type" => match value.as_str() {
 52 |                             "foo" => {
 53 |                                 let f = Foo::deserialize(MapAccessDeserializer::new(map))?;
 54 |                                 Ok(Elem::Foo(f))
 55 |                             }
 56 |                             "bar" => {
 57 |                                 let f = Bar::deserialize(MapAccessDeserializer::new(map))?;
 58 |                                 Ok(Elem::Bar(f))
 59 |                             }
 60 |                             t => Err(Error::custom(format!("unknown type attribute `{t}`"))),
 61 |                         },
 62 |                         a => Err(Error::custom(format!(
 63 |                             "expected attribute `type`, but found `{a}`"
 64 |                         ))),
 65 |                     };
 66 |                 }
 67 |                 Err(Error::custom("expected `type` attribute"))
 68 |             }
 69 |         }
 70 |         deserializer.deserialize_map(ElemVisitor)
 71 |     }
 72 | }
 73 | 
 74 | impl Serialize for Elem {
 75 |     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
 76 |     where
 77 |         S: serde::Serializer,
 78 |     {
 79 |         match &self {
 80 |             Elem::Foo(f) => {
 81 |                 let mut state = serializer.serialize_map(Some(3))?;
 82 |                 state.serialize_entry("@type", "foo")?;
 83 |                 state.serialize_entry("a", &f.a)?;
 84 |                 state.serialize_entry("subfoo", &f.subfoo)?;
 85 |                 state.end()
 86 |             }
 87 |             Elem::Bar(b) => {
 88 |                 let mut state = serializer.serialize_map(Some(2))?;
 89 |                 state.serialize_entry("@type", "bar")?;
 90 |                 state.serialize_entry("b", &b.b)?;
 91 |                 state.end()
 92 |             }
 93 |         }
 94 |     }
 95 | }
 96 | 
/// Content of an `elem` node whose `type` attribute is `foo`.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Foo {
    /// Content of the `a` child element
    a: String,
    /// Content of the `subfoo` child element
    subfoo: SubFoo,
}
102 | 
/// Content of the `subfoo` element nested in `Foo`: three string fields
/// `a1`, `a2` and `a3`.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct SubFoo {
    a1: String,
    a2: String,
    a3: String,
}
109 | 
/// Content of an `elem` node whose `type` attribute is `bar`.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Bar {
    /// Content of the `b` child element
    b: String,
}
114 | 
115 | fn main() {
116 |     let x = r#"
117 | <model>
118 |     <elem type="foo">
119 |         <a>1</a>
120 |         <subfoo>
121 |             <a1>2</a1>
122 |             <a2>42</a2>
123 |             <a3>1337</a3>
124 |         </subfoo>
125 |     </elem>
126 |     <elem type="bar">
127 |         <b>22</b>
128 |     </elem>
129 | </model>
130 | "#;
131 | 
132 |     let model: Model = from_str(&x).unwrap();
133 |     println!("{:?}", model);
134 |     // Model { elem: [Foo(Foo { a: "1", subfoo: SubFoo { a1: "2", a2: "42", a3: "1337" } }), Bar(Bar { b: "22" })] }
135 | 
136 |     let x = to_string_with_root("model", &model).unwrap();
137 |     println!("{}", x);
138 |     // <model><elem type="foo"><a>1</a><subfoo><a1>2</a1><a2>42</a2><a3>1337</a3></subfoo></elem><elem type="bar"><b>22</b></elem></model>
139 | }
140 | 


--------------------------------------------------------------------------------
/examples/nested_readers.rs:
--------------------------------------------------------------------------------
 1 | use pretty_assertions::assert_eq;
 2 | use quick_xml::events::Event;
 3 | use quick_xml::reader::Reader;
 4 | 
/// A structure to capture the rows we've extracted
/// from an ECMA-376 table in document.xml
#[derive(Debug, Clone)]
struct TableStat {
    /// Running number of the table, taken from a counter that is incremented
    /// for every `w:tbl` start tag seen by the outer loop
    index: u8,
    /// One inner vector per `w:tr`, holding one entry per `w:tc` start tag
    rows: Vec<Vec<String>>,
}
12 | // demonstrate how to nest readers
13 | // This is useful for when you need to traverse
14 | // a few levels of a document to extract things.
15 | fn main() -> Result<(), quick_xml::Error> {
16 |     let mut buf = Vec::new();
17 |     // buffer for nested reader
18 |     let mut skip_buf = Vec::new();
19 |     let mut count = 0;
20 |     let mut reader = Reader::from_file("tests/documents/document.xml")?;
21 |     let mut found_tables = Vec::new();
22 |     loop {
23 |         match reader.read_event_into(&mut buf)? {
24 |             Event::Start(element) => {
25 |                 if let b"w:tbl" = element.name().as_ref() {
26 |                     count += 1;
27 |                     let mut stats = TableStat {
28 |                         index: count,
29 |                         rows: vec![],
30 |                     };
31 |                     // must define stateful variables
32 |                     // outside the nested loop else they are overwritten
33 |                     let mut row_index = 0;
34 |                     loop {
35 |                         skip_buf.clear();
36 |                         match reader.read_event_into(&mut skip_buf)? {
37 |                             Event::Start(element) => match element.name().as_ref() {
38 |                                 b"w:tr" => {
39 |                                     stats.rows.push(vec![]);
40 |                                     row_index = stats.rows.len() - 1;
41 |                                 }
42 |                                 b"w:tc" => {
43 |                                     stats.rows[row_index].push(
44 |                                         String::from_utf8(element.name().as_ref().to_vec())
45 |                                             .unwrap(),
46 |                                     );
47 |                                 }
48 |                                 _ => {}
49 |                             },
50 |                             Event::End(element) => {
51 |                                 if element.name().as_ref() == b"w:tbl" {
52 |                                     found_tables.push(stats);
53 |                                     break;
54 |                                 }
55 |                             }
56 |                             _ => {}
57 |                         }
58 |                     }
59 |                 }
60 |             }
61 |             Event::Eof => break,
62 |             _ => {}
63 |         }
64 |         buf.clear();
65 |     }
66 |     assert_eq!(found_tables.len(), 2);
67 |     // pretty print the table
68 |     println!("{:#?}", found_tables);
69 |     assert_eq!(found_tables[0].index, 2);
70 |     assert_eq!(found_tables[0].rows.len(), 2);
71 |     assert_eq!(found_tables[0].rows[0].len(), 4);
72 |     assert_eq!(found_tables[0].rows[1].len(), 4);
73 | 
74 |     assert_eq!(found_tables[1].index, 2);
75 |     assert_eq!(found_tables[1].rows.len(), 2);
76 |     assert_eq!(found_tables[1].rows[0].len(), 4);
77 |     assert_eq!(found_tables[1].rows[1].len(), 4);
78 |     Ok(())
79 | }
80 | 


--------------------------------------------------------------------------------
/examples/read_buffered.rs:
--------------------------------------------------------------------------------
 1 | // This example demonstrates how a reader (for example when reading from a file)
 2 | // can be buffered. In that case, data read from the file is written to a supplied
 3 | // buffer and returned XML events borrow from that buffer.
 4 | // That way, allocations can be kept to a minimum.
 5 | 
 6 | fn main() -> Result<(), quick_xml::Error> {
 7 |     use quick_xml::events::Event;
 8 |     use quick_xml::reader::Reader;
 9 | 
10 |     let mut reader = Reader::from_file("tests/documents/document.xml")?;
11 |     reader.config_mut().trim_text(true);
12 | 
13 |     let mut buf = Vec::new();
14 | 
15 |     let mut count = 0;
16 | 
17 |     loop {
18 |         match reader.read_event_into(&mut buf) {
19 |             Ok(Event::Start(ref e)) => {
20 |                 let name = e.name();
21 |                 let name = reader.decoder().decode(name.as_ref())?;
22 |                 println!("read start event {:?}", name.as_ref());
23 |                 count += 1;
24 |             }
25 |             Ok(Event::Eof) => break, // exits the loop when reaching end of file
26 |             Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
27 |             _ => (), // There are several other `Event`s we do not consider here
28 |         }
29 |     }
30 | 
31 |     println!("read {} start events in total", count);
32 | 
33 |     Ok(())
34 | }
35 | 


--------------------------------------------------------------------------------
/examples/read_nodes.rs:
--------------------------------------------------------------------------------
  1 | // Example that separates the logic for reading different top-level nodes of an XML tree.
  2 | // Note: for this specific data set, using the serde feature would be simpler;
  3 | //       the data is kept simple purely to make the code easier to understand.
  4 | 
  5 | use quick_xml::events::attributes::AttrError;
  6 | use quick_xml::events::{BytesStart, Event};
  7 | use quick_xml::name::QName;
  8 | use quick_xml::reader::Reader;
  9 | use std::borrow::Cow;
 10 | use std::collections::HashMap;
 11 | use std::convert::Infallible;
 12 | use std::str;
 13 | 
/// Sample document processed by this example: one `DefaultSettings` element
/// followed by a `Localization` element containing four `Translation` entries.
const XML: &str = r#"
<?xml version="1.0" encoding="utf-8"?>
  <DefaultSettings Language="es" Greeting="HELLO"/>
  <Localization>
    <Translation Tag="HELLO" Language="ja">
      <Text>こんにちは</Text>
    </Translation>
    <Translation Tag="BYE" Language="ja">
      <Text>さようなら</Text>
    </Translation>
    <Translation Tag="HELLO" Language="es">
      <Text>Hola</Text>
    </Translation>
    <Translation Tag="BYE" Language="es">
      <Text>Adiós</Text>
    </Translation>
  </Localization>
"#;
 32 | 
// The enum variants are not read in this example, so suppress the warning
#[allow(dead_code)]
#[derive(Debug)]
enum AppError {
    /// XML parsing error
    Xml(quick_xml::Error),
    /// The `Translation/Text` node is missing
    NoText(String),
}
 42 | 
 43 | impl From<quick_xml::Error> for AppError {
 44 |     fn from(error: quick_xml::Error) -> Self {
 45 |         Self::Xml(error)
 46 |     }
 47 | }
 48 | 
 49 | impl From<AttrError> for AppError {
 50 |     fn from(error: AttrError) -> Self {
 51 |         Self::Xml(quick_xml::Error::InvalidAttr(error))
 52 |     }
 53 | }
 54 | 
/// Data extracted from one `Translation` element.
#[derive(Debug)]
struct Translation {
    /// Value of the `Tag` attribute
    tag: String,
    /// Value of the `Language` attribute
    lang: String,
    /// Content of the nested `Text` element
    text: String,
}
 61 | 
 62 | impl Translation {
 63 |     fn new_from_element(
 64 |         reader: &mut Reader<&[u8]>,
 65 |         element: BytesStart,
 66 |     ) -> Result<Translation, AppError> {
 67 |         let mut tag = Cow::Borrowed("");
 68 |         let mut lang = Cow::Borrowed("");
 69 |         // Pick up the `Tag` and `Language` attributes; all other attributes are ignored
 70 |         for attr_result in element.attributes() {
 71 |             let a = attr_result?;
 72 |             match a.key.as_ref() {
 73 |                 b"Language" => lang = a.decode_and_unescape_value(reader.decoder())?,
 74 |                 b"Tag" => tag = a.decode_and_unescape_value(reader.decoder())?,
 75 |                 _ => (),
 76 |             }
 77 |         }
 78 |         let mut element_buf = Vec::new();
 79 |         let event = reader.read_event_into(&mut element_buf)?;
 80 |         // The next event must be the start of a `Text` element, otherwise `NoText` is returned
 81 |         if let Event::Start(ref e) = event {
 82 |             let name = e.name();
 83 |             if name == QName(b"Text") {
 84 |                 // note: `read_text` does not support content as CDATA
 85 |                 let text_content = reader.read_text(e.name())?;
 86 |                 Ok(Translation {
 87 |                     tag: tag.into(),
 88 |                     lang: lang.into(),
 89 |                     text: text_content.into(),
 90 |                 })
 91 |             } else {
 92 |                 eprintln!("Expected Event::Start for Text, got: {:?}", &event);
 93 |                 let name_string = reader
 94 |                     .decoder()
 95 |                     .decode(name.as_ref())
 96 |                     .map_err(quick_xml::Error::Encoding)?;
 97 |                 Err(AppError::NoText(name_string.into()))
 98 |             }
 99 |         } else {
100 |             let event_string = format!("{:?}", event);
101 |             Err(AppError::NoText(event_string))
102 |         }
103 |     }
104 | }
105 | 
106 | fn main() -> Result<(), AppError> {
107 |     // In a real-world use case, Settings would likely be a struct
108 |     // HashMap here is just to make the sample code short
109 |     let mut settings: HashMap<String, String>;
110 |     let mut translations: Vec<Translation> = Vec::new();
111 | 
112 |     let mut reader = Reader::from_str(XML);
113 |     let config = reader.config_mut();
114 | 
115 |     config.trim_text(true);
116 |     // == Handling empty elements ==
117 |     // To simplify our processing code
118 |     // we want the same events for empty elements, like:
119 |     //   <DefaultSettings Language="es" Greeting="HELLO"/>
120 |     //   <Text/>
121 |     config.expand_empty_elements = true;
122 | 
123 |     let mut buf = Vec::new();
124 | 
125 |     loop {
126 |         let event = reader.read_event_into(&mut buf)?;
127 | 
128 |         match event {
129 |             Event::Start(element) => match element.name().as_ref() {
130 |                 b"DefaultSettings" => {
131 |                     // Note: real app would handle errors with good defaults or halt program with nice message
132 |                     // This illustrates decoding an attribute's key and value with error handling
133 |                     settings = element
134 |                         .attributes()
135 |                         .map(|attr_result| {
136 |                             match attr_result {
137 |                                 Ok(a) => {
138 |                                     let key = reader.decoder().decode(a.key.local_name().as_ref())
139 |                                         .or_else(|err| {
140 |                                             eprintln!("unable to read key in DefaultSettings attribute {:?}, utf8 error {:?}", &a, err);
141 |                                             Ok::<Cow<'_, str>, Infallible>(std::borrow::Cow::from(""))
142 |                                         })
143 |                                         .unwrap().to_string();
144 |                                     let value = a.decode_and_unescape_value(reader.decoder()).or_else(|err| {
145 |                                             eprintln!("unable to read value in DefaultSettings attribute {:?}, utf8 error {:?}", &a, err);
146 |                                             Ok::<Cow<'_, str>, Infallible>(std::borrow::Cow::from(""))
147 |                                     }).unwrap().to_string();
148 |                                     (key, value)
149 |                                 },
150 |                                 Err(err) => {
151 |                                     eprintln!("unable to read key in DefaultSettings, err = {:?}", err);
152 |                                     (String::new(), String::new())
153 |                                 }
154 |                             }
155 |                         })
156 |                         .collect();
157 |                     assert_eq!(settings["Language"], "es");
158 |                     assert_eq!(settings["Greeting"], "HELLO");
159 |                     reader.read_to_end(element.name())?;
160 |                 }
161 |                 b"Translation" => {
162 |                     translations.push(Translation::new_from_element(&mut reader, element)?);
163 |                 }
164 |                 _ => (),
165 |             },
166 | 
167 |             Event::Eof => break, // exits the loop when reaching end of file
168 |             _ => (),             // There are `Event` types not considered here
169 |         }
170 |     }
171 |     dbg!(&translations);
172 |     assert_eq!(translations.len(), 4);
173 |     assert_eq!(translations[2].tag, "HELLO");
174 |     assert_eq!(translations[2].text, "Hola");
175 |     assert_eq!(translations[2].lang, "es");
176 | 
177 |     Ok(())
178 | }
179 | 


--------------------------------------------------------------------------------
/examples/read_nodes_serde.rs:
--------------------------------------------------------------------------------
 1 | // note: to use serde, the feature needs to be enabled
 2 | // run example with:
 3 | //    cargo run --example read_nodes_serde --features="serialize"
 4 | 
 5 | use quick_xml::de::from_str;
 6 | use serde::Deserialize;
 7 | 
 8 | #[derive(Debug, PartialEq, Default, Deserialize)]
 9 | #[serde(default)]
10 | struct Translation {
11 |     #[serde(rename = "@Tag")]
12 |     tag: String,
13 |     #[serde(rename = "@Language")]
14 |     lang: String,
15 |     #[serde(rename = "$text")]
16 |     text: String,
17 | }
18 | 
19 | #[derive(Debug, PartialEq, Default, Deserialize)]
20 | #[serde(default)]
21 | struct DefaultSettings {
22 |     #[serde(rename = "@Language")]
23 |     language: String,
24 |     #[serde(rename = "@Greeting")]
25 |     greeting: String,
26 | }
27 | 
28 | #[derive(Debug, PartialEq, Default, Deserialize)]
29 | #[serde(default, rename_all = "PascalCase")]
30 | struct Config {
31 |     #[serde(rename = "DefaultSettings")]
32 |     settings: DefaultSettings,
33 |     localization: Localization,
34 | }
35 | #[derive(Debug, PartialEq, Default, Deserialize)]
36 | #[serde(rename_all = "PascalCase")]
37 | struct Localization {
38 |     translation: Vec<Translation>,
39 | }
40 | 
41 | const XML: &str = r#"
42 | <?xml version="1.0" encoding="utf-8"?>
43 | <Config>
44 |   <DefaultSettings Language="es" Greeting="HELLO"/>
45 |   <Localization>
46 |     <Translation Tag="HELLO" Language="ja">
47 |       こんにちは
48 |     </Translation>
49 |     <Translation Tag="BYE" Language="ja">
50 |       さようなら
51 |     </Translation>
52 |     <Translation Tag="HELLO" Language="es">
53 |       Hola
54 |     </Translation>
55 |     <Translation Tag="BYE" Language="es">
56 |       Adiós
57 |     </Translation>
58 |   </Localization>
59 | </Config>
60 | "#;
61 | 
62 | const ONE_TRANSLATION_XML: &str = r#"
63 |     <Translation Tag="HELLO" Language="ja">
64 |       こんにちは
65 |     </Translation>
66 | "#;
67 | 
68 | fn main() -> Result<(), quick_xml::DeError> {
69 |     let t: Translation = from_str(ONE_TRANSLATION_XML)?;
70 |     assert_eq!(t.tag, "HELLO");
71 |     assert_eq!(t.lang, "ja");
72 |     assert_eq!(t.text, "こんにちは");
73 | 
74 |     let config: Config = from_str(XML)?;
75 |     dbg!(&config);
76 | 
77 |     assert_eq!(config.settings.language, "es");
78 |     assert_eq!(config.settings.greeting, "HELLO");
79 | 
80 |     let translations = config.localization.translation;
81 |     assert_eq!(translations.len(), 4);
82 |     assert_eq!(translations[2].tag, "HELLO");
83 |     assert_eq!(translations[2].text, "Hola");
84 |     assert_eq!(translations[2].lang, "es");
85 |     Ok(())
86 | }
87 | 


--------------------------------------------------------------------------------
/examples/read_texts.rs:
--------------------------------------------------------------------------------
 1 | fn main() {
 2 |     use quick_xml::events::Event;
 3 |     use quick_xml::reader::Reader;
 4 | 
 5 |     let xml = "<tag1>text1</tag1><tag1>text2</tag1>\
 6 |                <tag1>text3</tag1><tag1><tag2>text4</tag2></tag1>";
 7 | 
 8 |     let mut reader = Reader::from_str(xml);
 9 |     reader.config_mut().trim_text(true);
10 | 
11 |     loop {
12 |         match reader.read_event() {
13 |             Ok(Event::Start(e)) if e.name().as_ref() == b"tag2" => {
14 |                 // read_text_into for buffered readers not implemented
15 |                 let txt = reader
16 |                     .read_text(e.name())
17 |                     .expect("Cannot decode text value");
18 |                 println!("{:?}", txt);
19 |             }
20 |             Ok(Event::Eof) => break, // exits the loop when reaching end of file
21 |             Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
22 |             _ => (), // There are several other `Event`s we do not consider here
23 |         }
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/fuzz/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | corpus
3 | artifacts
4 | coverage
5 | 


--------------------------------------------------------------------------------
/fuzz/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "quick-xml-fuzz"
 3 | version = "0.0.0"
 4 | authors = ["Automatically generated"]
 5 | publish = false
 6 | edition = "2021"
 7 | 
 8 | [package.metadata]
 9 | cargo-fuzz = true
10 | 
11 | [dependencies]
12 | arbitrary = { version = "1.3", features = ["derive"] }
13 | libfuzzer-sys = "0.4"
14 | 
15 | [dependencies.quick-xml]
16 | path = ".."
17 | features = ["arbitrary"]
18 | 
19 | # Prevent this from interfering with workspaces
20 | [workspace]
21 | members = ["."]
22 | 
23 | [[bin]]
24 | name = "fuzz_target_1"
25 | path = "fuzz_targets/fuzz_target_1.rs"
26 | test = false
27 | doc = false
28 | 
29 | [[bin]]
30 | name = "structured_roundtrip"
31 | path = "fuzz_targets/structured_roundtrip.rs"
32 | test = false
33 | doc = false
34 | 


--------------------------------------------------------------------------------
/fuzz/README.md:
--------------------------------------------------------------------------------
1 | Run fuzzing with `-O` to avoid false positives at `debug_assert!`, e.g.:
2 | 
3 | ```bash
4 | cargo fuzz run -O -j4 fuzz_target_1
5 | ```
6 | 
7 | See also: https://github.com/rust-fuzz/cargo-fuzz
8 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/fuzz_target_1.rs:
--------------------------------------------------------------------------------
  1 | #![no_main]
  2 | use libfuzzer_sys::fuzz_target;
  3 | use std::hint::black_box;
  4 | 
  5 | use quick_xml::{events::Event, reader::Reader, writer::Writer};
  6 | use std::io::Cursor;
  7 | 
  8 | macro_rules! debug_format {
  9 |     ($x:expr) => {
 10 |         let _unused = std::hint::black_box(format!("{:?}", $x));
 11 |     };
 12 | }
 13 | 
 14 | fn round_trip<R>(reader: &mut Reader<R>) -> ()
 15 | where
 16 |     R: std::io::BufRead,
 17 | {
 18 |     let mut writer = Writer::new(Cursor::new(Vec::new()));
 19 |     let mut buf = vec![];
 20 |     let config = reader.config_mut();
 21 |     config.expand_empty_elements = true;
 22 |     config.trim_text(true);
 23 |     loop {
 24 |         let event_result = reader.read_event_into(&mut buf);
 25 |         if let Ok(ref event) = event_result {
 26 |             let _event = black_box(event.borrow());
 27 |             let _event = black_box(event.as_ref());
 28 |             debug_format!(event);
 29 |             debug_format!(writer.write_event(event.borrow()));
 30 |         }
 31 |         match event_result {
 32 |             Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
 33 |                 debug_format!(e);
 34 |                 debug_format!(e.name());
 35 |                 for a in e.attributes() {
 36 |                     debug_format!(a);
 37 |                     if a.ok().map_or(false, |a| a.unescape_value().is_err()) {
 38 |                         break;
 39 |                     }
 40 |                 }
 41 |             }
 42 |             Ok(Event::Text(ref e))
 43 |             | Ok(Event::Comment(ref e))
 44 |             | Ok(Event::DocType(ref e)) => {
 45 |                 debug_format!(e);
 46 |                 if let Err(err) = e.decode() {
 47 |                     debug_format!(err);
 48 |                     break;
 49 |                 }
 50 |             }
 51 |             Ok(Event::CData(e)) => {
 52 |                 if let Err(err) = e.escape() {
 53 |                     let _displayed = black_box(format!("{}", err));
 54 |                     debug_format!(err);
 55 |                     break;
 56 |                 }
 57 |             }
 58 |             Ok(Event::GeneralRef(ref e)) => {
 59 |                 debug_format!(e);
 60 |                 debug_format!(e.is_char_ref());
 61 |                 debug_format!(e.resolve_char_ref());
 62 |             }
 63 |             Ok(Event::PI(ref e)) => {
 64 |                 debug_format!(e);
 65 |             }
 66 |             Ok(Event::Decl(ref e)) => {
 67 |                 debug_format!(e);
 68 |                 let _ = black_box(e.version());
 69 |                 let _ = black_box(e.encoding());
 70 |                 let _ = black_box(e.standalone());
 71 |             }
 72 |             Ok(Event::End(e)) => {
 73 |                 debug_format!(e.local_name());
 74 |                 let name = e.name();
 75 |                 debug_format!(name);
 76 |                 debug_format!(name.prefix());
 77 |                 debug_format!(name.local_name());
 78 |                 debug_format!(name.decompose());
 79 |                 debug_format!(name.as_namespace_binding());
 80 |                 debug_format!(e);
 81 |             }
 82 |             Err(e) => {
 83 |                 debug_format!(e);
 84 |                 break;
 85 |             }
 86 |             Ok(Event::Eof) => break,
 87 |         }
 88 |         buf.clear();
 89 |     }
 90 |     let _round_trip = std::hint::black_box(writer.into_inner().into_inner());
 91 | }
 92 | 
 93 | fuzz_target!(|data: &[u8]| {
 94 |     // From reader
 95 |     let cursor = Cursor::new(data);
 96 |     let mut reader = Reader::from_reader(cursor);
 97 |     _ = std::hint::black_box(round_trip(&mut reader));
 98 | 
 99 |     // From str
100 |     if let Ok(s) = std::str::from_utf8(data) {
101 |         let mut reader = Reader::from_str(s);
102 |         _ = std::hint::black_box(round_trip(&mut reader));
103 |     }
104 | });
105 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/structured_roundtrip.rs:
--------------------------------------------------------------------------------
  1 | #![no_main]
  2 | 
  3 | use arbitrary::{Arbitrary, Unstructured};
  4 | use libfuzzer_sys::fuzz_target;
  5 | use quick_xml::events::{BytesCData, BytesPI, BytesText, Event};
  6 | use quick_xml::reader::{Config, NsReader, Reader};
  7 | use quick_xml::writer::Writer;
  8 | use std::{hint::black_box, io::Cursor};
  9 | 
 10 | #[derive(Debug, arbitrary::Arbitrary)]
 11 | enum ElementWriterFunc<'a> {
 12 |     WriteTextContent(&'a str),
 13 |     WriteCDataContent(&'a str),
 14 |     WritePiContent(&'a str),
 15 |     WriteEmpty,
 16 |     // TODO: We can't automatically generate an arbitrary function
 17 |     // WriteInnerContent,
 18 | }
 19 | 
 20 | fn arbitrary_name(u: &mut Unstructured) -> arbitrary::Result<String> {
 21 |     let s = String::arbitrary(u)?;
 22 |     if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
 23 |         return Err(arbitrary::Error::IncorrectFormat); // only non-empty alphanumeric names are accepted
 24 |     }
 25 |     Ok(s)
 26 | }
 27 | 
 28 | #[derive(Debug, arbitrary::Arbitrary)]
 29 | enum WriterFunc<'a> {
 30 |     WriteEvent(Event<'a>),
 31 |     WriteBom,
 32 |     WriteIndent,
 33 |     CreateElement {
 34 |         #[arbitrary(with = arbitrary_name)]
 35 |         name: String,
 36 |         func: ElementWriterFunc<'a>,
 37 |         attributes: Vec<(&'a str, &'a str)>,
 38 |     },
 39 | }
 40 | 
 41 | #[derive(Debug, arbitrary::Arbitrary)]
 42 | struct Driver<'a> {
 43 |     writer_funcs: Vec<WriterFunc<'a>>,
 44 |     reader_config: Config,
 45 | }
 46 | 
 47 | fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
 48 |     let mut writer = Writer::new(Cursor::new(Vec::new()));
 49 |     let writer_funcs = driver.writer_funcs;
 50 |     for writer_func in &writer_funcs {
 51 |         // TODO: Handle error cases.
 52 |         use WriterFunc::*;
 53 |         match writer_func {
 54 |             WriteEvent(event) => writer.write_event(event.borrow())?,
 55 |             WriteBom => writer.write_bom()?,
 56 |             WriteIndent => writer.write_indent()?,
 57 |             CreateElement {
 58 |                 name,
 59 |                 func,
 60 |                 attributes,
 61 |             } => {
 62 |                 let element_writer = writer
 63 |                     .create_element(name)
 64 |                     .with_attributes(attributes.iter().copied());
 65 |                 use ElementWriterFunc::*;
 66 |                 match func {
 67 |                     WriteTextContent(text) => {
 68 |                         element_writer.write_text_content(BytesText::from_escaped(*text))?;
 69 |                     }
 70 |                     WriteCDataContent(text) => {
 71 |                         _ = element_writer.write_cdata_content(BytesCData::new(*text))?;
 72 |                     }
 73 |                     WritePiContent(text) => {
 74 |                         _ = element_writer.write_pi_content(BytesPI::new(*text))?;
 75 |                     }
 76 |                     WriteEmpty => {
 77 |                         _ = element_writer.write_empty()?;
 78 |                     }
 79 |                 }
 80 |             }
 81 |         }
 82 |     }
 83 |     let xml = writer.into_inner().into_inner();
 84 |     // The str should be valid as we just generated it, unwrapping **should** be safe.
 85 |     let mut reader = Reader::from_str(std::str::from_utf8(&xml).unwrap());
 86 |     *reader.config_mut() = driver.reader_config.clone();
 87 | 
 88 |     loop {
 89 |         let event = black_box(reader.read_event()?);
 90 |         if event == Event::Eof {
 91 |             break;
 92 |         }
 93 |     }
 94 | 
 95 |     let mut reader = NsReader::from_reader(&xml[..]);
 96 |     *reader.config_mut() = driver.reader_config;
 97 | 
 98 |     loop {
 99 |         let event = black_box(reader.read_event()?);
100 |         if event == Event::Eof {
101 |             break;
102 |         }
103 |     }
104 |     Ok(())
105 | }
106 | 
107 | fuzz_target!(|driver: Driver| {
108 |     if let Err(e) = fuzz_round_trip(driver) {
109 |         black_box(format!("{e:?}"));
110 |     }
111 | });
112 | 


--------------------------------------------------------------------------------
/src/de/attributes.rs:
--------------------------------------------------------------------------------
  1 | //! Implementation of the deserializer from attributes
  2 | 
  3 | use std::borrow::Cow;
  4 | 
  5 | use serde::de::{DeserializeSeed, Deserializer, Error, IntoDeserializer, MapAccess, Visitor};
  6 | use serde::forward_to_deserialize_any;
  7 | 
  8 | use crate::de::key::QNameDeserializer;
  9 | use crate::de::SimpleTypeDeserializer;
 10 | use crate::errors::serialize::DeError;
 11 | use crate::events::attributes::Attributes;
 12 | 
 13 | impl<'i> Attributes<'i> {
 14 |     /// Converts this iterator into an implementation of serde's [`MapAccess`] trait.
 15 |     /// The returned object also implements the [`Deserializer`] trait.
 16 |     ///
 17 |     /// # Parameters
 18 |     /// - `prefix`: a prefix of the field names in structs that should be stripped
 19 |     ///   to get the local attribute name. The [`crate::de::Deserializer`] uses `"@"`
 20 |     ///   as a prefix, but [`Self::into_deserializer()`] uses an empty string, which means
 21 |     ///   that nothing is stripped.
 22 |     ///
 23 |     /// # Example
 24 |     /// ```
 25 |     /// # use pretty_assertions::assert_eq;
 26 |     /// use quick_xml::events::BytesStart;
 27 |     /// use serde::Deserialize;
 28 |     /// use serde::de::IntoDeserializer;
 29 |     ///
 30 |     /// #[derive(Debug, PartialEq, Deserialize)]
 31 |     /// struct MyData<'i> {
 32 |     ///     question: &'i str,
 33 |     ///     answer: u32,
 34 |     /// }
 35 |     ///
 36 |     /// #[derive(Debug, PartialEq, Deserialize)]
 37 |     /// struct MyDataPrefixed<'i> {
 38 |     ///     #[serde(rename = "@question")] question: &'i str,
 39 |     ///     #[serde(rename = "@answer")]   answer: u32,
 40 |     /// }
 41 |     ///
 42 |     /// let tag = BytesStart::from_content(
 43 |     ///     "tag
 44 |     ///         question = 'The Ultimate Question of Life, the Universe, and Everything'
 45 |     ///         answer = '42'",
 46 |     ///     3
 47 |     /// );
 48 |     /// // Strip nothing from the field names
 49 |     /// let de = tag.attributes().clone().into_deserializer();
 50 |     /// assert_eq!(
 51 |     ///     MyData::deserialize(de).unwrap(),
 52 |     ///     MyData {
 53 |     ///         question: "The Ultimate Question of Life, the Universe, and Everything",
 54 |     ///         answer: 42,
 55 |     ///     }
 56 |     /// );
 57 |     ///
 58 |     /// // Strip "@" from the field name
 59 |     /// let de = tag.attributes().into_map_access("@");
 60 |     /// assert_eq!(
 61 |     ///     MyDataPrefixed::deserialize(de).unwrap(),
 62 |     ///     MyDataPrefixed {
 63 |     ///         question: "The Ultimate Question of Life, the Universe, and Everything",
 64 |     ///         answer: 42,
 65 |     ///     }
 66 |     /// );
 67 |     /// ```
 68 |     #[inline]
 69 |     pub const fn into_map_access(self, prefix: &'static str) -> AttributesDeserializer<'i> {
 70 |         AttributesDeserializer {
 71 |             iter: self,
 72 |             value: None,
 73 |             prefix,
 74 |             key_buf: String::new(),
 75 |         }
 76 |     }
 77 | }
 78 | 
 79 | impl<'de> IntoDeserializer<'de, DeError> for Attributes<'de> {
 80 |     type Deserializer = AttributesDeserializer<'de>;
 81 | 
 82 |     #[inline]
 83 |     fn into_deserializer(self) -> Self::Deserializer {
 84 |         self.into_map_access("")
 85 |     }
 86 | }
 87 | 
 88 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 89 | 
 90 | /// A deserializer that makes it possible to pack all attributes into a struct.
 91 | /// It is created by the [`Attributes::into_map_access`] or [`Attributes::into_deserializer`]
 92 | /// methods.
 93 | ///
 94 | /// This deserializer always calls [`Visitor::visit_map`] with self as [`MapAccess`].
 95 | ///
 96 | /// # Lifetime
 97 | ///
 98 | /// `'i` is the lifetime of the original buffer from which the attributes were parsed.
 99 | /// In particular, when the reader was created from a string, this is the lifetime of
100 | /// that string.
101 | #[derive(Debug, Clone)]
102 | pub struct AttributesDeserializer<'i> {
103 |     iter: Attributes<'i>,
104 |     /// The value of the attribute, read in the last call to `next_key_seed`.
105 |     value: Option<Cow<'i, [u8]>>,
106 |     /// This prefix will be stripped from struct fields before matching against the attribute name.
107 |     prefix: &'static str,
108 |     /// Buffer to store the attribute name as a field name exposed to serde consumers.
109 |     /// Kept in the deserializer to avoid many small allocations
110 |     key_buf: String,
111 | }
112 | 
113 | impl<'de> Deserializer<'de> for AttributesDeserializer<'de> {
114 |     type Error = DeError;
115 | 
116 |     #[inline]
117 |     fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
118 |     where
119 |         V: Visitor<'de>,
120 |     {
121 |         visitor.visit_map(self)
122 |     }
123 | 
124 |     forward_to_deserialize_any! {
125 |         bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
126 |         bytes byte_buf option unit unit_struct newtype_struct seq tuple
127 |         tuple_struct map struct enum identifier ignored_any
128 |     }
129 | }
130 | 
131 | impl<'de> MapAccess<'de> for AttributesDeserializer<'de> {
132 |     type Error = DeError;
133 | 
134 |     fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>, Self::Error>
135 |     where
136 |         K: DeserializeSeed<'de>,
137 |     {
138 |         debug_assert_eq!(self.value, None);
139 | 
140 |         match self.iter.next() {
141 |             None => Ok(None),
142 |             Some(Ok(attr)) => {
143 |                 self.value = Some(attr.value);
144 |                 self.key_buf.clear();
145 |                 self.key_buf.push_str(self.prefix);
146 |                 let de =
147 |                     QNameDeserializer::from_attr(attr.key, self.iter.decoder(), &mut self.key_buf)?;
148 |                 seed.deserialize(de).map(Some)
149 |             }
150 |             Some(Err(err)) => Err(Error::custom(err)),
151 |         }
152 |     }
153 | 
154 |     fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value, Self::Error>
155 |     where
156 |         V: DeserializeSeed<'de>,
157 |     {
158 |         match self.value.take() {
159 |             Some(value) => {
160 |                 let de =
161 |                     SimpleTypeDeserializer::from_part(&value, 0..value.len(), self.iter.decoder());
162 |                 seed.deserialize(de)
163 |             }
164 |             None => Err(DeError::KeyNotRead),
165 |         }
166 |     }
167 | }
168 | 


--------------------------------------------------------------------------------
/src/de/resolver.rs:
--------------------------------------------------------------------------------
  1 | //! Entity resolver module
  2 | 
  3 | use std::convert::Infallible;
  4 | use std::error::Error;
  5 | 
  6 | use crate::escape::resolve_predefined_entity;
  7 | use crate::events::BytesText;
  8 | 
  9 | /// Used to resolve unknown entities while parsing
 10 | ///
 11 | /// # Example
 12 | ///
 13 | /// ```
 14 | /// # use serde::Deserialize;
 15 | /// # use pretty_assertions::assert_eq;
 16 | /// use regex::bytes::Regex;
 17 | /// use std::collections::BTreeMap;
 18 | /// use std::string::FromUtf8Error;
 19 | /// use quick_xml::de::{Deserializer, EntityResolver};
 20 | /// use quick_xml::events::BytesText;
 21 | ///
 22 | /// struct DocTypeEntityResolver {
 23 | ///     re: Regex,
 24 | ///     map: BTreeMap<String, String>,
 25 | /// }
 26 | ///
 27 | /// impl Default for DocTypeEntityResolver {
 28 | ///     fn default() -> Self {
 29 | ///         Self {
 30 | ///             // We do not focus on true parsing in this example
 31 | ///             // You should use special libraries to parse DTD
 32 | ///             re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#).unwrap(),
 33 | ///             map: BTreeMap::new(),
 34 | ///         }
 35 | ///     }
 36 | /// }
 37 | ///
 38 | /// impl EntityResolver for DocTypeEntityResolver {
 39 | ///     type Error = FromUtf8Error;
 40 | ///
 41 | ///     fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> {
 42 | ///         for cap in self.re.captures_iter(&doctype) {
 43 | ///             self.map.insert(
 44 | ///                 String::from_utf8(cap[1].to_vec())?,
 45 | ///                 String::from_utf8(cap[2].to_vec())?,
 46 | ///             );
 47 | ///         }
 48 | ///         Ok(())
 49 | ///     }
 50 | ///
 51 | ///     fn resolve(&self, entity: &str) -> Option<&str> {
 52 | ///         self.map.get(entity).map(|s| s.as_str())
 53 | ///     }
 54 | /// }
 55 | ///
 56 | /// let xml_reader = br#"
 57 | ///     <!DOCTYPE dict[ <!ENTITY e1 "entity 1"> ]>
 58 | ///     <root>
 59 | ///         <entity_one>&e1;</entity_one>
 60 | ///     </root>
 61 | /// "#.as_ref();
 62 | ///
 63 | /// let mut de = Deserializer::with_resolver(
 64 | ///     xml_reader,
 65 | ///     DocTypeEntityResolver::default(),
 66 | /// );
 67 | /// let data: BTreeMap<String, String> = BTreeMap::deserialize(&mut de).unwrap();
 68 | ///
 69 | /// assert_eq!(data.get("entity_one"), Some(&"entity 1".to_string()));
 70 | /// ```
 71 | pub trait EntityResolver {
 72 |     /// The error type that represents DTD parse error
 73 |     type Error: Error;
 74 | 
 75 |     /// Called on contents of [`Event::DocType`] to capture declared entities.
 76 |     /// Can be called multiple times, for each parsed `<!DOCTYPE >` declaration.
 77 |     ///
 78 |     /// [`Event::DocType`]: crate::events::Event::DocType
 79 |     fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>;
 80 | 
 81 |     /// Called when an entity needs to be resolved.
 82 |     ///
 83 |     /// `None` is returned if a suitable value can not be found.
 84 |     /// In that case an [`EscapeError::UnrecognizedEntity`] will be returned by
 85 |     /// a deserializer.
 86 |     ///
 87 |     /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
 88 |     fn resolve(&self, entity: &str) -> Option<&str>;
 89 | }
 90 | 
 91 | /// An [`EntityResolver`] that resolves only predefined entities:
 92 | ///
 93 | /// | Entity | Resolution
 94 | /// |--------|------------
 95 | /// |`&lt;`  | `<`
 96 | /// |`&gt;`  | `>`
 97 | /// |`&amp;` | `&`
 98 | /// |`&apos;`| `'`
 99 | /// |`&quot;`| `"`
100 | #[derive(Default, Copy, Clone)]
101 | pub struct PredefinedEntityResolver;
102 | 
103 | impl EntityResolver for PredefinedEntityResolver {
104 |     type Error = Infallible;
105 | 
106 |     #[inline]
107 |     fn capture(&mut self, _doctype: BytesText) -> Result<(), Self::Error> {
108 |         Ok(())
109 |     }
110 | 
111 |     #[inline]
112 |     fn resolve(&self, entity: &str) -> Option<&str> {
113 |         resolve_predefined_entity(entity)
114 |     }
115 | }
116 | 


--------------------------------------------------------------------------------
/src/de/text.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     de::simple_type::SimpleTypeDeserializer,
  3 |     de::{Text, TEXT_KEY},
  4 |     errors::serialize::DeError,
  5 |     utils::CowRef,
  6 | };
  7 | use serde::de::value::BorrowedStrDeserializer;
  8 | use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor};
  9 | use serde::serde_if_integer128;
 10 | use std::borrow::Cow;
 11 | 
12 | /// A deserializer for a single text node of a mixed sequence of tags and text.
13 | ///
14 | /// This deserializer is very similar to a [`MapValueDeserializer`] (when it
15 | /// processes the [`DeEvent::Text`] event). The only difference is in the
16 | /// `deserialize_seq` method. This deserializer will perform deserialization
17 | /// from a textual content, whereas the [`MapValueDeserializer`] will iterate
18 | /// over tags / text within its parent tag.
19 | ///
20 | /// This deserializer processes items as follows:
21 | /// - numbers are parsed from a text content using [`FromStr`]; in case of error
22 | ///   [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], or [`Visitor::visit_string`]
23 | ///   is called; it is the responsibility of the type to return an error if it is
24 | ///   not able to process the passed data;
25 | /// - booleans are converted from the text according to the XML [specification]:
26 | ///   - `"true"` and `"1"` are converted to `true`;
27 | ///   - `"false"` and `"0"` are converted to `false`;
28 | ///   - everything else calls [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`],
29 | ///     or [`Visitor::visit_string`]; it is the responsibility of the type to return
30 | ///     an error if it is not able to process the passed data;
31 | /// - strings are returned as is;
32 | /// - characters are also returned as strings. If the string contains more than one
33 | ///   character or is empty, it is the responsibility of the type to return an error;
34 | /// - `Option`:
35 | ///   - empty text is deserialized as `None`;
36 | ///   - everything else is deserialized as `Some` using the same deserializer;
37 | /// - units (`()`) and unit structs are always deserialized successfully, the content is ignored;
38 | /// - newtype structs forward deserialization to the inner type using the same
39 | ///   deserializer;
40 | /// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
41 | ///   (this is the difference): the text content is passed to the deserializer directly;
42 | /// - structs and maps call [`Visitor::visit_borrowed_str`] or [`Visitor::visit_string`],
43 | ///   it is the responsibility of the type to return an error if it is not able to process
44 | ///   this data;
45 | /// - enums:
46 | ///   - the variant name is deserialized as `$text`;
47 | ///   - the content is deserialized using the same deserializer:
48 | ///     - unit variants: just return `()`;
49 | ///     - newtype variants forward deserialization to the inner type using the
50 | ///       same deserializer;
51 | ///     - tuple and struct variants are deserialized using [`SimpleTypeDeserializer`].
52 | ///
53 | /// [`MapValueDeserializer`]: ../map/struct.MapValueDeserializer.html
54 | /// [`DeEvent::Text`]: crate::de::DeEvent::Text
55 | /// [`FromStr`]: std::str::FromStr
56 | /// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean
57 | pub struct TextDeserializer<'de>(pub Text<'de>);
 58 | 
 59 | impl<'de> TextDeserializer<'de> {
 60 |     /// Returns the text of this node: the concatenated content of consecutive
 61 |     /// [`Text`] and [`CData`] events, used inside [`deserialize_primitives!()`].
 62 |     ///
 63 |     /// [`Text`]: crate::events::Event::Text
 64 |     /// [`CData`]: crate::events::Event::CData
 65 |     #[inline]
 66 |     fn read_string(self) -> Result<Cow<'de, str>, DeError> {
 67 |         Ok(self.0.text)
 68 |     }
 69 | }
 70 | 
 71 | impl<'de> Deserializer<'de> for TextDeserializer<'de> {
 72 |     type Error = DeError;
 73 | 
 74 |     deserialize_primitives!();
 75 | 
 76 |     fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
 77 |     where
 78 |         V: Visitor<'de>,
 79 |     {
 80 |         visitor.visit_unit() // the text content is ignored
 81 |     }
 82 | 
 83 |     fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
 84 |     where
 85 |         V: Visitor<'de>,
 86 |     {
 87 |         if self.0.is_empty() {
 88 |             visitor.visit_none()
 89 |         } else {
 90 |             visitor.visit_some(self)
 91 |         }
 92 |     }
 93 | 
 94 |     /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
 95 |     /// with this deserializer.
 96 |     fn deserialize_newtype_struct<V>(
 97 |         self,
 98 |         _name: &'static str,
 99 |         visitor: V,
100 |     ) -> Result<V::Value, Self::Error>
101 |     where
102 |         V: Visitor<'de>,
103 |     {
104 |         visitor.visit_newtype_struct(self)
105 |     }
106 | 
107 |     /// This method deserializes a sequence inside of an element that itself is
108 |     /// a sequence element:
109 |     ///
110 |     /// ```xml
111 |     /// <>
112 |     ///   ...
113 |     ///   inner sequence as xs:list
114 |     ///   ...
115 |     /// </>
116 |     /// ```
117 |     fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
118 |     where
119 |         V: Visitor<'de>,
120 |     {
121 |         SimpleTypeDeserializer::from_text_content(self.0).deserialize_seq(visitor)
122 |     }
123 | 
124 |     #[inline]
125 |     fn deserialize_struct<V>(
126 |         self,
127 |         _name: &'static str,
128 |         _fields: &'static [&'static str],
129 |         visitor: V,
130 |     ) -> Result<V::Value, Self::Error>
131 |     where
132 |         V: Visitor<'de>,
133 |     {
134 |         // Deserializer methods are only hints: if the deserializer cannot satisfy a
135 |         // request, it should return the data that it has. It is the responsibility
136 |         // of a Visitor to return an error if it does not understand the data
137 |         self.deserialize_str(visitor)
138 |     }
139 | 
140 |     fn deserialize_enum<V>(
141 |         self,
142 |         _name: &'static str,
143 |         _variants: &'static [&'static str],
144 |         visitor: V,
145 |     ) -> Result<V::Value, Self::Error>
146 |     where
147 |         V: Visitor<'de>,
148 |     {
149 |         visitor.visit_enum(self)
150 |     }
151 | 
152 |     #[inline]
153 |     fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
154 |     where
155 |         V: Visitor<'de>,
156 |     {
157 |         self.deserialize_str(visitor)
158 |     }
159 | }
160 | 
161 | impl<'de> EnumAccess<'de> for TextDeserializer<'de> {
162 |     type Error = DeError;
163 |     type Variant = Self;
164 | 
165 |     fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
166 |     where
167 |         V: DeserializeSeed<'de>,
168 |     {
169 |         let key = BorrowedStrDeserializer::<DeError>::new(TEXT_KEY);
170 |         seed.deserialize(key).map(|variant| (variant, self)) // the variant is named by `TEXT_KEY`
171 |     }
172 | }
173 | 
174 | impl<'de> VariantAccess<'de> for TextDeserializer<'de> {
175 |     type Error = DeError;
176 | 
177 |     #[inline]
178 |     fn unit_variant(self) -> Result<(), Self::Error> {
179 |         Ok(()) // the text content is ignored for unit variants
180 |     }
181 | 
182 |     fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
183 |     where
184 |         T: DeserializeSeed<'de>,
185 |     {
186 |         seed.deserialize(self)
187 |     }
188 | 
189 |     #[inline]
190 |     fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
191 |     where
192 |         V: Visitor<'de>,
193 |     {
194 |         self.deserialize_tuple(len, visitor)
195 |     }
196 | 
197 |     #[inline]
198 |     fn struct_variant<V>(
199 |         self,
200 |         fields: &'static [&'static str],
201 |         visitor: V,
202 |     ) -> Result<V::Value, Self::Error>
203 |     where
204 |         V: Visitor<'de>,
205 |     {
206 |         self.deserialize_struct("", fields, visitor) // the name is unused by `deserialize_struct`
207 |     }
208 | }
209 | 


--------------------------------------------------------------------------------
/src/de/var.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     de::key::QNameDeserializer,
  3 |     de::map::ElementMapAccess,
  4 |     de::resolver::EntityResolver,
  5 |     de::simple_type::SimpleTypeDeserializer,
  6 |     de::{DeEvent, Deserializer, XmlRead, TEXT_KEY},
  7 |     errors::serialize::DeError,
  8 | };
  9 | use serde::de::value::BorrowedStrDeserializer;
 10 | use serde::de::{self, DeserializeSeed, Deserializer as _, Visitor};
 11 | 
 12 | /// An enum access that reads the enum variant from the events of a `Deserializer`
 13 | pub struct EnumAccess<'de, 'd, R, E>
 14 | where
 15 |     R: XmlRead<'de>,
 16 |     E: EntityResolver,
 17 | {
 18 |     de: &'d mut Deserializer<'de, R, E>,
 19 | }
 20 | 
 21 | impl<'de, 'd, R, E> EnumAccess<'de, 'd, R, E>
 22 | where
 23 |     R: XmlRead<'de>,
 24 |     E: EntityResolver,
 25 | {
 26 |     pub fn new(de: &'d mut Deserializer<'de, R, E>) -> Self {
 27 |         Self { de } // only borrows the deserializer, nothing is read yet
 28 |     }
 29 | }
 30 | 
 31 | impl<'de, 'd, R, E> de::EnumAccess<'de> for EnumAccess<'de, 'd, R, E>
 32 | where
 33 |     R: XmlRead<'de>,
 34 |     E: EntityResolver,
 35 | {
 36 |     type Error = DeError;
 37 |     type Variant = VariantAccess<'de, 'd, R, E>;
 38 | 
 39 |     fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
 40 |     where
 41 |         V: DeserializeSeed<'de>,
 42 |     {
 43 |         let (name, is_text) = match self.de.peek()? {
 44 |             DeEvent::Start(e) => (seed.deserialize(QNameDeserializer::from_elem(e)?)?, false),
 45 |             DeEvent::Text(_) => (
 46 |                 seed.deserialize(BorrowedStrDeserializer::<DeError>::new(TEXT_KEY))?,
 47 |                 true,
 48 |             ),
 49 |             // SAFETY: The reader guarantees that we don't have unmatched tags.
 50 |             // If we are here, then our deserializer has a bug
 51 |             DeEvent::End(e) => unreachable!("{:?}", e),
 52 |             DeEvent::Eof => return Err(DeError::UnexpectedEof),
 53 |         };
 54 |         Ok((
 55 |             name,
 56 |             VariantAccess {
 57 |                 de: self.de,
 58 |                 is_text,
 59 |             },
 60 |         ))
 61 |     }
 62 | }
 63 | 
 64 | pub struct VariantAccess<'de, 'd, R, E>
 65 | where
 66 |     R: XmlRead<'de>,
 67 |     E: EntityResolver,
 68 | {
 69 |     de: &'d mut Deserializer<'de, R, E>,
 70 |     /// `true` if the variant should be deserialized from a textual content
 71 |     /// and `false` if it should be deserialized from a tag
 72 |     is_text: bool,
 73 | }
 74 | 
 75 | impl<'de, 'd, R, E> de::VariantAccess<'de> for VariantAccess<'de, 'd, R, E>
 76 | where
 77 |     R: XmlRead<'de>,
 78 |     E: EntityResolver,
 79 | {
 80 |     type Error = DeError;
 81 | 
 82 |     fn unit_variant(self) -> Result<(), Self::Error> {
 83 |         match self.de.next()? {
 84 |             // Consume subtree
 85 |             DeEvent::Start(e) => self.de.read_to_end(e.name()),
 86 |             // There is no need to deserialize using SimpleTypeDeserializer, because
 87 |             // it returns `()` when `deserialize_unit()` is requested
 88 |             DeEvent::Text(_) => Ok(()),
 89 |             // SAFETY: the other events are filtered in `variant_seed()`
 90 |             _ => unreachable!("Only `Start` or `Text` events are possible here"),
 91 |         }
 92 |     }
 93 | 
 94 |     fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
 95 |     where
 96 |         T: DeserializeSeed<'de>,
 97 |     {
 98 |         if self.is_text {
 99 |             match self.de.next()? {
100 |                 DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
101 |                 // SAFETY: the other events are filtered in `variant_seed()`
102 |                 _ => unreachable!("Only `Text` events are possible here"),
103 |             }
104 |         } else {
105 |             seed.deserialize(self.de)
106 |         }
107 |     }
108 | 
109 |     fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
110 |     where
111 |         V: Visitor<'de>,
112 |     {
113 |         if self.is_text {
114 |             match self.de.next()? {
115 |                 DeEvent::Text(e) => {
116 |                     SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor)
117 |                 }
118 |                 // SAFETY: the other events are filtered in `variant_seed()`
119 |                 _ => unreachable!("Only `Text` events are possible here"),
120 |             }
121 |         } else {
122 |             self.de.deserialize_tuple(len, visitor)
123 |         }
124 |     }
125 | 
126 |     fn struct_variant<V>(
127 |         self,
128 |         fields: &'static [&'static str],
129 |         visitor: V,
130 |     ) -> Result<V::Value, Self::Error>
131 |     where
132 |         V: Visitor<'de>,
133 |     {
134 |         match self.de.next()? {
135 |             DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.de, e, fields)?),
136 |             DeEvent::Text(e) => {
137 |                 SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor)
138 |             }
139 |             // SAFETY: the other events are filtered in `variant_seed()`
140 |             _ => unreachable!("Only `Start` or `Text` events are possible here"),
141 |         }
142 |     }
143 | }
144 | 


--------------------------------------------------------------------------------
/src/encoding.rs:
--------------------------------------------------------------------------------
  1 | //! A module for wrappers that encode / decode data.
  2 | 
  3 | use std::borrow::Cow;
  4 | use std::str::Utf8Error;
  5 | 
  6 | #[cfg(feature = "encoding")]
  7 | use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8};
  8 | 
  9 | /// The Unicode "byte order mark" (BOM, \u{FEFF}) encoded as UTF-8.
 10 | /// See <https://unicode.org/faq/utf_bom.html#bom1>
 11 | pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
 12 | /// The Unicode "byte order mark" (BOM, \u{FEFF}) encoded as UTF-16 with little-endian byte order.
 13 | /// See <https://unicode.org/faq/utf_bom.html#bom1>
 14 | #[cfg(feature = "encoding")]
 15 | pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
 16 | /// The Unicode "byte order mark" (BOM, \u{FEFF}) encoded as UTF-16 with big-endian byte order.
 17 | /// See <https://unicode.org/faq/utf_bom.html#bom1>
 18 | #[cfg(feature = "encoding")]
 19 | pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
 20 | 
 21 | /// An error that occurs when decoding or encoding fails
 22 | ///
 23 | /// If feature [`encoding`] is disabled, the [`EncodingError`] is always [`EncodingError::Utf8`].
 24 | ///
 25 | /// [`encoding`]: ../index.html#encoding
 26 | #[derive(Clone, Debug, PartialEq, Eq)]
 27 | #[non_exhaustive]
 28 | pub enum EncodingError {
 29 |     /// The input was not valid UTF-8
 30 |     Utf8(Utf8Error),
 31 |     /// The input did not adhere to the given encoding
 32 |     #[cfg(feature = "encoding")]
 33 |     Other(&'static Encoding),
 34 | }
 35 | 
 36 | impl From<Utf8Error> for EncodingError {
 37 |     #[inline]
 38 |     fn from(error: Utf8Error) -> Self {
 39 |         EncodingError::Utf8(error)
 40 |     }
 41 | }
 42 | 
 43 | impl std::error::Error for EncodingError {
 44 |     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
 45 |         match self {
 46 |             Self::Utf8(e) => Some(e), // the wrapped `Utf8Error` is the underlying cause
 47 |             #[cfg(feature = "encoding")]
 48 |             Self::Other(_) => None, // only the encoding is stored, there is no inner error
 49 |         }
 50 |     }
 51 | }
 52 | 
 53 | impl std::fmt::Display for EncodingError {
 54 |     fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 55 |         match self {
 56 |             Self::Utf8(error) => write!(fmt, "cannot decode input using UTF-8: {}", error),
 57 |             #[cfg(feature = "encoding")]
 58 |             Self::Other(enc) => write!(fmt, "cannot decode input using {}", enc.name()),
 59 |         }
 60 |     }
 61 | }
 62 | 
 63 | /// Decoder of byte slices into strings.
 64 | ///
 65 | /// If feature [`encoding`] is enabled, the encoding is taken from the `"encoding"`
 66 | /// XML declaration, or UTF-8 is assumed if the XML has no <?xml ?> declaration, the
 67 | /// encoding key is not defined or it contains an unknown encoding.
 68 | ///
 69 | /// The library supports any UTF-8 compatible encoding that the `encoding_rs` crate
 70 | /// supports. [*UTF-16 and ISO-2022-JP are not supported at the present*][utf16].
 71 | ///
 72 | /// If feature [`encoding`] is disabled, the decoder is always a UTF-8 decoder:
 73 | /// any XML declarations are ignored.
 74 | ///
 75 | /// [utf16]: https://github.com/tafia/quick-xml/issues/158
 76 | /// [`encoding`]: ../index.html#encoding
 77 | #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 78 | pub struct Decoder {
 79 |     #[cfg(feature = "encoding")]
 80 |     pub(crate) encoding: &'static Encoding,
 81 | }
 82 | 
 83 | impl Decoder {
 84 |     pub(crate) const fn utf8() -> Self {
 85 |         Self {
 86 |             #[cfg(feature = "encoding")]
 87 |             encoding: UTF_8,
 88 |         }
 89 |     }
 90 | 
 91 |     #[cfg(all(test, feature = "encoding", feature = "serialize"))]
 92 |     pub(crate) const fn utf16() -> Self {
 93 |         Self { encoding: UTF_16LE }
 94 |     }
 95 | }
 96 | 
 97 | impl Decoder {
 98 |     /// Returns the `Reader`'s encoding.
 99 |     ///
100 |     /// This encoding will be used by [`decode`].
101 |     ///
102 |     /// [`decode`]: Self::decode
103 |     #[cfg(feature = "encoding")]
104 |     pub const fn encoding(&self) -> &'static Encoding {
105 |         self.encoding
106 |     }
107 | 
108 |     /// ## Without `encoding` feature
109 |     ///
110 |     /// Decodes a UTF-8 slice regardless of XML declaration and ignoring BOM
111 |     /// if it is present in the `bytes`.
112 |     ///
113 |     /// ## With `encoding` feature
114 |     ///
115 |     /// Decodes the specified bytes using the encoding declared in the XML, if it
116 |     /// was declared there, or UTF-8 otherwise, and ignoring BOM if it is present
117 |     /// in the `bytes`.
118 |     ///
119 |     /// ----
120 |     /// Returns an error in case of malformed sequences in the `bytes`.
121 |     pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>, EncodingError> {
122 |         #[cfg(not(feature = "encoding"))]
123 |         let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?));
124 | 
125 |         #[cfg(feature = "encoding")]
126 |         let decoded = decode(bytes, self.encoding);
127 | 
128 |         decoded
129 |     }
130 | 
131 |     /// Like [`decode`][Self::decode], but appends the result to a pre-allocated buffer.
132 |     pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<(), EncodingError> {
133 |         #[cfg(not(feature = "encoding"))]
134 |         buf.push_str(std::str::from_utf8(bytes)?);
135 | 
136 |         #[cfg(feature = "encoding")]
137 |         decode_into(bytes, self.encoding, buf)?;
138 | 
139 |         Ok(())
140 |     }
141 | 
142 |     /// Decodes the `Cow` buffer, preserving the lifetime
143 |     pub(crate) fn decode_cow<'b>(
144 |         &self,
145 |         bytes: &Cow<'b, [u8]>,
146 |     ) -> Result<Cow<'b, str>, EncodingError> {
147 |         match bytes {
148 |             Cow::Borrowed(bytes) => self.decode(bytes),
149 |             // Convert to owned, because otherwise the Cow will be bound to the wrong lifetime
150 |             Cow::Owned(bytes) => Ok(self.decode(bytes)?.into_owned().into()),
151 |         }
152 |     }
153 | }
154 | 
155 | /// Decodes `bytes` using the given `encoding`, without any BOM handling.
156 | ///
157 | /// Returns an error in case of malformed or non-representable sequences in the `bytes`.
158 | #[cfg(feature = "encoding")]
159 | pub fn decode<'b>(
160 |     bytes: &'b [u8],
161 |     encoding: &'static Encoding,
162 | ) -> Result<Cow<'b, str>, EncodingError> {
163 |     // `None` is returned by the decoder when `bytes` cannot be decoded
164 |     let decoded = encoding.decode_without_bom_handling_and_without_replacement(bytes);
165 |     decoded.ok_or(EncodingError::Other(encoding))
166 | }
167 | 
168 | /// Like [`decode`], but appends the decoded text to the pre-allocated `buf`.
169 | #[cfg(feature = "encoding")]
170 | pub fn decode_into(
171 |     bytes: &[u8],
172 |     encoding: &'static Encoding,
173 |     buf: &mut String,
174 | ) -> Result<(), EncodingError> {
175 |     // UTF-8 input only has to be validated, no transcoding is required
176 |     if encoding == UTF_8 {
177 |         buf.push_str(std::str::from_utf8(bytes)?);
178 |         return Ok(());
179 |     }
180 | 
181 |     let mut decoder = encoding.new_decoder_without_bom_handling();
182 |     // SAFETY: None can be returned only if required size will overflow usize,
183 |     // but in that case String::reserve also panics
184 |     let capacity = decoder
185 |         .max_utf8_buffer_length_without_replacement(bytes.len())
186 |         .unwrap();
187 |     buf.reserve(capacity);
188 | 
189 |     match decoder.decode_to_string_without_replacement(bytes, buf, true) {
190 |         (DecoderResult::InputEmpty, read) => {
191 |             debug_assert_eq!(read, bytes.len());
192 |             Ok(())
193 |         }
194 |         (DecoderResult::Malformed(_, _), _) => Err(EncodingError::Other(encoding)),
195 |         // SAFETY: We allocate enough space above
196 |         (DecoderResult::OutputFull, _) => unreachable!(),
197 |     }
198 | }
199 | 
200 | /// Automatic encoding detection of XML files using the
201 | /// [recommended algorithm](https://www.w3.org/TR/xml11/#sec-guessing).
202 | ///
203 | /// If encoding is detected, `Some` is returned with an encoding and size of BOM
204 | /// in bytes, if detection was performed using BOM, or zero, if detection was
205 | /// performed without BOM.
206 | ///
207 | /// If encoding was not recognized, `None` is returned.
208 | ///
209 | /// Because the [`encoding_rs`] crate supports only a subset of those encodings, only
210 | /// the supported subset is detected, which is UTF-8, UTF-16 BE and UTF-16 LE.
211 | ///
212 | /// The algorithm suggests examining up to the first 4 bytes to determine encoding
213 | /// according to the following table:
214 | ///
215 | /// | Bytes       |Detected encoding
216 | /// |-------------|------------------------------------------
217 | /// | **BOM**
218 | /// |`FE FF ## ##`|UTF-16, big-endian
219 | /// |`FF FE ## ##`|UTF-16, little-endian
220 | /// |`EF BB BF`   |UTF-8
221 | /// | **No BOM**
222 | /// |`00 3C 00 3F`|UTF-16 BE or ISO-10646-UCS-2 BE or similar 16-bit BE (use declared encoding to find the exact one)
223 | /// |`3C 00 3F 00`|UTF-16 LE or ISO-10646-UCS-2 LE or similar 16-bit LE (use declared encoding to find the exact one)
224 | /// |`3C 3F 78 6D`|UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns for the relevant ASCII characters, the encoding declaration itself may be read reliably
225 | #[cfg(feature = "encoding")]
226 | pub fn detect_encoding(bytes: &[u8]) -> Option<(&'static Encoding, usize)> {
227 |     match bytes {
228 |         // with BOM
229 |         _ if bytes.starts_with(UTF16_BE_BOM) => Some((UTF_16BE, 2)),
230 |         _ if bytes.starts_with(UTF16_LE_BOM) => Some((UTF_16LE, 2)),
231 |         _ if bytes.starts_with(UTF8_BOM) => Some((UTF_8, 3)),
232 | 
233 |         // without BOM
234 |         _ if bytes.starts_with(&[0x00, b'<', 0x00, b'?']) => Some((UTF_16BE, 0)), // Some BE encoding, for example, UTF-16 or ISO-10646-UCS-2
235 |         _ if bytes.starts_with(&[b'<', 0x00, b'?', 0x00]) => Some((UTF_16LE, 0)), // Some LE encoding, for example, UTF-16 or ISO-10646-UCS-2
236 |         _ if bytes.starts_with(&[b'<', b'?', b'x', b'm']) => Some((UTF_8, 0)), // Some ASCII compatible
237 | 
238 |         _ => None,
239 |     }
240 | }
241 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! High performance XML reader/writer.
 2 | //!
 3 | //! # Description
 4 | //!
 5 | //! quick-xml contains two modes of operation:
 6 | //!
 7 | //! A streaming API based on the [StAX] model. This is suited for larger XML documents which
 8 | //! cannot be completely read into memory at once.
 9 | //!
10 | //! The user has to explicitly _ask_ for the next XML event, similar to a database cursor.
11 | //! This is achieved by the following two structs:
12 | //!
13 | //! - [`Reader`]: A low level XML pull-reader where buffer allocation/clearing is left to user.
14 | //! - [`Writer`]: An XML writer. Can be nested with readers if you want to transform XMLs.
15 | //!
16 | //! Especially for nested XML elements, the user must keep track _where_ (how deep)
17 | //! in the XML document the current event is located.
18 | //!
19 | //! quick-xml contains optional support of asynchronous reading and writing using [tokio].
20 | //! To get it enable the [`async-tokio`](#async-tokio) feature.
21 | //!
22 | //! Furthermore, quick-xml also contains optional [Serde] support to directly
23 | //! serialize and deserialize from structs, without having to deal with the XML events.
24 | //! To get it enable the [`serialize`](#serialize) feature. Read more about mapping Rust types
25 | //! to XML in the documentation of [`de`] module. Also check [`serde_helpers`]
26 | //! module.
27 | //!
28 | //! # Examples
29 | //!
30 | //! - For a reading example see [`Reader`]
31 | //! - For a writing example see [`Writer`]
32 | //!
33 | //! # Features
34 | //!
35 | //! `quick-xml` supports the following features:
36 | //!
37 | //! [StAX]: https://en.wikipedia.org/wiki/StAX
38 | //! [tokio]: https://tokio.rs/
39 | //! [Serde]: https://serde.rs/
40 | //! [`de`]: ./de/index.html
41 | #![cfg_attr(
42 |     feature = "document-features",
43 |     cfg_attr(doc, doc = ::document_features::document_features!(
44 |         feature_label = "<a id=\"{feature}\" href=\"#{feature}\"><strong><code>{feature}</code></strong></a>"
45 |     ))
46 | )]
47 | #![forbid(unsafe_code)]
48 | #![deny(missing_docs)]
49 | #![recursion_limit = "1024"]
50 | // Enable feature requirements in the docs from 1.57
51 | // See https://stackoverflow.com/questions/61417452
52 | // docs.rs defines `docsrs` when building documentation
53 | #![cfg_attr(docsrs, feature(doc_auto_cfg))]
54 | 
55 | #[cfg(feature = "serialize")]
56 | pub mod de;
57 | pub mod encoding;
58 | pub mod errors;
59 | pub mod escape;
60 | pub mod events;
61 | pub mod name;
62 | pub mod parser;
63 | pub mod reader;
64 | #[cfg(feature = "serialize")]
65 | pub mod se;
66 | #[cfg(feature = "serde-types")]
67 | pub mod serde_helpers;
68 | /// Not an official API, public for integration tests
69 | #[doc(hidden)]
70 | pub mod utils;
71 | pub mod writer;
72 | 
73 | // reexports
74 | pub use crate::encoding::Decoder;
75 | #[cfg(feature = "serialize")]
76 | pub use crate::errors::serialize::{DeError, SeError};
77 | pub use crate::errors::{Error, Result};
78 | pub use crate::reader::{NsReader, Reader};
79 | pub use crate::writer::{ElementWriter, Writer};
80 | 


--------------------------------------------------------------------------------
/src/parser/element.rs:
--------------------------------------------------------------------------------
  1 | //! Contains a parser for an XML element.
  2 | 
  3 | use crate::errors::SyntaxError;
  4 | use crate::parser::Parser;
  5 | 
  6 | /// A parser that searches for a `>` symbol in the slice outside of quoted regions.
  7 | ///
  8 | /// The parser considers two quoted regions: a double-quoted (`"..."`) and
  9 | /// a single-quoted (`'...'`) region. Matches found inside those regions are not
 10 | /// considered as results. Each region starts and ends by its quote symbol,
 11 | /// which cannot be escaped (but can be encoded as XML character entity or named
 12 | /// entity. Anyway, that encoding does not contain literal quotes).
 13 | ///
 14 | /// To use a parser create an instance of parser and [`feed`] data into it.
 15 | /// After a successful search the parser will return [`Some`] with the position of
 16 | /// the found symbol. If the search is unsuccessful, [`None`] will be returned. You
 17 | /// typically would expect a positive result of the search, so you should feed
 18 | /// new data until you get it.
 19 | ///
 20 | /// NOTE: after a successful match the parser does not return to the initial
 21 | /// state and should not be used anymore. Create a new parser if you want to perform
 22 | /// a new search.
 23 | ///
 24 | /// # Example
 25 | ///
 26 | /// ```
 27 | /// # use pretty_assertions::assert_eq;
 28 | /// use quick_xml::parser::{ElementParser, Parser};
 29 | ///
 30 | /// let mut parser = ElementParser::default();
 31 | ///
 32 | /// // Parse `<my-element  with = 'some > inside'>and the text follow...`
 33 | /// // split into three chunks
 34 | /// assert_eq!(parser.feed(b"<my-element"), None);
 35 | /// // ...get new chunk of data
 36 | /// assert_eq!(parser.feed(b" with = 'some >"), None);
 37 | /// // ...get another chunk of data
 38 | /// assert_eq!(parser.feed(b" inside'>and the text follow..."), Some(8));
 39 | /// //                       ^       ^
 40 | /// //                       0       8
 41 | /// ```
 42 | ///
 43 | /// [`feed`]: Self::feed()
 44 | #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 45 | pub enum ElementParser {
 46 |     /// The initial state (inside element, but outside of attribute value).
 47 |     Outside,
 48 |     /// Inside a single-quoted region (`'...'`).
 49 |     SingleQ,
 50 |     /// Inside a double-quoted region (`"..."`).
 51 |     DoubleQ,
 52 | }
 53 | 
 54 | impl Parser for ElementParser {
 55 |     /// Returns the position of the closing `>` in `bytes`, or `None` if it was not found.
 56 |     #[inline]
 57 |     fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
 58 |         for pos in memchr::memchr3_iter(b'>', b'\'', b'"', bytes) {
 59 |             let found = bytes[pos];
 60 |             match *self {
 61 |                 // `>` ends the element only when it is outside of quoted regions
 62 |                 Self::Outside if found == b'>' => return Some(pos),
 63 |                 Self::Outside if found == b'\'' => *self = Self::SingleQ,
 64 |                 Self::Outside if found == b'"' => *self = Self::DoubleQ,
 65 |                 // a quoted region is closed only by the quote that opened it
 66 |                 Self::SingleQ if found == b'\'' => *self = Self::Outside,
 67 |                 Self::DoubleQ if found == b'"' => *self = Self::Outside,
 68 |                 // any other byte does not change the state
 69 |                 _ => {}
 70 |             }
 71 |         }
 72 |         None
 73 |     }
 74 | 
 75 |     #[inline]
 76 |     fn eof_error() -> SyntaxError {
 77 |         SyntaxError::UnclosedTag
 78 |     }
 79 | }
 80 | 
 81 | impl Default for ElementParser {
 82 |     #[inline]
 83 |     fn default() -> Self {
 84 |         ElementParser::Outside // a new parser starts outside of any quoted region
 85 |     }
 86 | }
 87 | 
 88 | #[test]
 89 | fn parse() {
 90 |     use pretty_assertions::assert_eq;
 91 |     use ElementParser::*;
 92 | 
 93 |     /// Returns `Ok(pos)` with the position in the buffer where the element ends.
 94 |     ///
 95 |     /// Returns `Err(internal_state)` if parsing is not done yet.
 96 |     fn parse_element(bytes: &[u8], mut parser: ElementParser) -> Result<usize, ElementParser> {
 97 |         match parser.feed(bytes) {
 98 |             Some(i) => Ok(i),
 99 |             None => Err(parser),
100 |         }
101 |     }
102 | 
103 |     assert_eq!(parse_element(b"", Outside), Err(Outside));
104 |     assert_eq!(parse_element(b"", SingleQ), Err(SingleQ));
105 |     assert_eq!(parse_element(b"", DoubleQ), Err(DoubleQ));
106 | 
107 |     assert_eq!(parse_element(b"'", Outside), Err(SingleQ));
108 |     assert_eq!(parse_element(b"'", SingleQ), Err(Outside));
109 |     assert_eq!(parse_element(b"'", DoubleQ), Err(DoubleQ));
110 | 
111 |     assert_eq!(parse_element(b"\"", Outside), Err(DoubleQ));
112 |     assert_eq!(parse_element(b"\"", SingleQ), Err(SingleQ));
113 |     assert_eq!(parse_element(b"\"", DoubleQ), Err(Outside));
114 | 
115 |     assert_eq!(parse_element(b">", Outside), Ok(0));
116 |     assert_eq!(parse_element(b">", SingleQ), Err(SingleQ));
117 |     assert_eq!(parse_element(b">", DoubleQ), Err(DoubleQ));
118 | 
119 |     assert_eq!(parse_element(b"''>", Outside), Ok(2));
120 |     assert_eq!(parse_element(b"''>", SingleQ), Err(SingleQ));
121 |     assert_eq!(parse_element(b"''>", DoubleQ), Err(DoubleQ));
122 | }
123 | 


--------------------------------------------------------------------------------
/src/parser/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Contains low-level parsers of different XML pieces.
 2 | 
 3 | use crate::errors::SyntaxError;
 4 | 
 5 | mod element;
 6 | mod pi;
 7 | 
 8 | pub use element::ElementParser;
 9 | pub use pi::PiParser;
10 | 
11 | /// Used to decouple reading of data from a data source and parsing XML structure from it.
12 | /// This is a state preserved between getting chunks of bytes from the reader.
13 | ///
14 | /// This trait is implemented for every parser that processes a piece of XML grammar.
15 | pub trait Parser {
16 |     /// Processes new data and tries to determine the end of the parsed thing.
17 |     ///
18 |     /// Returns the position of the end of the thing in `bytes` in case of a successful
19 |     /// search and `None` otherwise.
20 |     ///
21 |     /// # Parameters
22 |     /// - `bytes`: a slice to find the end of a thing.
23 |     ///   Should contain text in ASCII-compatible encoding
24 |     fn feed(&mut self, bytes: &[u8]) -> Option<usize>;
25 | 
26 |     /// Returns the parse error produced by this parser in case of reaching the end of
27 |     /// input without finding the end of a parsed thing.
28 |     fn eof_error() -> SyntaxError;
29 | }
30 | 


--------------------------------------------------------------------------------
/src/parser/pi.rs:
--------------------------------------------------------------------------------
  1 | //! Contains a parser for an XML processing instruction.
  2 | 
  3 | use crate::errors::SyntaxError;
  4 | use crate::parser::Parser;
  5 | 
  6 | /// A parser that searches for the `?>` sequence in a slice.
  7 | ///
  8 | /// To use the parser, create an instance of it and [`feed`] data into it.
  9 | /// After a successful search the parser returns [`Some`] with the position of
 10 | /// the `>` that closes the processing instruction (the last byte of `?>`). If the
 11 | /// search was unsuccessful, [`None`] is returned. You would typically expect the
 12 | /// search to succeed eventually, so keep feeding new data until it does.
 13 | ///
 14 | /// NOTE: after a successful match the parser is not returned to the initial
 15 | /// state and should not be used anymore. Create a new parser if you want to perform
 16 | /// a new search.
 17 | ///
 18 | /// # Example
 19 | ///
 20 | /// ```
 21 | /// # use pretty_assertions::assert_eq;
 22 | /// use quick_xml::parser::{Parser, PiParser};
 23 | ///
 24 | /// let mut parser = PiParser::default();
 25 | ///
 26 | /// // Parse `<?instruction with = 'some > and ?' inside?>and the text follow...`
 27 | /// // split into three chunks
 28 | /// assert_eq!(parser.feed(b"<?instruction"), None);
 29 | /// // ...get new chunk of data
 30 | /// assert_eq!(parser.feed(b" with = 'some > and ?"), None);
 31 | /// // ...get another chunk of data
 32 | /// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(9));
 33 | /// //                       ^        ^
 34 | /// //                       0        9
 35 | /// ```
 36 | ///
 37 | /// [`feed`]: Self::feed()
 38 | #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
 39 | pub struct PiParser(
 40 |     /// A flag that indicates whether the `bytes` passed to the previous attempt
 41 |     /// to find the end ended with `?`.
 42 |     pub bool,
 43 | );
 44 | 
 45 | impl Parser for PiParser {
 46 |     /// Determines the end position of a processing instruction in the provided slice.
 47 |     /// A processing instruction ends on the first occurrence of `?>`, which cannot be
 48 |     /// escaped.
 49 |     ///
 50 |     /// Returns the position of the `>` of the closing `?>`, or `None` if no such sequence was found.
 51 |     ///
 52 |     /// [Section 2.6]: Parameter entity references MUST NOT be recognized within
 53 |     /// processing instructions, so the parser does not search for them.
 54 |     ///
 55 |     /// # Parameters
 56 |     /// - `bytes`: a slice in which to find the end of a processing instruction.
 57 |     ///   Should contain text in an ASCII-compatible encoding
 58 |     ///
 59 |     /// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi
 60 |     #[inline]
 61 |     fn feed(&mut self, bytes: &[u8]) -> Option<usize> {
 62 |         for i in memchr::memchr_iter(b'>', bytes) {
 63 |             match i {
 64 |                 0 if self.0 => return Some(0), // the previous chunk ended with `?`, so this leading `>` completes `?>`
 65 |                 // If the previous byte is `?`, then we found `?>`
 66 |                 i if i > 0 && bytes[i - 1] == b'?' => return Some(i),
 67 |                 _ => {}
 68 |             }
 69 |         }
 70 |         self.0 = bytes.last().copied() == Some(b'?'); // remember whether this chunk ended with `?` for the next call
 71 |         None
 72 |     }
 73 | 
 74 |     #[inline]
 75 |     fn eof_error() -> SyntaxError {
 76 |         SyntaxError::UnclosedPIOrXmlDecl
 77 |     }
 78 | }
 79 | 
 80 | #[test]
 81 | fn pi() {
 82 |     use pretty_assertions::assert_eq;
 83 | 
 84 |     /// Returns `Ok(pos)` with the position in the buffer where the processing
 85 |     /// instruction ends.
 86 |     ///
 87 |     /// Returns `Err(internal_state)` if parsing is not done yet.
 88 |     fn parse_pi(bytes: &[u8], had_question_mark: bool) -> Result<usize, bool> {
 89 |         let mut parser = PiParser(had_question_mark);
 90 |         match parser.feed(bytes) {
 91 |             Some(i) => Ok(i),
 92 |             None => Err(parser.0),
 93 |         }
 94 |     }
 95 | 
 96 |     // Comments show which character was seen last before calling `feed`.
 97 |     // `x` means any character, pipe denotes the start of the buffer that is passed to `feed`
 98 | 
 99 |     assert_eq!(parse_pi(b"", false), Err(false)); // x|
100 |     assert_eq!(parse_pi(b"", true), Err(false)); // ?|
101 | 
102 |     assert_eq!(parse_pi(b"?", false), Err(true)); // x|?
103 |     assert_eq!(parse_pi(b"?", true), Err(true)); // ?|?
104 | 
105 |     assert_eq!(parse_pi(b">", false), Err(false)); // x|>
106 |     assert_eq!(parse_pi(b">", true), Ok(0)); // ?|>
107 | 
108 |     assert_eq!(parse_pi(b"?>", false), Ok(1)); // x|?>
109 |     assert_eq!(parse_pi(b"?>", true), Ok(1)); // ?|?>
110 | 
111 |     assert_eq!(parse_pi(b">?>", false), Ok(2)); // x|>?>
112 |     assert_eq!(parse_pi(b">?>", true), Ok(0)); // ?|>?>
113 | }
114 | 


--------------------------------------------------------------------------------
/src/se/text.rs:
--------------------------------------------------------------------------------
  1 | //! Contains the serializer for the special `$text` field
  2 | 
  3 | use crate::de::TEXT_KEY;
  4 | use crate::se::simple_type::{SimpleSeq, SimpleTypeSerializer};
  5 | use crate::se::SeError;
  6 | use serde::ser::{Impossible, Serialize, Serializer};
  7 | use serde::serde_if_integer128;
  8 | use std::fmt::Write;
  9 | 
 10 | macro_rules! write_primitive { // generates a method `$method` that takes a value of type `$ty`
 11 |     ($method:ident ( $ty:ty )) => {
 12 |         #[inline]
 13 |         fn $method(self, value: $ty) -> Result<Self::Ok, Self::Error> {
 14 |             self.0.$method(value) // forward to the method of the same name on the wrapped value
 15 |         }
 16 |     };
 17 | }
 18 | 
 19 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 20 | 
 21 | /// A serializer used to serialize a `$text` field of a struct or map.
 22 | ///
 23 | /// This serializer is very similar to [`SimpleTypeSerializer`], but differs
 24 | /// from it in how it processes unit enum variants. Unlike [`SimpleTypeSerializer`],
 25 | /// this serializer does not write anything for a unit variant named [`TEXT_KEY`].
 26 | pub struct TextSerializer<W: Write>(pub SimpleTypeSerializer<W>);
 27 | 
 28 | impl<W: Write> Serializer for TextSerializer<W> {
 29 |     type Ok = W; // on success the writer itself is handed back to the caller
 30 |     type Error = SeError;
 31 | 
 32 |     type SerializeSeq = SimpleSeq<W>;
 33 |     type SerializeTuple = SimpleSeq<W>;
 34 |     type SerializeTupleStruct = SimpleSeq<W>;
 35 |     type SerializeTupleVariant = SimpleSeq<W>;
 36 |     type SerializeMap = Impossible<Self::Ok, Self::Error>; // `serialize_map` always returns an error
 37 |     type SerializeStruct = Impossible<Self::Ok, Self::Error>; // `serialize_struct` always returns an error
 38 |     type SerializeStructVariant = Impossible<Self::Ok, Self::Error>; // `serialize_struct_variant` always returns an error
 39 | 
 40 |     write_primitive!(serialize_bool(bool));
 41 | 
 42 |     write_primitive!(serialize_i8(i8));
 43 |     write_primitive!(serialize_i16(i16));
 44 |     write_primitive!(serialize_i32(i32));
 45 |     write_primitive!(serialize_i64(i64));
 46 | 
 47 |     write_primitive!(serialize_u8(u8));
 48 |     write_primitive!(serialize_u16(u16));
 49 |     write_primitive!(serialize_u32(u32));
 50 |     write_primitive!(serialize_u64(u64));
 51 | 
 52 |     serde_if_integer128! {
 53 |         write_primitive!(serialize_i128(i128));
 54 |         write_primitive!(serialize_u128(u128));
 55 |     }
 56 | 
 57 |     write_primitive!(serialize_f32(f32));
 58 |     write_primitive!(serialize_f64(f64));
 59 | 
 60 |     write_primitive!(serialize_char(char));
 61 |     write_primitive!(serialize_str(&str));
 62 |     write_primitive!(serialize_bytes(&[u8]));
 63 | 
 64 |     #[inline]
 65 |     fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
 66 |         self.0.serialize_none()
 67 |     }
 68 | 
 69 |     fn serialize_some<T: ?Sized + Serialize>(self, value: &T) -> Result<Self::Ok, Self::Error> {
 70 |         value.serialize(self) // `Some(value)` is serialized exactly like `value` itself
 71 |     }
 72 | 
 73 |     #[inline]
 74 |     fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
 75 |         self.0.serialize_unit()
 76 |     }
 77 | 
 78 |     #[inline]
 79 |     fn serialize_unit_struct(self, name: &'static str) -> Result<Self::Ok, Self::Error> {
 80 |         self.0.serialize_unit_struct(name)
 81 |     }
 82 | 
 83 |     #[inline]
 84 |     fn serialize_unit_variant(
 85 |         self,
 86 |         name: &'static str,
 87 |         variant_index: u32,
 88 |         variant: &'static str,
 89 |     ) -> Result<Self::Ok, Self::Error> {
 90 |         if variant == TEXT_KEY {
 91 |             Ok(self.0.writer) // write nothing, just hand the writer back
 92 |         } else {
 93 |             self.0.serialize_unit_variant(name, variant_index, variant)
 94 |         }
 95 |     }
 96 | 
 97 |     fn serialize_newtype_struct<T: ?Sized + Serialize>(
 98 |         self,
 99 |         _name: &'static str,
100 |         value: &T,
101 |     ) -> Result<Self::Ok, Self::Error> {
102 |         value.serialize(self) // the newtype wrapper is transparent: only the inner value is serialized
103 |     }
104 | 
105 |     #[inline]
106 |     fn serialize_newtype_variant<T: ?Sized + Serialize>(
107 |         self,
108 |         name: &'static str,
109 |         _variant_index: u32,
110 |         variant: &'static str,
111 |         _value: &T,
112 |     ) -> Result<Self::Ok, Self::Error> {
113 |         Err(SeError::Unsupported(
114 |             format!(
115 |                 "cannot serialize enum newtype variant `{}::{}` as text content value",
116 |                 name, variant
117 |             )
118 |             .into(),
119 |         ))
120 |     }
121 | 
122 |     #[inline]
123 |     fn serialize_seq(self, len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
124 |         self.0.serialize_seq(len)
125 |     }
126 | 
127 |     #[inline]
128 |     fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple, Self::Error> {
129 |         self.0.serialize_tuple(len)
130 |     }
131 | 
132 |     #[inline]
133 |     fn serialize_tuple_struct(
134 |         self,
135 |         name: &'static str,
136 |         len: usize,
137 |     ) -> Result<Self::SerializeTupleStruct, Self::Error> {
138 |         self.0.serialize_tuple_struct(name, len)
139 |     }
140 | 
141 |     #[inline]
142 |     fn serialize_tuple_variant(
143 |         self,
144 |         name: &'static str,
145 |         _variant_index: u32,
146 |         variant: &'static str,
147 |         _len: usize,
148 |     ) -> Result<Self::SerializeTupleVariant, Self::Error> {
149 |         Err(SeError::Unsupported(
150 |             format!(
151 |                 "cannot serialize enum tuple variant `{}::{}` as text content value",
152 |                 name, variant
153 |             )
154 |             .into(),
155 |         ))
156 |     }
157 | 
158 |     #[inline]
159 |     fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
160 |         Err(SeError::Unsupported(
161 |             "cannot serialize map as text content value".into(),
162 |         ))
163 |     }
164 | 
165 |     #[inline]
166 |     fn serialize_struct(
167 |         self,
168 |         name: &'static str,
169 |         _len: usize,
170 |     ) -> Result<Self::SerializeStruct, Self::Error> {
171 |         Err(SeError::Unsupported(
172 |             format!("cannot serialize struct `{}` as text content value", name).into(),
173 |         ))
174 |     }
175 | 
176 |     #[inline]
177 |     fn serialize_struct_variant(
178 |         self,
179 |         name: &'static str,
180 |         _variant_index: u32,
181 |         variant: &'static str,
182 |         _len: usize,
183 |     ) -> Result<Self::SerializeStructVariant, Self::Error> {
184 |         Err(SeError::Unsupported(
185 |             format!(
186 |                 "cannot serialize enum struct variant `{}::{}` as text content value",
187 |                 name, variant
188 |             )
189 |             .into(),
190 |         ))
191 |     }
192 | }
193 | 


--------------------------------------------------------------------------------
/test-gen/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "test-gen"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 7 | 
 8 | [dependencies]
 9 | encoding_rs = "0.8"
10 | serde = { version = "1.0", features = ["derive"] }
11 | serde_json = "1.0"
12 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | # Document descriptions
 2 | 
 3 | document.xml
 4 |     medium length, mostly empty tags, a few short attributes per element, no escaping
 5 | html5.html
 6 | html5.txt
 7 | libreoffice_document.fodt
 8 |     long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces
 9 | linescore.xml
10 |     medium length, lots of attributes, short attributes, few escapes
11 | opennews_all.rss
12 | players.xml
13 |     long, lots of attributes, short attributes, no text, no escapes
14 | rpm_filelists.xml
15 |     long, mostly medium-length text elements, not much escaping
16 | rpm_other.xml
17 |     long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes
18 | rpm_primary.xml
19 |     long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
20 | rpm_primary2.xml
21 |     long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces
22 | sample_1.xml
23 |     short, mix of attributes and text, lots of escapes
24 | sample_ns.xml
25 |     short, lots of namespaces, no escapes
26 | sample_rss.xml
27 |     long, few attributes, mix of attribute lengths, escapes in text content
28 | test_writer_indent_cdata.xml
29 | test_writer_indent.xml
30 |     medium length, lots of namespaces, no escaping
31 | test_writer.xml
32 | utf16be.xml
33 | utf16le.xml
34 | 


--------------------------------------------------------------------------------
/tests/async-tokio.rs:
--------------------------------------------------------------------------------
  1 | use std::io::Cursor;
  2 | use std::iter;
  3 | 
  4 | use pretty_assertions::assert_eq;
  5 | use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event::*};
  6 | use quick_xml::name::QName;
  7 | use quick_xml::reader::Reader;
  8 | use quick_xml::utils::Bytes;
  9 | use tokio::io::{AsyncBufReadExt, AsyncReadExt, BufReader};
 10 | 
 11 | // Import `small_buffers_tests!`
 12 | #[macro_use]
 13 | mod helpers;
 14 | 
 15 | small_buffers_tests!(
 16 |     #[tokio::test]
 17 |     read_event_into_async: tokio::io::BufReader<_>,
 18 |     async, await
 19 | );
 20 | 
 21 | #[tokio::test]
 22 | async fn test_sample() {
 23 |     let src = include_str!("documents/sample_rss.xml");
 24 |     let mut reader = Reader::from_reader(src.as_bytes());
 25 |     let mut buf = Vec::new();
 26 |     let mut count = 0;
 27 |     // Number of loop iterations so far; bounded below to prevent infinite loops if refactoring breaks the test
 28 |     let mut reads = 0;
 29 |     loop {
 30 |         reads += 1;
 31 |         assert!(
 32 |             reads <= 10000,
 33 |             "too many events, possible infinity loop: {reads}"
 34 |         );
 35 |         match reader.read_event_into_async(&mut buf).await {
 36 |             Ok(Start(_)) => count += 1,
 37 |             Ok(Decl(e)) => assert_eq!(e.version().unwrap(), b"1.0".as_ref()),
 38 |             Ok(Eof) => break,
 39 |             Ok(_) => (),
 40 |             Err(e) => panic!("{} at {}", e, reader.error_position()),
 41 |         }
 42 |         buf.clear();
 43 |     }
 44 |     assert_eq!((count, reads), (1247, 5457)); // (number of `Start` events, number of loop iterations)
 45 | }
 46 | 
 47 | /// These tests check that `read_to_end_into_async()` correctly returns the span
 48 | /// even when text is trimmed from both sides
 49 | mod read_to_end {
 50 |     use super::*;
 51 |     use pretty_assertions::assert_eq;
 52 | 
 53 |     #[tokio::test]
 54 |     async fn text() {
 55 |         let mut r = Reader::from_str("<tag> text </tag>");
 56 |         //                            ^0   ^5    ^11
 57 |         r.config_mut().trim_text(true);
 58 | 
 59 |         let mut buf = Vec::new();
 60 |         assert_eq!(
 61 |             r.read_event_into_async(&mut buf).await.unwrap(),
 62 |             Start(BytesStart::new("tag"))
 63 |         );
 64 |         assert_eq!(
 65 |             r.read_to_end_into_async(QName(b"tag"), &mut buf)
 66 |                 .await
 67 |                 .unwrap(),
 68 |             5..11
 69 |         );
 70 |         assert_eq!(r.read_event_into_async(&mut buf).await.unwrap(), Eof);
 71 |     }
 72 | 
 73 |     #[tokio::test]
 74 |     async fn tag() {
 75 |         let mut r = Reader::from_str("<tag> <nested/> </tag>");
 76 |         //                            ^0   ^5         ^16
 77 |         r.config_mut().trim_text(true);
 78 | 
 79 |         let mut buf = Vec::new();
 80 |         assert_eq!(
 81 |             r.read_event_into_async(&mut buf).await.unwrap(),
 82 |             Start(BytesStart::new("tag"))
 83 |         );
 84 |         assert_eq!(
 85 |             r.read_to_end_into_async(QName(b"tag"), &mut buf)
 86 |                 .await
 87 |                 .unwrap(),
 88 |             5..16
 89 |         );
 90 |         assert_eq!(r.read_event_into_async(&mut buf).await.unwrap(), Eof);
 91 |     }
 92 | }
 93 | 
 94 | #[tokio::test]
 95 | async fn issue623() {
 96 |     let mut buf = Vec::new();
 97 |     let mut reader = Reader::from_reader(Cursor::new(
 98 |         b"
 99 |         <AppendedData>
100 |             _binary << data&>
101 |         </AppendedData>
102 |     ",
103 |     ));
104 |     reader.config_mut().trim_text(true);
105 | 
106 |     assert_eq!(
107 |         (
108 |             reader.read_event_into_async(&mut buf).await.unwrap(),
109 |             reader.buffer_position()
110 |         ),
111 |         (Start(BytesStart::new("AppendedData")), 23)
112 |     );
113 | 
114 |     let mut inner = reader.stream();
115 |     // Read to start of data marker
116 |     inner.read_until(b'_', &mut buf).await.unwrap();
117 | 
118 |     // Read binary data. We must know its size
119 |     let mut binary = [0u8; 16];
120 |     inner.read_exact(&mut binary).await.unwrap();
121 |     assert_eq!(Bytes(&binary), Bytes(b"binary << data&>"));
122 |     assert_eq!(inner.offset(), 53);
123 |     assert_eq!(reader.buffer_position(), 53);
124 | 
125 |     assert_eq!(
126 |         (
127 |             reader.read_event_into_async(&mut buf).await.unwrap(),
128 |             reader.buffer_position()
129 |         ),
130 |         (End(BytesEnd::new("AppendedData")), 77)
131 |     );
132 | 
133 |     assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Eof);
134 | }
135 | 
136 | /// Regression test for https://github.com/tafia/quick-xml/issues/751
137 | ///
138 | /// Actually, that error was not found in the async reader, but we would like to test it as well.
139 | #[tokio::test]
140 | async fn issue751() {
141 |     let mut text = Vec::new();
142 |     let mut chunk = Vec::new();
143 |     chunk.extend_from_slice(b"<content>");
144 |     for data in iter::repeat(b"some text inside").take(1000) {
145 |         chunk.extend_from_slice(data);
146 |         text.extend_from_slice(data);
147 |     }
148 |     chunk.extend_from_slice(b"</content>");
149 | 
150 |     let mut reader = Reader::from_reader(quick_xml::utils::Fountain {
151 |         chunk: &chunk,
152 |         consumed: 0,
153 |         overall_read: 0,
154 |     });
155 |     let mut buf = Vec::new();
156 |     let mut starts = 0u64;
157 |     let mut ends = 0u64;
158 |     let mut texts = 0u64;
159 |     loop {
160 |         buf.clear();
161 |         match reader.read_event_into_async(&mut buf).await {
162 |             Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
163 |             Ok(Eof) => break,
164 | 
165 |             Ok(Start(e)) => {
166 |                 starts += 1;
167 |                 assert_eq!(
168 |                     e.name(),
169 |                     QName(b"content"),
170 |                     "starts: {starts}, ends: {ends}, texts: {texts}"
171 |                 );
172 |             }
173 |             Ok(End(e)) => {
174 |                 ends += 1;
175 |                 assert_eq!(
176 |                     e.name(),
177 |                     QName(b"content"),
178 |                     "starts: {starts}, ends: {ends}, texts: {texts}"
179 |                 );
180 |             }
181 |             Ok(Text(e)) => {
182 |                 texts += 1;
183 |                 assert_eq!(
184 |                     e.as_ref(),
185 |                     text,
186 |                     "starts: {starts}, ends: {ends}, texts: {texts}"
187 |                 );
188 |             }
189 |             _ => (),
190 |         }
191 |         // If we successfully read more than `u32::MAX`, the test is passed
192 |         if reader.get_ref().overall_read >= u32::MAX as u64 {
193 |             break;
194 |         }
195 |     }
196 | }
197 | 
198 | /// Regression test for https://github.com/tafia/quick-xml/issues/774
199 | ///
200 | /// The capacity of the buffer is selected in such a way that "text" will be read
201 | /// into one internal buffer of `BufReader` in one `fill_buf()` call and the `<` of
202 | /// the closing tag in the next call.
203 | #[tokio::test]
204 | async fn issue774() {
205 |     let xml = BufReader::with_capacity(9, b"<tag>text</tag>" as &[u8]);
206 |     //                                      ^0       ^9
207 |     let mut reader = Reader::from_reader(xml);
208 |     let mut buf = Vec::new();
209 | 
210 |     assert_eq!(
211 |         reader.read_event_into_async(&mut buf).await.unwrap(),
212 |         Start(BytesStart::new("tag"))
213 |     );
214 |     assert_eq!(
215 |         reader.read_event_into_async(&mut buf).await.unwrap(),
216 |         Text(BytesText::new("text"))
217 |     );
218 |     assert_eq!(
219 |         reader.read_event_into_async(&mut buf).await.unwrap(),
220 |         End(BytesEnd::new("tag"))
221 |     );
222 | }
223 | 


--------------------------------------------------------------------------------
/tests/documents/encoding/Big5.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/Big5.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/EUC-JP.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/EUC-JP.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/EUC-KR.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/EUC-KR.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/GBK.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/GBK.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/IBM866.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/IBM866.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-10.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-10.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-13.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-13.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-14.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-14.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-15.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-15.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-16.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-16.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-2.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-2.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-3.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-3.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-4.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-4.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-5.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-5.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-6.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-6.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-7.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-7.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-8-I.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-8-I.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/ISO-8859-8.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/ISO-8859-8.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/KOI8-R.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/KOI8-R.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/KOI8-U.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/KOI8-U.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/Shift_JIS.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/Shift_JIS.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/gb18030.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/gb18030.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/macintosh.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/macintosh.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/utf16be-bom.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/utf16be-bom.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/utf16be.xml:
--------------------------------------------------------------------------------
1 |  < ? x m l   v e r s i o n = " 1 . 0 " ? > 
2 |  < p r o j e c t   n a m e = " p r o j e c t - n a m e " > 
3 |  < / p r o j e c t > 
4 | 


--------------------------------------------------------------------------------
/tests/documents/encoding/utf16le-bom.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/utf16le-bom.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/utf16le.xml:
--------------------------------------------------------------------------------
1 | < ? x m l   v e r s i o n = " 1 . 0 " ? > 
2 |  < p r o j e c t   n a m e = " p r o j e c t - n a m e " > 
3 |  < / p r o j e c t > 
4 |  


--------------------------------------------------------------------------------
/tests/documents/encoding/utf8-bom.xml:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0"?>
2 | <project name="project-name">
3 | </project>
4 | 


--------------------------------------------------------------------------------
/tests/documents/encoding/utf8.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <project name="project-name">
3 | </project>
4 | 


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-1250.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1250.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-1251.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1251.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-1252.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1252.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-1253.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1253.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-1254.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1254.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-1255.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1255.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-1256.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1256.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-1257.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1257.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-1258.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-1258.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/windows-874.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/windows-874.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/x-mac-cyrillic.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/x-mac-cyrillic.xml


--------------------------------------------------------------------------------
/tests/documents/encoding/x-user-defined.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/encoding/x-user-defined.xml


--------------------------------------------------------------------------------
/tests/documents/html5.html:
--------------------------------------------------------------------------------
1 | <!doctype html>
2 | <a href=foo.html>Hey</a>
3 | &nbsp;
4 | 


--------------------------------------------------------------------------------
/tests/documents/html5.txt:
--------------------------------------------------------------------------------
 1 | DocType(html)
 2 | Characters(
 3 | )
 4 | StartElement(a, attr-error: position 7: attribute value must be enclosed in `"` or `'`)
 5 | Characters(Hey)
 6 | EndElement(a)
 7 | Characters(
 8 | )
 9 | Reference(nbsp)
10 | Characters(
11 | )
12 | EndDocument
13 | 


--------------------------------------------------------------------------------
/tests/documents/linescore.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?><!--Copyright 2012 MLB Advanced Media, L.P.  Use of any content on this page acknowledges agreement to the terms posted here http://gdx.mlb.com/components/copyright.txt-->
  2 | <game xmlns:xs="http://www.w3.org/2001/XMLSchema" id="2008/07/10/dblrok-ddorok-2"
  3 |       venue="Generic"
  4 |       game_pk="239575"
  5 |       time="Gm 2"
  6 |       time_zone="ET"
  7 |       ampm="AM"
  8 |       away_time="3:33"
  9 |       away_time_zone="ET"
 10 |       away_ampm="AM"
 11 |       home_time="3:33"
 12 |       home_time_zone="ET"
 13 |       home_ampm="AM"
 14 |       game_type="R"
 15 |       tiebreaker_sw="N"
 16 |       original_date="2008/07/10"
 17 |       time_zone_aw_lg="0"
 18 |       time_zone_hm_lg="0"
 19 |       time_aw_lg="3:33"
 20 |       aw_lg_ampm="AM"
 21 |       time_hm_lg="3:33"
 22 |       hm_lg_ampm="AM"
 23 |       venue_id="401"
 24 |       scheduled_innings="7"
 25 |       description="6-13 makeup "
 26 |       away_name_abbrev="DSL BLJ1"
 27 |       home_name_abbrev="DSL DOD"
 28 |       away_code="dbl"
 29 |       away_file_code="t604"
 30 |       away_team_id="604"
 31 |       away_team_city="DSL Blue Jays1"
 32 |       away_team_name="DSL Blue Jays1"
 33 |       away_division="S"
 34 |       away_league_id="130"
 35 |       away_sport_code="rok"
 36 |       home_code="ddo"
 37 |       home_file_code="t611"
 38 |       home_team_id="611"
 39 |       home_team_city="DSL Dodgers"
 40 |       home_team_name="DSL Dodgers"
 41 |       home_division="S"
 42 |       home_league_id="130"
 43 |       home_sport_code="rok"
 44 |       day="THU"
 45 |       gameday_sw="N"
 46 |       double_header_sw="Y"
 47 |       away_games_back="8.5"
 48 |       home_games_back="10.0"
 49 |       venue_w_chan_loc="USNY0996"
 50 |       gameday_link="2008_07_10_dblrok_ddorok_2"
 51 |       away_win="16"
 52 |       away_loss="19"
 53 |       home_win="13"
 54 |       home_loss="22"
 55 |       game_data_directory="/gameday/components/game/rok/gid_2008_07_10_dblrok_ddorok_2"
 56 |       league="DSL"
 57 |       top_inning="N"
 58 |       status="Final"
 59 |       ind="F"
 60 |       inning="7"
 61 |       outs="3"
 62 |       away_team_runs="4"
 63 |       home_team_runs="1"
 64 |       away_team_hits="9"
 65 |       home_team_hits="2"
 66 |       away_team_errors="1"
 67 |       home_team_errors="2"
 68 |       away_team_link="/clubs/index.jsp?cid=t604"
 69 |       home_team_link="/clubs/index.jsp?cid=t611"
 70 |       box_link="/milb/stats/stats.jsp?gid=2008_07_10_dblrok_ddorok_2&amp;t=g_box&amp;did=milb"
 71 |       home_recap_link=""
 72 |       away_recap_link="">
 73 |    <linescore inning="1" home_inning_runs="0" away_inning_runs="1"/>
 74 |    <linescore inning="2" home_inning_runs="0" away_inning_runs="0"/>
 75 |    <linescore inning="3" home_inning_runs="1" away_inning_runs="1"/>
 76 |    <linescore inning="4" home_inning_runs="0" away_inning_runs="2"/>
 77 |    <linescore inning="5" home_inning_runs="0" away_inning_runs="0"/>
 78 |    <linescore inning="6" home_inning_runs="0" away_inning_runs="0"/>
 79 |    <linescore inning="7" home_inning_runs="0" away_inning_runs="0"/>
 80 |    <winning_pitcher first_name="Hector" first="Hector" id="542261" last_name="Velazquez"
 81 |                     last="Velazquez"
 82 |                     name_display_roster=""
 83 |                     wins="4"
 84 |                     losses="0"
 85 |                     era="3.41"
 86 |                     s_wins=""
 87 |                     s_losses=""
 88 |                     s_era=""/>
 89 |    <losing_pitcher first_name="Rubby" first="Rubby" id="523989" last_name="De La Rosa"
 90 |                    last="De La Rosa"
 91 |                    name_display_roster=""
 92 |                    wins="0"
 93 |                    losses="3"
 94 |                    era="1.98"
 95 |                    s_wins=""
 96 |                    s_losses=""
 97 |                    s_era=""/>
 98 |    <save_pitcher first_name="" first="" id="" last_name="" last="" name_display_roster=""
 99 |                  wins="0"
100 |                  losses="0"
101 |                  era="0"
102 |                  s_wins=""
103 |                  s_losses=""
104 |                  s_era=""
105 |                  saves="0"/>
106 | </game>
107 | 
108 | 


--------------------------------------------------------------------------------
/tests/documents/opennews_all.rss:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tafia/quick-xml/2b9198715ad20b0ce833adb1b47c6f1a5eb4905c/tests/documents/opennews_all.rss


--------------------------------------------------------------------------------
/tests/documents/rpm_primary2.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <metadata xmlns="http://linux.duke.edu/metadata/common"
  3 |     xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="4">
  4 |     <package type="rpm">
  5 |         <name>complex-package</name>
  6 |         <arch>x86_64</arch>
  7 |         <version epoch="1" ver="2.3.4" rel="5.el8"/>
  8 |         <checksum type="sha256" pkgid="YES">bbb7b0e9350a0f75b923bdd0ef4f9af39765c668a3e70bfd3486ea9f0f618aaf</checksum>
  9 |         <summary>A package for exercising many different features of RPM metadata</summary>
 10 |         <description>Complex package</description>
 11 |         <packager>Michael Bluth</packager>
 12 |         <url>http://bobloblaw.com</url>
 13 |         <time file="1627052744" build="1627052743"/>
 14 |         <size package="8680" installed="117" archive="932"/>
 15 |         <location href="complex-package-2.3.4-5.el8.x86_64.rpm"/>
 16 |         <format>
 17 |             <rpm:license>MPLv2</rpm:license>
 18 |             <rpm:vendor>Bluth Company</rpm:vendor>
 19 |             <rpm:group>Development/Tools</rpm:group>
 20 |             <rpm:buildhost>localhost</rpm:buildhost>
 21 |             <rpm:sourcerpm>complex-package-2.3.4-5.el8.src.rpm</rpm:sourcerpm>
 22 |             <rpm:header-range start="4504" end="8413"/>
 23 |             <rpm:provides>
 24 |                 <rpm:entry name="/usr/bin/ls"/>
 25 |                 <rpm:entry name="complex-package" flags="EQ" epoch="1" ver="2.3.4" rel="5.el8"/>
 26 |                 <rpm:entry name="complex-package(x86-64)" flags="EQ" epoch="1" ver="2.3.4" rel="5.el8"/>
 27 |                 <rpm:entry name="laughter" flags="EQ" epoch="0" ver="33"/>
 28 |                 <rpm:entry name="narration(ronhoward)"/>
 29 |             </rpm:provides>
 30 |             <rpm:requires>
 31 |                 <rpm:entry name="/usr/bin/bash"/>
 32 |                 <rpm:entry name="/usr/sbin/useradd" pre="1"/>
 33 |                 <rpm:entry name="arson" flags="GE" epoch="0" ver="1.0.0" rel="1"/>
 34 |                 <rpm:entry name="fur" flags="LE" epoch="0" ver="2"/>
 35 |                 <rpm:entry name="staircar" flags="LE" epoch="0" ver="99.1" rel="3"/>
 36 |             </rpm:requires>
 37 |             <rpm:conflicts>
 38 |                 <rpm:entry name="foxnetwork" flags="GT" epoch="0" ver="5555"/>
 39 |             </rpm:conflicts>
 40 |             <rpm:obsoletes>
 41 |                 <rpm:entry name="bluemangroup" flags="LT" epoch="0" ver="32.1" rel="0"/>
 42 |                 <rpm:entry name="cornballer" flags="LT" epoch="0" ver="444"/>
 43 |             </rpm:obsoletes>
 44 |             <rpm:suggests>
 45 |                 <rpm:entry name="(bobloblaw &gt;= 1.1 if maritimelaw else anyone &lt; 0.5.1-2)"/>
 46 |                 <rpm:entry name="(dove and return)"/>
 47 |                 <rpm:entry name="(job or money &gt; 9000)"/>
 48 |             </rpm:suggests>
 49 |             <rpm:enhances>
 50 |                 <rpm:entry name="(bananas or magic)"/>
 51 |             </rpm:enhances>
 52 |             <rpm:recommends>
 53 |                 <rpm:entry name="((hiding and attic) if light-treason)"/>
 54 |                 <rpm:entry name="GeneParmesan(PI)"/>
 55 |                 <rpm:entry name="yacht" flags="GT" epoch="9" ver="11.0" rel="0"/>
 56 |             </rpm:recommends>
 57 |             <rpm:supplements>
 58 |                 <rpm:entry name="((hiding and illusion) unless alliance-of-magicians)"/>
 59 |                 <rpm:entry name="comedy" flags="EQ" epoch="0" ver="11.1" rel="4"/>
 60 |             </rpm:supplements>
 61 |             <file>/etc/complex/pkg.cfg</file>
 62 |             <file>/usr/bin/complex_a</file>
 63 |         </format>
 64 |     </package>
 65 |     <package type="rpm">
 66 |         <name>rpm-empty</name>
 67 |         <arch>x86_64</arch>
 68 |         <version epoch="0" ver="0" rel="0"/>
 69 |         <checksum type="sha256" pkgid="YES">90fbba546300f507473547f33e229ee7bad94bbbe6e84b21d485e8e43b5f1132</checksum>
 70 |         <summary>""</summary>
 71 |         <description></description>
 72 |         <packager></packager>
 73 |         <url></url>
 74 |         <time file="1625930845" build="1615686424"/>
 75 |         <size package="6005" installed="0" archive="124"/>
 76 |         <location href="rpm-empty-0-0.x86_64.rpm"/>
 77 |         <format>
 78 |             <rpm:license>LGPL</rpm:license>
 79 |             <rpm:vendor></rpm:vendor>
 80 |             <rpm:group>Unspecified</rpm:group>
 81 |             <rpm:buildhost>localhost</rpm:buildhost>
 82 |             <rpm:sourcerpm>rpm-empty-0-0.src.rpm</rpm:sourcerpm>
 83 |             <rpm:header-range start="4504" end="5961"/>
 84 |             <rpm:provides>
 85 |                 <rpm:entry name="rpm-empty" flags="EQ" epoch="0" ver="0" rel="0"/>
 86 |                 <rpm:entry name="rpm-empty(x86-64)" flags="EQ" epoch="0" ver="0" rel="0"/>
 87 |             </rpm:provides>
 88 |         </format>
 89 |     </package>
 90 |     <package type="rpm">
 91 |         <name>rpm-with-invalid-chars</name>
 92 |         <arch>noarch</arch>
 93 |         <version epoch="0" ver="1" rel="1.fc33"/>
 94 |         <checksum type="sha256" pkgid="YES">64f1444f8e86a9ae6accdc2c4b12cb4a87fb2414c0998df461a8623a52eb3cc4</checksum>
 95 |         <summary>An RPM file with invalid characters in its description.</summary>
 96 |         <description>This RPM that contains XML-illegal characters such as ampersand &amp; and less-than &lt; greater-than &gt; in its &lt;/description&gt;.
 97 | These must be escaped in the final XML metadata. The XML spec does not strictly require escaping 'single' or "double" quotes
 98 | within text content, and not all XML libraries do so. However, it is generally recommended.</description>
 99 |         <packager></packager>
100 |         <url>https://github.com/dralley/rpmrepo_rs/</url>
101 |         <time file="1625930845" build="1617418325"/>
102 |         <size package="6489" installed="0" archive="124"/>
103 |         <location href="rpm-with-invalid-chars-1-1.fc33.noarch.rpm"/>
104 |         <format>
105 |             <rpm:license>Public Domain</rpm:license>
106 |             <rpm:vendor></rpm:vendor>
107 |             <rpm:group>Unspecified</rpm:group>
108 |             <rpm:buildhost>localhost</rpm:buildhost>
109 |             <rpm:sourcerpm>rpm-with-invalid-chars-1-1.fc33.src.rpm</rpm:sourcerpm>
110 |             <rpm:header-range start="4504" end="6445"/>
111 |             <rpm:provides>
112 |                 <rpm:entry name="rpm-with-invalid-chars" flags="EQ" epoch="0" ver="1" rel="1.fc33"/>
113 |             </rpm:provides>
114 |         </format>
115 |     </package>
116 |     <package type="rpm">
117 |         <name>rpm-with-non-ascii</name>
118 |         <arch>noarch</arch>
119 |         <version epoch="0" ver="1" rel="1.fc33"/>
120 |         <checksum type="sha256" pkgid="YES">957de8a966af8fe8e55102489099d8b20bbecc23954c8c2bd88fb59625260393</checksum>
121 |         <summary>An RPM file with non-ascii characters in its metadata.</summary>
122 |         <description>This file contains unicode characters and should be encoded as UTF-8. The
123 | following code points are all outside the "Basic Latin (ASCII)" code point
124 | block:
125 | 
126 | * U+0080: 
127 | * U+0100: Ā
128 | * U+0180: ƀ
129 | * U+0250: ɐ
130 | * U+02B0: ʰ
131 | * U+0041 0x0300: À
132 | * U+0370: Ͱ
133 | 
134 | See: http://www.unicode.org/charts/</description>
135 |         <packager></packager>
136 |         <url>https://github.com/dralley/rpmrepo_rs/</url>
137 |         <time file="1625930845" build="1615686425"/>
138 |         <size package="6433" installed="0" archive="124"/>
139 |         <location href="rpm-with-non-ascii-1-1.fc33.noarch.rpm"/>
140 |         <format>
141 |             <rpm:license>Public Domain</rpm:license>
142 |             <rpm:vendor></rpm:vendor>
143 |             <rpm:group>Unspecified</rpm:group>
144 |             <rpm:buildhost>localhost</rpm:buildhost>
145 |             <rpm:sourcerpm>rpm-with-non-ascii-1-1.fc33.src.rpm</rpm:sourcerpm>
146 |             <rpm:header-range start="4504" end="6389"/>
147 |             <rpm:provides>
148 |                 <rpm:entry name="rpm-with-non-ascii" flags="EQ" epoch="0" ver="1" rel="1.fc33"/>
149 |             </rpm:provides>
150 |         </format>
151 |     </package>
152 | </metadata>
153 | 


--------------------------------------------------------------------------------
/tests/documents/sample_1.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8" standalone="yes"?>
 2 | <project name="project-name">
 3 |     <libraries>
 4 |         <library groupId="org.example" artifactId="&lt;name&gt;" version="0.1"/>
 5 |         <library groupId="com.example" artifactId="&quot;cool-lib&amp;" version="999"/>
 6 |     </libraries>
 7 |     <module name="module-1">
 8 |         <files>
 9 |             <file name="somefile.java" type="java">
10 |                 Some &lt;java&gt; class
11 |             </file>
12 |             <file name="another_file.java" type="java">
13 |                 Another &quot;java&quot; class
14 |             </file>
15 |             <file name="config.xml" type="xml">
16 |                 Weird &apos;XML&apos; config
17 |             </file>
18 |         </files>
19 |         <libraries>
20 |             <library groupId="junit" artifactId="junit" version="1.9.5"/>
21 |         </libraries>
22 |     </module>
23 |     <module name="module-2">
24 |         <files>
25 |             <file name="program.js" type="javascript">
26 |                 JavaScript &amp; program
27 |             </file>
28 |             <file name="style.css" type="css">
29 |                 Cascading style sheet: &#xA9; - &#1161;
30 |             </file>
31 |         </files>
32 |     </module>
33 | </project>
34 | 


--------------------------------------------------------------------------------
/tests/documents/sample_ns.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <naked xmlns:d="urn:example:double">
 3 |   <p:data  xmlns:h="urn:example:header" xmlns:p="urn:example:namespace">
 4 |     <datum id="34" xmlns="urn:example:datum-ns" other-name="15">
 5 |       <h:dat-arg p:name="ns" name="">H</h:dat-arg>
 6 |       <dat-arg p:name="ns" name="dat1">N</dat-arg>
 7 |       <dat-arg dash-dot.prefix:name="ns" name="dat2" xmlns:dash-dot.prefix="urn:example:prefix"/>
 8 |       <dat-arg xmlns="urn:example:shadowed" name="shadow" xmlns:p="urn:example:shadow-namespace" >
 9 |         <p:name p:doc="shadow-ns">Name</p:name>
10 |         <d:name>Another name</d:name>
11 |         <arg xmlns="">
12 |           <naked dat:dat-val="dat" xmlns:dat="urn:example:datum-ns"/>
13 |         </arg>
14 |       </dat-arg>
15 |     </datum>
16 |   </p:data>
17 | </naked>
18 | 


--------------------------------------------------------------------------------
/tests/documents/test_writer.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?><manifest xmlns:android="http://schemas.android.com/apk/res/android" package="com.github.sample" android:versionName="Lollipop" android:versionCode="5.1"><application android:label="SampleApplication"></application></manifest>
2 | 


--------------------------------------------------------------------------------
/tests/documents/test_writer_indent.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:svg="http://www.w3.org/2000/svg" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" office:version="1.0">
 3 |     <office:scripts />
 4 |     <office:font-face-decls>
 5 |         <style:font-face style:name="Tahoma1" svg:font-family="Tahoma" />
 6 |         <style:font-face style:name="Andale Sans UI" svg:font-family="'Andale Sans UI'" style:font-pitch="variable" />
 7 |         <style:font-face style:name="Tahoma" svg:font-family="Tahoma" style:font-pitch="variable" />
 8 |         <style:font-face style:name="Thorndale" svg:font-family="Thorndale" style:font-family-generic="roman" style:font-pitch="variable" />
 9 |     </office:font-face-decls>
10 |     <office:automatic-styles>
11 |         <style:style style:name="P1" style:family="paragraph" style:parent-style-name="Standard">
12 |             <style:paragraph-properties fo:text-align="center" style:justify-single-word="false" />
13 |             <style:text-properties fo:font-size="8pt" />
14 |         </style:style>
15 |         <style:style style:name="P2" style:family="paragraph" style:parent-style-name="Standard">
16 |             <style:paragraph-properties fo:text-align="center" style:justify-single-word="false" />
17 |             <style:text-properties fo:font-size="10pt" />
18 |         </style:style>
19 |         <style:style style:name="P3" style:family="paragraph" style:parent-style-name="Standard">
20 |             <style:paragraph-properties fo:text-align="center" style:justify-single-word="false" />
21 |             <style:text-properties fo:font-size="12pt" />
22 |         </style:style>
23 |         <style:style style:name="P4" style:family="paragraph" style:parent-style-name="Standard">
24 |             <style:paragraph-properties fo:text-align="center" style:justify-single-word="false" />
25 |             <style:text-properties fo:font-size="14pt" />
26 |         </style:style>
27 |         <style:style style:name="P5" style:family="paragraph" style:parent-style-name="Standard">
28 |             <style:paragraph-properties fo:text-align="center" style:justify-single-word="false" />
29 |         </style:style>
30 |     </office:automatic-styles>
31 |     <office:body>
32 |         <office:text>
33 |             <text:sequence-decls>
34 |                 <!-- This is a comment with indentation! -->
35 |                 <text:sequence-decl text:display-outline-level="0" text:name="Illustration" />
36 |                 <text:sequence-decl text:display-outline-level="0" text:name="Table" />
37 |                 <text:sequence-decl text:display-outline-level="0" text:name="Text" />
38 |                 <text:sequence-decl text:display-outline-level="0" text:name="Drawing" />
39 |             </text:sequence-decls>
40 |             <text:p text:style-name="P1">This is a simple test document to demonstrate the DocumentLoader example!</text:p>
41 |             <text:p text:style-name="P2">This is a simple test document to demonstrate the DocumentLoader example!</text:p>
42 |             <text:p text:style-name="P3">This is a simple test document to demonstrate the DocumentLoader example!</text:p>
43 |             <text:p text:style-name="P4">This is a simple test document to demonstrate the DocumentLoader example!</text:p>
44 |             <text:p text:style-name="P5">This is a simple test document to demonstrate the DocumentLoader example!</text:p>
45 |         </office:text>
46 |     </office:body>
47 | </office:document-content>


--------------------------------------------------------------------------------
/tests/documents/test_writer_indent_cdata.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <jobs>
3 |     <job>
4 |         <jobid><![CDATA[00d46e4494e1]]></jobid>
5 |     </job>
6 | </jobs>


--------------------------------------------------------------------------------
/tests/escape.rs:
--------------------------------------------------------------------------------
  1 | use pretty_assertions::assert_eq;
  2 | use quick_xml::escape::{self, EscapeError, ParseCharRefError};
  3 | use std::borrow::Cow;
  4 | use std::num::IntErrorKind;
  5 | 
  6 | #[test]
  7 | fn escape() {
  8 |     // `assert_eq!` cannot tell a borrowed `Cow` from an owned one, so check it explicitly
  9 |     // TODO: use assert_matches! when stabilized and other features will bump MSRV
 10 |     let untouched = escape::escape("test");
 11 |     assert_eq!(untouched, Cow::Borrowed("test"));
 12 |     assert!(matches!(untouched, Cow::Borrowed(_)));
 13 | 
 14 |     for &(raw, escaped) in &[
 15 |         ("<&\"'>", "&lt;&amp;&quot;&apos;&gt;"),
 16 |         ("<test>", "&lt;test&gt;"),
 17 |         ("\"a\"bc", "&quot;a&quot;bc"),
 18 |         ("\"a\"b&c", "&quot;a&quot;b&amp;c"),
 19 |         ("prefix_\"a\"b&<>c", "prefix_&quot;a&quot;b&amp;&lt;&gt;c"),
 20 |     ] {
 21 |         assert_eq!(escape::escape(raw), escaped);
 22 |     }
 23 | }
 24 | 
 25 | #[test]
 26 | fn partial_escape() {
 27 |     // `assert_eq!` cannot tell a borrowed `Cow` from an owned one, so check it explicitly
 28 |     // TODO: use assert_matches! when stabilized and other features will bump MSRV
 29 |     let untouched = escape::partial_escape("test");
 30 |     assert_eq!(untouched, Cow::Borrowed("test"));
 31 |     assert!(matches!(untouched, Cow::Borrowed(_)));
 32 | 
 33 |     // `<`, `>` and `&` become entities, both kinds of quotes are left untouched
 34 |     let check = |raw: &str, escaped: &str| {
 35 |         assert_eq!(escape::partial_escape(raw), escaped);
 36 |     };
 37 |     check("<&\"'>", "&lt;&amp;\"'&gt;");
 38 |     check("<test>", "&lt;test&gt;");
 39 |     check("\"a\"bc", "\"a\"bc");
 40 |     check("\"a\"b&c", "\"a\"b&amp;c");
 41 |     check("prefix_\"a\"b&<>c", "prefix_\"a\"b&amp;&lt;&gt;c");
 42 | }
 43 | 
 44 | #[test]
 45 | fn minimal_escape() {
 46 |     assert_eq!(escape::minimal_escape("test"), Cow::Borrowed("test"));
 47 | 
 48 |     // Only `<` and `&` become entities; `>` and both kinds of quotes are left untouched
 49 |     let check = |raw: &str, escaped: &str| assert_eq!(escape::minimal_escape(raw), escaped);
 50 |     check("<&\"'>", "&lt;&amp;\"'>");
 51 |     check("<test>", "&lt;test>");
 52 |     check("\"a\"bc", "\"a\"bc");
 53 |     check("\"a\"b&c", "\"a\"b&amp;c");
 54 |     check("prefix_\"a\"b&<>c", "prefix_\"a\"b&amp;&lt;>c");
 55 | }
 56 | 
 57 | #[test]
 58 | fn unescape() {
 59 |     // `assert_eq!` cannot tell a borrowed `Cow` from an owned one, so check it explicitly
 60 |     // TODO: use assert_matches! when stabilized and other features will bump MSRV
 61 |     let untouched = escape::unescape("test");
 62 |     assert_eq!(untouched, Ok(Cow::Borrowed("test")));
 63 |     assert!(matches!(untouched, Ok(Cow::Borrowed(_))));
 64 | 
 65 |     // Predefined entities, then the same character as hex and decimal references
 66 |     let predefined = escape::unescape("&lt;&amp;test&apos;&quot;&gt;");
 67 |     assert_eq!(predefined, Ok(Cow::Borrowed("<&test'\">")));
 68 |     for &reference in &["&#x30;", "&#48;"] {
 69 |         assert_eq!(escape::unescape(reference), Ok(Cow::Borrowed("0")));
 70 |     }
 71 | 
 72 |     // An unknown entity is reported with the range `1..4` its name occupies in the input
 73 |     let unknown = escape::unescape("&foo;");
 74 |     let expected = EscapeError::UnrecognizedEntity(1..4, "foo".into());
 75 |     assert_eq!(unknown, Err(expected));
 76 | }
 77 | 
 78 | /// XML allows any number of leading zeroes. That is not explicitly mentioned
 79 | /// in the specification, but enforced by the conformance test suite
 80 | /// (<https://www.w3.org/XML/Test/>).
 81 | /// 100 digits should be enough to ensure that no artificial restriction
 82 | /// (such as the maximal length of a `u128` string representation) is applied.
 83 | #[test]
 84 | fn unescape_long() {
 85 |     assert_eq!(
 86 |         escape::unescape("&#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000048;"),
 87 |         Ok("0".into()),
 88 |     );
 89 |     assert_eq!(
 90 |         escape::unescape("&#x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000030;"),
 91 |         Ok("0".into()),
 92 |     );
 93 | 
 94 |     // Numbers that are too big for `u32` should produce overflow errors
 95 |     match escape::unescape(&format!("&#{};", u32::MAX as u64 + 1)) {
 96 |         Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => {
 97 |             assert_eq!(err.kind(), &IntErrorKind::PosOverflow)
 98 |         }
 99 |         other => panic!(
100 |             "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), but got {:?}",
101 |             other
102 |         ),
103 |     }
104 |     match escape::unescape(&format!("&#x{:x};", u32::MAX as u64 + 1)) {
105 |         Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => {
106 |             assert_eq!(err.kind(), &IntErrorKind::PosOverflow)
107 |         }
108 |         other => panic!(
109 |             "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), but got {:?}",
110 |             other
111 |         ),
112 |     }
113 | }
114 | 
115 | #[test]
116 | fn unescape_sign() {
117 |     assert_eq!(
118 |         escape::unescape("&#+48;"), // decimal form with an explicit plus
119 |         Err(EscapeError::InvalidCharRef(
120 |             ParseCharRefError::UnexpectedSign
121 |         )),
122 |     );
123 |     assert_eq!(
124 |         escape::unescape("&#x+30;"), // hexadecimal form with an explicit plus
125 |         Err(EscapeError::InvalidCharRef(
126 |             ParseCharRefError::UnexpectedSign
127 |         )),
128 |     );
129 |     // A leading minus is rejected with the same error as a leading plus
130 |     assert_eq!(
131 |         escape::unescape("&#-48;"),
132 |         Err(EscapeError::InvalidCharRef(
133 |             ParseCharRefError::UnexpectedSign
134 |         )),
135 |     );
136 |     assert_eq!(
137 |         escape::unescape("&#x-30;"),
138 |         Err(EscapeError::InvalidCharRef(
139 |             ParseCharRefError::UnexpectedSign
140 |         )),
141 |     );
142 | }
143 | 
144 | #[test]
145 | fn unescape_with() {
146 |     let custom_entities = |ent: &str| match ent {
147 |         "foo" => Some("BAR"), // the only entity known to this resolver
148 |         _ => None,
149 |     };
150 |     // Text without any references must come back borrowed and unchanged
151 |     let unchanged = escape::unescape_with("test", custom_entities);
152 |     // assert_eq does not check that Cow is borrowed, but we explicitly use Cow
153 |     // because it influences the diff; the `matches!` below checks the borrowing
154 |     // TODO: use assert_matches! when stabilized and other features will bump MSRV
155 |     assert_eq!(unchanged, Ok(Cow::Borrowed("test")));
156 |     assert!(matches!(unchanged, Ok(Cow::Borrowed(_))));
157 |     // The resolver returns `None` for `lt`, so `&lt;` is an unrecognized entity here
158 |     assert_eq!(
159 |         escape::unescape_with("&lt;", custom_entities),
160 |         Err(EscapeError::UnrecognizedEntity(1..3, "lt".into())),
161 |     );
162 |     assert_eq!(
163 |         escape::unescape_with("&#x30;", custom_entities), // character references still work
164 |         Ok("0".into())
165 |     );
166 |     assert_eq!(
167 |         escape::unescape_with("&#48;", custom_entities),
168 |         Ok("0".into())
169 |     );
170 |     assert_eq!(
171 |         escape::unescape_with("&foo;", custom_entities),
172 |         Ok("BAR".into())
173 |     );
174 |     assert_eq!(
175 |         escape::unescape_with("&fop;", custom_entities), // unknown to the resolver
176 |         Err(EscapeError::UnrecognizedEntity(1..4, "fop".into()))
177 |     );
178 | }
179 | 
180 | /// XML allows any number of leading zeroes. That is not explicitly mentioned
181 | /// in the specification, but enforced by the conformance test suite
182 | /// (<https://www.w3.org/XML/Test/>).
183 | /// 100 digits should be enough to ensure that any artificial restrictions
184 | /// (such as the maximal length of a u128 string representation) are not applied.
185 | #[test]
186 | fn unescape_with_long() {
187 |     assert_eq!(
188 |         escape::unescape_with("&#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000048;", |_| None),
189 |         Ok("0".into()), // decimal 48 is '0'
190 |     );
191 |     assert_eq!(
192 |         escape::unescape_with("&#x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000030;", |_| None),
193 |         Ok("0".into()), // hexadecimal 30 is '0'
194 |     );
195 | 
196 |     // Numbers that do not fit into u32 must be reported as a positive overflow
197 |     match escape::unescape_with(&format!("&#{};", u32::MAX as u64 + 1), |_| None) {
198 |         Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => {
199 |             assert_eq!(err.kind(), &IntErrorKind::PosOverflow)
200 |         }
201 |         x => panic!(
202 |             "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), but got {:?}",
203 |             x
204 |         ),
205 |     }
206 |     match escape::unescape_with(&format!("&#x{:x};", u32::MAX as u64 + 1), |_| None) {
207 |         Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => {
208 |             assert_eq!(err.kind(), &IntErrorKind::PosOverflow)
209 |         }
210 |         x => panic!(
211 |             "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), but got {:?}",
212 |             x
213 |         ),
214 |     }
215 | }
216 | 
217 | #[test]
218 | fn unescape_with_sign() {
219 |     assert_eq!(
220 |         escape::unescape_with("&#+48;", |_| None), // decimal form with an explicit plus
221 |         Err(EscapeError::InvalidCharRef(
222 |             ParseCharRefError::UnexpectedSign
223 |         )),
224 |     );
225 |     assert_eq!(
226 |         escape::unescape_with("&#x+30;", |_| None), // hexadecimal form with an explicit plus
227 |         Err(EscapeError::InvalidCharRef(
228 |             ParseCharRefError::UnexpectedSign
229 |         )),
230 |     );
231 |     // A leading minus is rejected with the same error as a leading plus
232 |     assert_eq!(
233 |         escape::unescape_with("&#-48;", |_| None),
234 |         Err(EscapeError::InvalidCharRef(
235 |             ParseCharRefError::UnexpectedSign
236 |         )),
237 |     );
238 |     assert_eq!(
239 |         escape::unescape_with("&#x-30;", |_| None),
240 |         Err(EscapeError::InvalidCharRef(
241 |             ParseCharRefError::UnexpectedSign
242 |         )),
243 |     );
244 | }
245 | 


--------------------------------------------------------------------------------
/tests/fuzzing.rs:
--------------------------------------------------------------------------------
 1 | //! Cases that were found by fuzzing
 2 | 
 3 | use quick_xml::errors::{Error, IllFormedError};
 4 | use quick_xml::events::Event;
 5 | use quick_xml::reader::Reader;
 6 | 
 7 | #[test]
 8 | fn fuzz_53() {
 9 |     let data: &[u8] = b"\xe9\x00\x00\x00\x00\x00\x00\x00\x00\
10 | \x00\x00\x00\x00\n(\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\
11 | \x00<>\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00<<\x00\x00\x00";
12 |     let mut reader = Reader::from_reader(data);
13 |     let mut buf = vec![];
14 |     loop { // no assertions: the test passes as long as reading does not panic
15 |         match reader.read_event_into(&mut buf) {
16 |             Ok(Event::Eof) | Err(..) => break, // stop at end of input or on the first error
17 |             _ => buf.clear(), // drop the event and reuse the buffer
18 |         }
19 |     }
20 | }
21 | 
22 | #[test]
23 | fn fuzz_101() {
24 |     let data: &[u8] = b"\x00\x00<\x00\x00\x0a>&#44444444401?#\x0a413518\
25 |                        #\x0a\x0a\x0a;<:<)(<:\x0a\x0a\x0a\x0a;<:\x0a\x0a\
26 |                        <:\x0a\x0a\x0a\x0a\x0a<\x00*\x00\x00\x00\x00";
27 |     let mut reader = Reader::from_reader(data);
28 |     let mut buf = vec![];
29 |     loop { // no assertions: the test passes as long as nothing panics
30 |         match reader.read_event_into(&mut buf) {
31 |             Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
32 |                 for a in e.attributes() {
33 |                     if a.ok().map_or(true, |a| { // true for a broken attribute...
34 |                         a.decode_and_unescape_value(reader.decoder()).is_err() // ...or a value that fails to decode/unescape
35 |                     }) {
36 |                         break; // leaves only the attribute loop, reading continues
37 |                     }
38 |                 }
39 |             }
40 |             Ok(Event::Text(e)) => {
41 |                 if e.decode().is_err() {
42 |                     break; // leaves the outer `loop` and ends the test
43 |                 }
44 |             }
45 |             Ok(Event::Eof) | Err(..) => break, // stop at end of input or on the first error
46 |             _ => (),
47 |         }
48 |         buf.clear(); // reuse the buffer for the next event
49 |     }
50 | }
51 | 
52 | #[test]
53 | fn fuzz_empty_doctype() {
54 |     let data: &[u8] = b"<!DOCTYPE  \n    >"; // DOCTYPE followed only by whitespace, no name
55 |     let mut reader = Reader::from_reader(data);
56 |     let mut buf = Vec::new();
57 |     assert!(matches!(
58 |         reader.read_event_into(&mut buf).unwrap_err(), // the first read must fail
59 |         Error::IllFormed(IllFormedError::MissingDoctypeName)
60 |     ));
61 |     assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); // the next read reports end of input
62 | }
63 | 


--------------------------------------------------------------------------------
/tests/helpers/mod.rs:
--------------------------------------------------------------------------------
  1 | //! Utility functions for integration tests
  2 | 
  3 | /// Tests for <https://github.com/tafia/quick-xml/issues/469>.
  4 | /// Exported to reuse in `async-tokio` tests.
  5 | #[macro_export]
  6 | macro_rules! small_buffers_tests {
  7 |     (
  8 |         #[$test:meta] // attribute put on every generated test function
  9 |         $read_event:ident: $BufReader:ty // reader method to call: buffered reader type wrapping the input
 10 |         $(, $async:ident, $await:ident)? // optional tokens, presumably `async` and `await` for asynchronous readers
 11 |     ) => {
 12 |         mod small_buffers {
 13 |             use quick_xml::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
 14 |             use quick_xml::reader::Reader;
 15 |             use pretty_assertions::assert_eq;
 16 | 
 17 |             #[$test]
 18 |             $($async)? fn decl() {
 19 |                 let xml = "<?xml ?>";
 20 |                 //         ^^^^^^^ data that fits into the buffer
 21 |                 let size = xml.match_indices("?>").next().unwrap().0 + 1;
 22 |                 let br = <$BufReader>::with_capacity(size, xml.as_bytes());
 23 |                 let mut reader = Reader::from_reader(br);
 24 |                 let mut buf = Vec::new();
 25 |                 // Only the `?` of the closing `?>` fits into the buffer
 26 |                 assert_eq!(
 27 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
 28 |                     Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
 29 |                 );
 30 |                 assert_eq!(
 31 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
 32 |                     Event::Eof
 33 |                 );
 34 |             }
 35 | 
 36 |             #[$test]
 37 |             $($async)? fn pi() {
 38 |                 let xml = "<?pi?>";
 39 |                 //         ^^^^^ data that fits into the buffer
 40 |                 let size = xml.match_indices("?>").next().unwrap().0 + 1;
 41 |                 let br = <$BufReader>::with_capacity(size, xml.as_bytes());
 42 |                 let mut reader = Reader::from_reader(br);
 43 |                 let mut buf = Vec::new();
 44 |                 // Only the `?` of the closing `?>` fits into the buffer
 45 |                 assert_eq!(
 46 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
 47 |                     Event::PI(BytesPI::new("pi"))
 48 |                 );
 49 |                 assert_eq!(
 50 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
 51 |                     Event::Eof
 52 |                 );
 53 |             }
 54 | 
 55 |             #[$test]
 56 |             $($async)? fn empty() {
 57 |                 let xml = "<empty/>";
 58 |                 //         ^^^^^^^ data that fits into the buffer
 59 |                 let size = xml.match_indices("/>").next().unwrap().0 + 1;
 60 |                 let br = <$BufReader>::with_capacity(size, xml.as_bytes());
 61 |                 let mut reader = Reader::from_reader(br);
 62 |                 let mut buf = Vec::new();
 63 |                 // Only the `/` of the closing `/>` fits into the buffer
 64 |                 assert_eq!(
 65 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
 66 |                     Event::Empty(BytesStart::new("empty"))
 67 |                 );
 68 |                 assert_eq!(
 69 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
 70 |                     Event::Eof
 71 |                 );
 72 |             }
 73 | 
 74 |             #[$test]
 75 |             $($async)? fn cdata1() {
 76 |                 let xml = "<![CDATA[cdata]]>";
 77 |                 //         ^^^^^^^^^^^^^^^ data that fits into the buffer
 78 |                 let size = xml.match_indices("]]>").next().unwrap().0 + 1;
 79 |                 let br = <$BufReader>::with_capacity(size, xml.as_bytes());
 80 |                 let mut reader = Reader::from_reader(br);
 81 |                 let mut buf = Vec::new();
 82 |                 // Only the first byte of the closing `]]>` fits into the buffer
 83 |                 assert_eq!(
 84 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
 85 |                     Event::CData(BytesCData::new("cdata"))
 86 |                 );
 87 |                 assert_eq!(
 88 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
 89 |                     Event::Eof
 90 |                 );
 91 |             }
 92 | 
 93 |             #[$test]
 94 |             $($async)? fn cdata2() {
 95 |                 let xml = "<![CDATA[cdata]]>";
 96 |                 //         ^^^^^^^^^^^^^^^^ data that fits into the buffer
 97 |                 let size = xml.match_indices("]]>").next().unwrap().0 + 2;
 98 |                 let br = <$BufReader>::with_capacity(size, xml.as_bytes());
 99 |                 let mut reader = Reader::from_reader(br);
100 |                 let mut buf = Vec::new();
101 |                 // The first two bytes of the closing `]]>` fit into the buffer
102 |                 assert_eq!(
103 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
104 |                     Event::CData(BytesCData::new("cdata"))
105 |                 );
106 |                 assert_eq!(
107 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
108 |                     Event::Eof
109 |                 );
110 |             }
111 | 
112 |             #[$test]
113 |             $($async)? fn comment1() {
114 |                 let xml = "<!--comment-->";
115 |                 //         ^^^^^^^^^^^^ data that fits into the buffer
116 |                 let size = xml.match_indices("-->").next().unwrap().0 + 1;
117 |                 let br = <$BufReader>::with_capacity(size, xml.as_bytes());
118 |                 let mut reader = Reader::from_reader(br);
119 |                 let mut buf = Vec::new();
120 |                 // Only the first byte of the closing `-->` fits into the buffer
121 |                 assert_eq!(
122 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
123 |                     Event::Comment(BytesText::new("comment"))
124 |                 );
125 |                 assert_eq!(
126 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
127 |                     Event::Eof
128 |                 );
129 |             }
130 | 
131 |             #[$test]
132 |             $($async)? fn comment2() {
133 |                 let xml = "<!--comment-->";
134 |                 //         ^^^^^^^^^^^^^ data that fits into the buffer
135 |                 let size = xml.match_indices("-->").next().unwrap().0 + 2;
136 |                 let br = <$BufReader>::with_capacity(size, xml.as_bytes());
137 |                 let mut reader = Reader::from_reader(br);
138 |                 let mut buf = Vec::new();
139 |                 // The first two bytes of the closing `-->` fit into the buffer
140 |                 assert_eq!(
141 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
142 |                     Event::Comment(BytesText::new("comment"))
143 |                 );
144 |                 assert_eq!(
145 |                     reader.$read_event(&mut buf) $(.$await)? .unwrap(),
146 |                     Event::Eof
147 |                 );
148 |             }
149 |         }
150 |     };
151 | }
152 | 


--------------------------------------------------------------------------------
/tests/html.rs:
--------------------------------------------------------------------------------
  1 | use pretty_assertions::assert_eq;
  2 | use quick_xml::encoding::Decoder;
  3 | use quick_xml::escape::unescape;
  4 | use quick_xml::events::{BytesStart, Event};
  5 | use quick_xml::name::{QName, ResolveResult};
  6 | use quick_xml::reader::NsReader;
  7 | use std::str::from_utf8;
  8 | 
  9 | #[test]
 10 | fn html5() {
 11 |     test(
 12 |         include_str!("documents/html5.html"), // document to read
 13 |         include_str!("documents/html5.txt"), // expected events
 14 |         false, // `trim`: passed to `trim_text` by `test_bytes`
 15 |     );
 16 | }
 17 | 
 18 | #[test]
 19 | fn escaped_characters_html() {
 20 |     test(
 21 |         r#"<e attr="&planck;&Egrave;&ell;&#x1D55D;&bigodot;">&boxDR;&boxDL;&#x02554;&#x02557;&#9556;&#9559;</e>"#, // references both in an attribute value and in element content
 22 |         r#"
 23 |             |StartElement(e [attr="ℏÈℓ𝕝⨀"])
 24 |             |Reference(boxDR)
 25 |             |Reference(boxDL)
 26 |             |Reference(#x02554)
 27 |             |Reference(#x02557)
 28 |             |Reference(#9556)
 29 |             |Reference(#9559)
 30 |             |EndElement(e)
 31 |             |EndDocument
 32 |         "#, // expected events, one per line
 33 |         true, // `trim`: passed to `trim_text` by `test_bytes`
 34 |     )
 35 | }
 36 | 
 37 | #[track_caller]
 38 | fn test(input: &str, output: &str, trim: bool) {
 39 |     test_bytes(input.as_bytes(), output.as_bytes(), trim); // string front-end for the byte-oriented checker
 40 | }
 41 | 
 42 | #[track_caller]
 43 | fn test_bytes(input: &[u8], output: &[u8], trim: bool) {
 44 |     let mut reader = NsReader::from_reader(input);
 45 |     let config = reader.config_mut();
 46 |     config.trim_text(trim);
 47 |     config.check_comments = true;
 48 | 
 49 |     let mut spec_lines = SpecIter(output).enumerate();
 50 |     // Each event read from `input` is rendered to one line and compared with the next spec line
 51 |     let mut decoder = reader.decoder();
 52 |     loop {
 53 |         let line = match reader.read_resolved_event() {
 54 |             Ok((_, Event::Decl(e))) => {
 55 |                 // Declaration could change decoder
 56 |                 decoder = reader.decoder();
 57 | 
 58 |                 let version_cow = e.version().unwrap();
 59 |                 let version = decoder.decode(version_cow.as_ref()).unwrap();
 60 |                 let encoding_cow = e.encoding().unwrap().unwrap();
 61 |                 let encoding = decoder.decode(encoding_cow.as_ref()).unwrap();
 62 |                 format!("StartDocument({}, {})", version, encoding)
 63 |             }
 64 |             Ok((_, Event::PI(e))) => {
 65 |                 format!("ProcessingInstruction(PI={})", decoder.decode(&e).unwrap())
 66 |             }
 67 |             Ok((_, Event::DocType(e))) => format!("DocType({})", decoder.decode(&e).unwrap()),
 68 |             Ok((n, Event::Start(e))) => {
 69 |                 let name = namespace_name(n, e.name(), decoder);
 70 |                 match make_attrs(&e, decoder) {
 71 |                     Ok(attrs) if attrs.is_empty() => format!("StartElement({})", &name),
 72 |                     Ok(attrs) => format!("StartElement({} [{}])", &name, &attrs),
 73 |                     Err(e) => format!("StartElement({}, attr-error: {})", &name, &e),
 74 |                 }
 75 |             }
 76 |             Ok((n, Event::Empty(e))) => {
 77 |                 let name = namespace_name(n, e.name(), decoder);
 78 |                 match make_attrs(&e, decoder) {
 79 |                     Ok(attrs) if attrs.is_empty() => format!("EmptyElement({})", &name),
 80 |                     Ok(attrs) => format!("EmptyElement({} [{}])", &name, &attrs),
 81 |                     Err(e) => format!("EmptyElement({}, attr-error: {})", &name, &e),
 82 |                 }
 83 |             }
 84 |             Ok((n, Event::End(e))) => {
 85 |                 let name = namespace_name(n, e.name(), decoder);
 86 |                 format!("EndElement({})", name)
 87 |             }
 88 |             Ok((_, Event::Comment(e))) => format!("Comment({})", decoder.decode(&e).unwrap()),
 89 |             Ok((_, Event::CData(e))) => format!("CData({})", decoder.decode(&e).unwrap()),
 90 |             Ok((_, Event::Text(e))) => match unescape(&decoder.decode(&e).unwrap()) {
 91 |                 Ok(c) => format!("Characters({})", &c),
 92 |                 Err(err) => format!("FailedUnescape({:?}; {})", e.as_ref(), err),
 93 |             },
 94 |             Ok((_, Event::GeneralRef(e))) => match unescape(&decoder.decode(&e).unwrap()) {
 95 |                 Ok(c) => format!("Reference({})", &c),
 96 |                 Err(err) => format!("FailedUnescape({:?}; {})", e.as_ref(), err),
 97 |             },
 98 |             Ok((_, Event::Eof)) => "EndDocument".to_string(),
 99 |             Err(e) => format!("Error: {}", e), // reader errors are rendered as a line, not unwrapped
100 |         };
101 |         if let Some((n, spec)) = spec_lines.next() {
102 |             if spec.trim() == "EndDocument" {
103 |                 break; // spec is finished; the actual event is not compared in this case
104 |             }
105 |             assert_eq!(
106 |                 line.trim(),
107 |                 spec.trim(),
108 |                 "Unexpected event at line {}",
109 |                 n + 1 // `enumerate` is zero-based, report a one-based line
110 |             );
111 |         } else {
112 |             if line == "EndDocument" { // spec ran out exactly when the reader reached the end
113 |                 break;
114 |             }
115 |             panic!("Unexpected event: {}", line);
116 |         }
117 |     }
118 | }
119 | 
120 | fn namespace_name(n: ResolveResult, name: QName, decoder: Decoder) -> String {
121 |     let name = decoder.decode(name.as_ref()).unwrap();
122 |     match n {
123 |         // Bound namespace: produces the string '{namespace}prefixed_name'
124 |         ResolveResult::Bound(n) => format!("{{{}}}{}", decoder.decode(n.as_ref()).unwrap(), name),
125 |         _ => name.to_string(), // any other resolution result: just the decoded name
126 |     }
127 | }
128 | 
129 | fn make_attrs(e: &BytesStart, decoder: Decoder) -> ::std::result::Result<String, String> {
130 |     let mut atts = Vec::new();
131 |     for a in e.attributes() {
132 |         match a {
133 |             Ok(a) => {
134 |                 if a.key.as_namespace_binding().is_none() { // namespace bindings are left out of the list
135 |                     let key = decoder.decode(a.key.as_ref()).unwrap();
136 |                     let value = decoder.decode(a.value.as_ref()).unwrap();
137 |                     let unescaped_value = unescape(&value).unwrap();
138 |                     atts.push(format!(
139 |                         "{}=\"{}\"",
140 |                         key,
141 |                         // unescape does not change validity of a UTF-8 string
142 |                         &unescaped_value
143 |                     ));
144 |                 }
145 |             }
146 |             Err(e) => return Err(e.to_string()), // the first malformed attribute aborts with its message
147 |         }
148 |     }
149 |     Ok(atts.join(", ")) // rendered as `key="value", key="value"`
150 | }
151 | 
152 | struct SpecIter<'a>(&'a [u8]); // holds the spec bytes that were not yielded yet
153 | 
154 | impl<'a> Iterator for SpecIter<'a> {
155 |     type Item = &'a str;
156 |     fn next(&mut self) -> Option<&'a str> {
157 |         let start = self // index of the first byte that is not whitespace, `|`, `:` or a digit
158 |             .0
159 |             .iter()
160 |             .position(|b| !matches!(*b, b' ' | b'\r' | b'\n' | b'\t' | b'|' | b':' | b'0'..=b'9'))
161 |             .unwrap_or(0);
162 |         // An item ends at the first `)` directly followed by a line break (CRLF is tried first)
163 |         if let Some(p) = self.0.windows(3).position(|w| w == b")\r\n") {
164 |             let (prev, next) = self.0.split_at(p + 1); // `prev` ends with the `)`
165 |             self.0 = &next[1..]; // drop the `\r`; the `\n` is skipped by `start` on the next call
166 |             Some(from_utf8(&prev[start..]).expect("Error decoding to utf8"))
167 |         } else if let Some(p) = self.0.windows(2).position(|w| w == b")\n") {
168 |             let (prev, next) = self.0.split_at(p + 1);
169 |             self.0 = next;
170 |             Some(from_utf8(&prev[start..]).expect("Error decoding to utf8"))
171 |         } else if self.0.is_empty() {
172 |             None
173 |         } else {
174 |             let p = self.0; // no terminator left: yield the rest as the last item
175 |             self.0 = &[];
176 |             Some(from_utf8(&p[start..]).expect("Error decoding to utf8"))
177 |         }
178 |     }
179 | }
180 | 


--------------------------------------------------------------------------------
/tests/reader-attributes.rs:
--------------------------------------------------------------------------------
  1 | use std::borrow::Cow;
  2 | 
  3 | use quick_xml::events::attributes::Attribute;
  4 | use quick_xml::events::{BytesEnd, Event::*};
  5 | use quick_xml::name::QName;
  6 | use quick_xml::reader::Reader;
  7 | 
  8 | use pretty_assertions::assert_eq;
  9 | 
 10 | #[test]
 11 | fn single_gt() {
 12 |     let mut reader = Reader::from_str("<a attr='>' check='2'></a>"); // `>` inside a single-quoted value
 13 |     match reader.read_event() {
 14 |         Ok(Start(e)) => {
 15 |             let mut attrs = e.attributes();
 16 |             assert_eq!(
 17 |                 attrs.next(),
 18 |                 Some(Ok(Attribute {
 19 |                     key: QName(b"attr"),
 20 |                     value: Cow::Borrowed(b">"),
 21 |                 }))
 22 |             );
 23 |             assert_eq!(
 24 |                 attrs.next(),
 25 |                 Some(Ok(Attribute {
 26 |                     key: QName(b"check"),
 27 |                     value: Cow::Borrowed(b"2"),
 28 |                 }))
 29 |             );
 30 |             assert_eq!(attrs.next(), None); // no further attributes
 31 |         }
 32 |         x => panic!("expected <a attr='>'>, got {:?}", x),
 33 |     }
 34 |     assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); // the start tag ended at the right `>`
 35 | }
 36 | 
 37 | #[test]
 38 | fn single_gt_quot() {
 39 |     let mut reader = Reader::from_str(r#"<a attr='">"' check='"2"'></a>"#); // `>` and `"` inside single-quoted values
 40 |     match reader.read_event() {
 41 |         Ok(Start(e)) => {
 42 |             let mut attrs = e.attributes();
 43 |             assert_eq!(
 44 |                 attrs.next(),
 45 |                 Some(Ok(Attribute {
 46 |                     key: QName(b"attr"),
 47 |                     value: Cow::Borrowed(br#"">""#), // the double quotes are part of the value
 48 |                 }))
 49 |             );
 50 |             assert_eq!(
 51 |                 attrs.next(),
 52 |                 Some(Ok(Attribute {
 53 |                     key: QName(b"check"),
 54 |                     value: Cow::Borrowed(br#""2""#),
 55 |                 }))
 56 |             );
 57 |             assert_eq!(attrs.next(), None); // no further attributes
 58 |         }
 59 |         x => panic!("expected <a attr='\">\"'>, got {:?}", x),
 60 |     }
 61 |     assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); // the start tag ended at the right `>`
 62 | }
 63 | 
 64 | #[test]
 65 | fn double_gt() {
 66 |     let mut reader = Reader::from_str(r#"<a attr=">" check="2"></a>"#); // `>` inside a double-quoted value
 67 |     match reader.read_event() {
 68 |         Ok(Start(e)) => {
 69 |             let mut attrs = e.attributes();
 70 |             assert_eq!(
 71 |                 attrs.next(),
 72 |                 Some(Ok(Attribute {
 73 |                     key: QName(b"attr"),
 74 |                     value: Cow::Borrowed(b">"),
 75 |                 }))
 76 |             );
 77 |             assert_eq!(
 78 |                 attrs.next(),
 79 |                 Some(Ok(Attribute {
 80 |                     key: QName(b"check"),
 81 |                     value: Cow::Borrowed(b"2"),
 82 |                 }))
 83 |             );
 84 |             assert_eq!(attrs.next(), None); // no further attributes
 85 |         }
 86 |         x => panic!("expected <a attr='>'>, got {:?}", x),
 87 |     }
 88 |     assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); // the start tag ended at the right `>`
 89 | }
 90 | 
 91 | #[test]
 92 | fn double_gt_apos() {
 93 |     let mut reader = Reader::from_str(r#"<a attr="'>'" check="'2'"></a>"#); // `>` and `'` inside double-quoted values
 94 |     match reader.read_event() {
 95 |         Ok(Start(e)) => {
 96 |             let mut attrs = e.attributes();
 97 |             assert_eq!(
 98 |                 attrs.next(),
 99 |                 Some(Ok(Attribute {
100 |                     key: QName(b"attr"),
101 |                     value: Cow::Borrowed(b"'>'"), // the apostrophes are part of the value
102 |                 }))
103 |             );
104 |             assert_eq!(
105 |                 attrs.next(),
106 |                 Some(Ok(Attribute {
107 |                     key: QName(b"check"),
108 |                     value: Cow::Borrowed(b"'2'"),
109 |                 }))
110 |             );
111 |             assert_eq!(attrs.next(), None); // no further attributes
112 |         }
113 |         x => panic!("expected <a attr=\"'>'\">, got {:?}", x),
114 |     }
115 |     assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("a"))); // the start tag ended at the right `>`
116 | }
117 | 
118 | #[test]
119 | fn empty_tag() {
120 |     let mut reader = Reader::from_str("<a att1='a' att2='b'/>"); // self-closed tag with two attributes
121 |     match reader.read_event() {
122 |         Ok(Empty(e)) => {
123 |             let mut attrs = e.attributes();
124 |             assert_eq!(
125 |                 attrs.next(),
126 |                 Some(Ok(Attribute {
127 |                     key: QName(b"att1"),
128 |                     value: Cow::Borrowed(b"a"),
129 |                 }))
130 |             );
131 |             assert_eq!(
132 |                 attrs.next(),
133 |                 Some(Ok(Attribute {
134 |                     key: QName(b"att2"),
135 |                     value: Cow::Borrowed(b"b"),
136 |                 }))
137 |             );
138 |             assert_eq!(attrs.next(), None); // the closing `/` does not yield an attribute
139 |         }
140 |         e => panic!("Expecting Empty event, got {:?}", e),
141 |     }
142 | }
143 | 
144 | #[test]
145 | fn equal_sign_in_value() {
146 |     let mut reader = Reader::from_str("<a att1=\"a=b\"/>"); // `=` inside the attribute value
147 |     match reader.read_event() {
148 |         Ok(Empty(e)) => {
149 |             let mut attrs = e.attributes();
150 |             assert_eq!(
151 |                 attrs.next(),
152 |                 Some(Ok(Attribute {
153 |                     key: QName(b"att1"),
154 |                     value: Cow::Borrowed(b"a=b"), // the whole `a=b` is a single value
155 |                 }))
156 |             );
157 |             assert_eq!(attrs.next(), None); // no further attributes
158 |         }
159 |         e => panic!("Expecting Empty event, got {:?}", e),
160 |     }
161 | }
162 | 


--------------------------------------------------------------------------------
/tests/roundtrip.rs:
--------------------------------------------------------------------------------
  1 | //! Contains tests that check that writing events from a reader produces the same documents.
  2 | 
  3 | use quick_xml::events::attributes::AttrError;
  4 | use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event::*};
  5 | use quick_xml::reader::Reader;
  6 | use quick_xml::writer::Writer;
  7 | 
  8 | use pretty_assertions::assert_eq;
  9 | 
 10 | mod events {
 11 |     use super::*;
 12 |     use pretty_assertions::assert_eq;
 13 | 
 14 |     /// Test start and end together because reading only end event requires special
 15 |     /// setting on the reader
 16 |     #[test]
 17 |     fn start_end() {
 18 |         let input = r#"<source attr="val" attr2 = ' "-->&entity;<-- '></source>"#;
 19 |         let mut reader = Reader::from_str(input);
 20 |         let mut writer = Writer::new(Vec::new());
 21 |         loop {
 22 |             match reader.read_event().unwrap() {
 23 |                 Eof => break,
 24 |                 e => assert!(writer.write_event(e).is_ok()),
 25 |             }
 26 |         }
 27 | 
 28 |         let result = writer.into_inner();
 29 |         assert_eq!(String::from_utf8(result).unwrap(), input);
 30 |     }
 31 | 
 32 |     #[test]
 33 |     fn empty() {
 34 |         let input = r#"<source attr="val" attr2 = ' "-->&entity;<-- '/>"#;
 35 |         let mut reader = Reader::from_str(input);
 36 |         let mut writer = Writer::new(Vec::new());
 37 |         loop {
 38 |             match reader.read_event().unwrap() {
 39 |                 Eof => break,
 40 |                 e => assert!(writer.write_event(e).is_ok()),
 41 |             }
 42 |         }
 43 | 
 44 |         let result = writer.into_inner();
 45 |         assert_eq!(String::from_utf8(result).unwrap(), input);
 46 |     }
 47 | 
 48 |     #[test]
 49 |     fn text() {
 50 |         let input = "it is just arbitrary text &amp; some character reference";
 51 |         let mut reader = Reader::from_str(input);
 52 |         let mut writer = Writer::new(Vec::new());
 53 |         loop {
 54 |             match reader.read_event().unwrap() {
 55 |                 Eof => break,
 56 |                 e => assert!(writer.write_event(e).is_ok()),
 57 |             }
 58 |         }
 59 | 
 60 |         let result = writer.into_inner();
 61 |         assert_eq!(String::from_utf8(result).unwrap(), input);
 62 |     }
 63 | 
 64 |     #[test]
 65 |     fn cdata() {
 66 |         let input = "<![CDATA[text & no references]]>";
 67 |         let mut reader = Reader::from_str(input);
 68 |         let mut writer = Writer::new(Vec::new());
 69 |         loop {
 70 |             match reader.read_event().unwrap() {
 71 |                 Eof => break,
 72 |                 e => assert!(writer.write_event(e).is_ok()),
 73 |             }
 74 |         }
 75 | 
 76 |         let result = writer.into_inner();
 77 |         assert_eq!(String::from_utf8(result).unwrap(), input);
 78 |     }
 79 | 
 80 |     /// A processing instruction with unusual content must be written back unchanged.
 81 |     #[test]
 82 |     fn pi() {
 83 |         let input = "<?!-- some strange processing instruction ?>";
 84 |         let mut reader = Reader::from_str(input);
 85 |         let mut writer = Writer::new(Vec::new());
 86 |         loop {
 87 |             let event = reader.read_event().unwrap();
 88 |             if matches!(event, Eof) {
 89 |                 break;
 90 |             }
 91 |             assert!(writer.write_event(event).is_ok());
 92 |         }
 93 |         assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), input);
 94 |     }
 95 | 
 96 |     /// An XML declaration with unusual content must be written back unchanged.
 97 |     #[test]
 98 |     fn decl() {
 99 |         let input = "<?xml some strange XML declaration ?>";
100 |         let mut reader = Reader::from_str(input);
101 |         let mut writer = Writer::new(Vec::new());
102 |         loop {
103 |             let event = reader.read_event().unwrap();
104 |             if matches!(event, Eof) {
105 |                 break;
106 |             }
107 |             assert!(writer.write_event(event).is_ok());
108 |         }
109 |         assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), input);
110 |     }
111 | 
112 |     /// A comment with `--` inside and extra dashes at the end must be written back unchanged.
113 |     #[test]
114 |     fn comment() {
115 |         let input = "<!-- some comment with -- inside---->";
116 |         let mut reader = Reader::from_str(input);
117 |         let mut writer = Writer::new(Vec::new());
118 |         loop {
119 |             let event = reader.read_event().unwrap();
120 |             if matches!(event, Eof) {
121 |                 break;
122 |             }
123 |             assert!(writer.write_event(event).is_ok());
124 |         }
125 |         assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), input);
126 |     }
127 | }
128 | 
129 | /// Document used by `simple` and `reescape_text`; the indent of the last closing tag is mismatched intentionally
130 | const XML: &str = r#"
131 |         <?xml version="1.0" encoding="UTF-8"?>
132 |         <section ns:label="header">
133 |             <section ns:label="empty element section" />
134 |             <section ns:label="start/end section"></section>
135 |             <section ns:label="with text">data &lt;escaped&gt;</section>
136 |             </section>
137 |     "#;
138 | 
139 | /// Every event produced by the reader is handed to the writer untouched,
140 | /// so the written bytes must be identical to the `XML` document.
141 | #[test]
142 | fn simple() {
143 |     let mut reader = Reader::from_str(XML);
144 |     let mut writer = Writer::new(Vec::new());
145 |     loop {
146 |         let event = reader.read_event().unwrap();
147 |         if matches!(event, Eof) {
148 |             break;
149 |         }
150 |         assert!(writer.write_event(event).is_ok());
151 |     }
152 |     assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), XML);
153 | }
154 | 
155 | /// Round-trips `documents/test_writer.xml` with text trimming enabled on the reader;
156 | /// apart from that the events are handed to the writer without any processing.
157 | #[test]
158 | fn with_trim() {
159 |     let input = include_str!("documents/test_writer.xml").trim();
160 |     let mut reader = Reader::from_str(input);
161 |     reader.config_mut().trim_text(true);
162 |     let mut writer = Writer::new(Vec::new());
163 |     loop {
164 |         let event = reader.read_event().unwrap();
165 |         if matches!(event, Eof) {
166 |             break;
167 |         }
168 |         assert!(writer.write_event(event).is_ok());
169 |     }
170 |     assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), input);
171 | }
172 | 
173 | /// Round-trips `documents/test_writer.xml` with text trimming enabled on the reader,
174 | /// handing the writer a borrowed event (`borrow()`) instead of the event itself.
175 | #[test]
176 | fn with_trim_ref() {
177 |     let input = include_str!("documents/test_writer.xml").trim();
178 |     let mut reader = Reader::from_str(input);
179 |     reader.config_mut().trim_text(true);
180 |     let mut writer = Writer::new(Vec::new());
181 |     loop {
182 |         let event = reader.read_event().unwrap();
183 |         if matches!(event, Eof) {
184 |             break;
185 |         }
186 |         assert!(writer.write_event(event.borrow()).is_ok()); // either `event` or `&event`
187 |     }
188 |     assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), input);
189 | }
190 | 
191 | /// Round-trips `documents/test_writer_indent.xml`: the reader trims text and the
192 | /// writer is configured with an indent of 4 spaces, as used in the document itself.
193 | #[test]
194 | fn with_indent() {
195 |     let input = include_str!("documents/test_writer_indent.xml");
196 |     let mut reader = Reader::from_str(input);
197 |     reader.config_mut().trim_text(true);
198 |     let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4);
199 |     loop {
200 |         let event = reader.read_event().unwrap();
201 |         if matches!(event, Eof) {
202 |             break;
203 |         }
204 |         assert!(writer.write_event(event).is_ok());
205 |     }
206 |     let output = String::from_utf8(writer.into_inner()).unwrap();
207 |     assert_eq!(output, input);
208 | }
209 | 
210 | /// Round-trips `documents/test_writer_indent_cdata.xml`, a document that contains
211 | /// a CDATA section: the reader trims text and the writer is configured with an
212 | /// indent of 4 spaces, as used in the document itself.
213 | #[test]
214 | fn with_indent_cdata() {
215 |     let input = include_str!("documents/test_writer_indent_cdata.xml");
216 |     let mut reader = Reader::from_str(input);
217 |     reader.config_mut().trim_text(true);
218 |     let mut writer = Writer::new_with_indent(Vec::new(), b' ', 4);
219 |     loop {
220 |         let event = reader.read_event().unwrap();
221 |         if matches!(event, Eof) {
222 |             break;
223 |         }
224 |         assert!(writer.write_event(event).is_ok());
225 |     }
226 |     let output = String::from_utf8(writer.into_inner()).unwrap();
227 |     assert_eq!(output, input);
228 | }
229 | 
230 | /// `Text` events are decoded and written as a fresh `BytesText`; all other events go to the writer as is.
231 | #[test]
232 | fn reescape_text() {
233 |     let mut reader = Reader::from_str(XML);
234 |     let mut writer = Writer::new(Vec::new());
235 |     loop {
236 |         let written = match reader.read_event().unwrap() {
237 |             Eof => break,
238 |             Text(raw) => {
239 |                 let decoded = raw.decode().unwrap();
240 |                 writer.write_event(Text(BytesText::new(&decoded)))
241 |             }
242 |             other => writer.write_event(other),
243 |         };
244 |         assert!(written.is_ok());
245 |     }
246 |     let output = String::from_utf8(writer.into_inner()).unwrap();
247 |     assert_eq!(output, XML);
248 | }
249 | 
250 | /// Copies a document while renaming `source` to `copy` and appending extra attributes to the start tag.
251 | #[test]
252 | fn partial_rewrite() {
253 |     let str_from = r#"<source attr="val"></source>"#;
254 |     let expected = r#"<copy attr="val" a="b" c="d" x="y&quot;z"></copy>"#;
255 |     let mut reader = Reader::from_str(str_from);
256 |     let mut writer = Writer::new(Vec::new());
257 |     loop {
258 |         let event = match reader.read_event().unwrap() {
259 |             Eof => break,
260 |             Start(source) => {
261 |                 let mut attrs = source
262 |                     .attributes()
263 |                     .collect::<std::result::Result<Vec<_>, AttrError>>()
264 |                     .unwrap();
265 |                 attrs.push(("a", "b").into());
266 |                 attrs.push(("c", "d").into());
267 |                 let mut copy = BytesStart::new("copy");
268 |                 copy.extend_attributes(attrs);
269 |                 copy.push_attribute(("x", "y\"z"));
270 |                 Start(copy)
271 |             }
272 |             End(_) => End(BytesEnd::new("copy")),
273 |             other => other,
274 |         };
275 |         assert!(writer.write_event(event).is_ok());
276 |     }
277 |     assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), expected);
278 | }
279 | 


--------------------------------------------------------------------------------
/tests/serde-migrated.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt::Debug;
  2 | 
  3 | use quick_xml::de::from_str;
  4 | use serde::{de, ser};
  5 | use serde::{Deserialize, Serialize};
  6 | 
  7 | use pretty_assertions::assert_eq;
  8 | 
  9 | #[derive(PartialEq, Debug, Serialize, Deserialize)]
 10 | struct Simple { // target type of `test_parse_unfinished`
 11 |     a: (),
 12 |     b: usize,
 13 |     c: String,
 14 |     d: Option<String>,
 15 | }
 16 | 
 17 | /// Deserializes every XML string in `errors` into `T` and compares the result with
 18 | /// the expected value paired with it. Panics with the index of the offending pair.
 19 | #[track_caller]
 20 | fn test_parse_ok<T>(errors: &[(&str, T)])
 21 | where
 22 |     T: PartialEq + Debug + ser::Serialize + for<'de> de::Deserialize<'de>,
 23 | {
 24 |     for (i, &(s, ref value)) in errors.iter().enumerate() {
 25 |         match from_str::<T>(s) {
 26 |             Ok(v) => assert_eq!(
 27 |                 v, *value,
 28 |                 "{} error, expected: {:?}, found: {:?}",
 29 |                 i, value, v
 30 |             ),
 31 |             Err(e) => panic!("{} error, expected {:?}, found error {}", i, value, e),
 32 |         }
 33 |         // // Make sure we can deserialize into an `Element`.
 34 |         // let xml_value: Element = from_str(s).unwrap();
 35 |         // // Make sure we can deserialize from an `Element`.
 36 |         // let v: T = from_value(xml_value.clone()).unwrap();
 37 |         // assert_eq!(v, *value);
 38 |     }
 39 | }
 40 | 
 41 | /// Checks that none of the XML strings in `errors` can be deserialized into `T`.
 42 | #[track_caller]
 43 | fn test_parse_err<T>(errors: &[&str])
 44 | where
 45 |     T: PartialEq + Debug + ser::Serialize + for<'de> de::Deserialize<'de>,
 46 | {
 47 |     for &s in errors {
 48 |         // Name the offending input so a failure can be traced to a concrete case
 49 |         assert!(from_str::<T>(s).is_err(), "expected an error for {:?}", s);
 50 |     }
 51 | }
 50 | 
 51 | /// The prefixed element `gesmes:subject` is expected to fill the `subject` field
 52 | /// of a struct deserialized from the prefixed root element.
 53 | #[test]
 54 | fn test_namespaces() {
 55 |     #[derive(PartialEq, Serialize, Deserialize, Debug)]
 56 |     struct Envelope {
 57 |         subject: String,
 58 |     }
 59 |     let xml = r#"
 60 |     <?xml version="1.0" encoding="UTF-8"?>
 61 |     <gesmes:Envelope xmlns:gesmes="http://www.gesmes.org/xml/2002-08-01" xmlns="http://www.ecb.int/vocabulary/2002-08-01/eurofxref">
 62 |         <gesmes:subject>Reference rates</gesmes:subject>
 63 |     </gesmes:Envelope>"#;
 64 |     let expected = Envelope {
 65 |         subject: "Reference rates".to_string(),
 66 |     };
 67 |     test_parse_ok(&[(xml, expected)]);
 68 | }
 69 | 
 70 | /// The prefixed attribute `xsi:schemaLocation`, whose value spans two lines, is
 71 | /// expected to fill the field renamed to that qualified name. Currently ignored.
 72 | #[test]
 73 | #[ignore] // FIXME
 74 | fn test_forwarded_namespace() {
 75 |     #[derive(PartialEq, Serialize, Deserialize, Debug)]
 76 |     struct Graphml {
 77 |         #[serde(rename = "xsi:schemaLocation")]
 78 |         schema_location: String,
 79 |     }
 80 |     let xml = r#"
 81 |     <?xml version="1.0" encoding="UTF-8"?>
 82 |     <graphml xmlns="http://graphml.graphdrawing.org/xmlns"
 83 |         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 84 |         xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
 85 |         http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
 86 |     </graphml>"#;
 87 |     let expected = Graphml {
 88 |         schema_location: "http://graphml.graphdrawing.org/xmlns
 89 |         http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd"
 90 |             .to_string(),
 91 |     };
 92 |     test_parse_ok(&[(xml, expected)]);
 93 | }
 94 | 
 95 | /// String content: plain text, empty and whitespace-only elements (both expected
 96 | /// to give an empty string), escaped markup and numeric character references.
 97 | #[test]
 98 | fn test_parse_string() {
 99 |     let cases = [
100 |         ("<bla>This is a String</bla>", "This is a String".to_string()),
101 |         ("<bla></bla>", "".to_string()),
102 |         ("<bla>     </bla>", "".to_string()),
103 |         ("<bla>&lt;boom/&gt;</bla>", "<boom/>".to_string()),
104 |         ("<bla>&#9835;</bla>", "♫".to_string()),
105 |         ("<bla>&#x266B;</bla>", "♫".to_string()),
106 |         //(
107 |         //    "<bla>♫<![CDATA[<cookies/>]]>♫</bla>",
108 |         //    "♫<cookies/>♫".to_string(),
109 |         //),
110 |     ];
111 |     test_parse_ok(&cases);
112 | }
113 | 
114 | /// Expects whitespace-only content to be kept verbatim in a `String`. Currently ignored.
115 | #[test]
116 | #[ignore] // FIXME
117 | fn test_parse_string_not_trim() {
118 |     let cases = [("<bla>     </bla>", "     ".to_string())];
119 |     test_parse_ok(&cases);
120 | }
119 | 
120 | /// `Option<String>`: every element listed here is expected to give `Some`, with an
121 | /// empty string when the element is empty or holds only whitespace.
122 | #[test]
123 | fn test_option() {
124 |     let cases = [
125 |         ("<a/>", Some("".to_string())),
126 |         ("<a></a>", Some("".to_string())),
127 |         ("<a> </a>", Some("".to_string())),
128 |         ("<a>42</a>", Some("42".to_string())),
129 |     ];
130 |     test_parse_ok(&cases);
131 | }
129 | 
130 | /// Expects whitespace-only content to be kept verbatim inside `Some`. Currently ignored.
131 | #[test]
132 | #[ignore] // FIXME
133 | fn test_option_not_trim() {
134 |     let cases = [("<a> </a>", Some(" ".to_string()))];
135 |     test_parse_ok(&cases);
136 | }
135 | 
136 | /// A document whose root element is never closed must not deserialize into `Simple`.
137 | #[test]
138 | fn test_parse_unfinished() {
139 |     let unfinished = "<Simple>
140 |             <c>abc</c>
141 |             <a/>
142 |             <b>2</b>
143 |             <d/>";
144 |     test_parse_err::<Simple>(&[unfinished]);
145 | }
144 | 
145 | #[test]
146 | fn test_things_qc_found() {
147 |     test_parse_err::<u32>(&["<\u{0}:/"]);
148 | }
149 | 


--------------------------------------------------------------------------------
/tests/serde_helpers/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Utility functions for serde integration tests
 2 | 
 3 | use quick_xml::de::Deserializer;
 4 | use quick_xml::DeError;
 5 | use serde::Deserialize;
 6 | 
 7 | /// Deserializes an instance of type `T` from a string of XML text.
 8 | /// If deserialization succeeded, panics unless all XML events were consumed.
 9 | pub fn from_str<'de, T>(source: &'de str) -> Result<T, DeError>
10 | where
11 |     T: Deserialize<'de>,
12 | {
13 |     // Log the XML that we try to deserialize, to see it in the output of failed tests
14 |     dbg!(source);
15 |     let mut de = Deserializer::from_str(source);
16 |     let result = T::deserialize(&mut de);
17 | 
18 |     // If the type was deserialized, the whole XML document should be consumed
19 |     if result.is_ok() {
20 |         assert!(de.is_empty(), "the whole XML document should be consumed");
21 |     }
22 | 
23 |     result
24 | }
25 | 


--------------------------------------------------------------------------------
/tests/serde_roundtrip.rs:
--------------------------------------------------------------------------------
 1 | use quick_xml::{de::from_str, se::to_string};
 2 | use serde::{Deserialize, Serialize};
 3 | 
 4 | use pretty_assertions::assert_eq;
 5 | 
 6 | #[derive(Debug, Serialize, Deserialize, PartialEq)]
 7 | enum Node { // element type of `Nodes::items`
 8 |     Boolean(bool), // newtype variant
 9 |     Identifier { value: String, index: u32 }, // struct variant
10 |     EOF, // unit variant
11 | }
12 | 
13 | #[derive(Debug, Serialize, Deserialize, PartialEq)]
14 | struct Nodes { // root type used by `round_trip_list_of_enums`
15 |     #[serde(rename = "$value")]
16 |     items: Vec<Node>,
17 | }
18 | 
19 | /// Serializes a `Nodes` value holding one `Node` of every variant, compares the
20 | /// produced XML with the expected text and deserializes that XML back again.
21 | /// Currently ignored.
22 | #[test]
23 | #[ignore]
24 | fn round_trip_list_of_enums() {
25 |     // Build the value to serialize
26 |     let identifier = Node::Identifier {
27 |         value: "foo".to_string(),
28 |         index: 5,
29 |     };
30 |     let nodes = Nodes {
31 |         items: vec![Node::Boolean(true), identifier, Node::EOF],
32 |     };
33 | 
34 |     let should_be = r#"
35 |     <Nodes>
36 |         <Boolean>
37 |             true
38 |         </Boolean>
39 |         <Identifier>
40 |             <value>foo</value>
41 |             <index>5</index>
42 |         </Identifier>
43 |         <EOF />
44 |     </Nodes>"#;
45 | 
46 |     // Serialization must give exactly the expected text
47 |     let xml = to_string(&nodes).unwrap();
48 |     assert_eq!(xml, should_be);
49 | 
50 |     // Reading that text back must give a value equal to the one we started from
51 |     let restored = from_str::<Nodes>(&xml).unwrap();
52 |     assert_eq!(restored, nodes);
53 | }
54 | 


--------------------------------------------------------------------------------
/tests/writer.rs:
--------------------------------------------------------------------------------
  1 | use quick_xml::events::{
  2 |     BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event::*,
  3 | };
  4 | use quick_xml::writer::Writer;
  5 | 
  6 | use pretty_assertions::assert_eq;
  7 | 
  8 | /// Writing of the XML declaration with different sets of pseudo-attributes
  9 | mod declaration {
 10 |     use super::*;
 11 |     use pretty_assertions::assert_eq;
 12 | 
 13 |     /// Version, encoding and standalone are all supplied
 14 |     #[test]
 15 |     fn full() {
 16 |         let decl = BytesDecl::new("1.2", Some("utf-X"), Some("yo"));
 17 |         let mut writer = Writer::new(Vec::new());
 18 |         writer
 19 |             .write_event(Decl(decl))
 20 |             .expect("writing xml decl should succeed");
 21 |         let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
 22 |         assert_eq!(
 23 |             output,
 24 |             r#"<?xml version="1.2" encoding="utf-X" standalone="yo"?>"#,
 25 |             "writer output (LHS)"
 26 |         );
 27 |     }
 28 | 
 29 |     /// Only version and standalone are supplied
 30 |     #[test]
 31 |     fn standalone() {
 32 |         let decl = BytesDecl::new("1.2", None, Some("yo"));
 33 |         let mut writer = Writer::new(Vec::new());
 34 |         writer
 35 |             .write_event(Decl(decl))
 36 |             .expect("writing xml decl should succeed");
 37 |         let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
 38 |         assert_eq!(
 39 |             output,
 40 |             r#"<?xml version="1.2" standalone="yo"?>"#,
 41 |             "writer output (LHS)"
 42 |         );
 43 |     }
 44 | 
 45 |     /// Only version and encoding are supplied
 46 |     #[test]
 47 |     fn encoding() {
 48 |         let decl = BytesDecl::new("1.2", Some("utf-X"), None);
 49 |         let mut writer = Writer::new(Vec::new());
 50 |         writer
 51 |             .write_event(Decl(decl))
 52 |             .expect("writing xml decl should succeed");
 53 |         let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
 54 |         assert_eq!(
 55 |             output,
 56 |             r#"<?xml version="1.2" encoding="utf-X"?>"#,
 57 |             "writer output (LHS)"
 58 |         );
 59 |     }
 60 | 
 61 |     /// Only the version is supplied
 62 |     #[test]
 63 |     fn version() {
 64 |         let decl = BytesDecl::new("1.2", None, None);
 65 |         let mut writer = Writer::new(Vec::new());
 66 |         writer
 67 |             .write_event(Decl(decl))
 68 |             .expect("writing xml decl should succeed");
 69 |         let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
 70 |         assert_eq!(
 71 |             output,
 72 |             r#"<?xml version="1.2"?>"#,
 73 |             "writer output (LHS)"
 74 |         );
 75 |     }
 76 | 
 77 |     /// Empty values of the declaration pseudo-attributes must not cause any problem.
 78 |     #[test]
 79 |     fn empty() {
 80 |         // An empty version should arguably be an error, but nobody is expected to supply one
 81 |         let decl = BytesDecl::new("", Some(""), Some(""));
 82 |         let mut writer = Writer::new(Vec::new());
 83 |         writer
 84 |             .write_event(Decl(decl))
 85 |             .expect("writing xml decl should succeed");
 86 |         let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
 87 |         assert_eq!(
 88 |             output,
 89 |             r#"<?xml version="" encoding="" standalone=""?>"#,
 90 |             "writer output (LHS)"
 91 |         );
 92 |     }
 93 | }
 94 | 
 95 | /// A `PI` event is written as its content wrapped into `<?` and `?>`.
 96 | #[test]
 97 | fn pi() {
 98 |     let mut writer = Writer::new(Vec::new());
 99 |     writer
100 |         .write_event(PI(BytesPI::new("xml-stylesheet href='theme.xls' ")))
101 |         .expect("writing processing instruction should succeed");
102 |     let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
103 |     assert_eq!(
104 |         output,
105 |         "<?xml-stylesheet href='theme.xls' ?>",
106 |         "writer output (LHS)"
107 |     );
108 | }
109 | 
110 | /// An `Empty` event is written as a self-closed tag together with its attributes.
111 | #[test]
112 | fn empty() {
113 |     let tag = BytesStart::new("game").with_attributes([("publisher", "Blizzard")]);
114 |     let mut writer = Writer::new(Vec::new());
115 |     writer
116 |         .write_event(Empty(tag))
117 |         .expect("writing empty tag should succeed");
118 | 
119 |     let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
120 |     assert_eq!(
121 |         output,
122 |         r#"<game publisher="Blizzard"/>"#,
123 |         "writer output (LHS)"
124 |     );
125 | }
126 | 
127 | /// A `Start` event is written as an opening tag together with its attributes.
128 | #[test]
129 | fn start() {
130 |     let tag = BytesStart::new("info").with_attributes([("genre", "RTS")]);
131 |     let mut writer = Writer::new(Vec::new());
132 |     writer
133 |         .write_event(Start(tag))
134 |         .expect("writing start tag should succeed");
135 | 
136 |     let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
137 |     assert_eq!(
138 |         output,
139 |         r#"<info genre="RTS">"#,
140 |         "writer output (LHS)"
141 |     );
142 | }
143 | 
144 | /// An `End` event is written as a closing tag.
145 | #[test]
146 | fn end() {
147 |     let mut writer = Writer::new(Vec::new());
148 |     writer
149 |         .write_event(End(BytesEnd::new("info")))
150 |         .expect("writing end tag should succeed");
151 |     let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
152 |     assert_eq!(
153 |         output,
154 |         "</info>",
155 |         "writer output (LHS)"
156 |     );
157 | }
158 | 
159 | /// The `&` of a `Text` event created with `BytesText::new` is expected as `&amp;` in the output.
160 | #[test]
161 | fn text() {
162 |     let content = BytesText::new("Kerrigan & Raynor: The Z[erg] programming language");
163 |     let mut writer = Writer::new(Vec::new());
164 |     writer
165 |         .write_event(Text(content))
166 |         .expect("writing text should succeed");
167 | 
168 |     let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
169 |     assert_eq!(
170 |         output,
171 |         "Kerrigan &amp; Raynor: The Z[erg] programming language",
172 |         "writer output (LHS)"
173 |     );
174 | }
175 | 
176 | /// The content of a `CData` event is expected verbatim between `<![CDATA[` and `]]>`.
177 | #[test]
178 | fn cdata() {
179 |     let content = BytesCData::new("Kerrigan & Raynor: The Z[erg] programming language");
180 |     let mut writer = Writer::new(Vec::new());
181 |     writer
182 |         .write_event(CData(content))
183 |         .expect("writing CDATA section should succeed");
184 | 
185 |     let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
186 |     assert_eq!(
187 |         output,
188 |         "<![CDATA[Kerrigan & Raynor: The Z[erg] programming language]]>",
189 |         "writer output (LHS)"
190 |     );
191 | }
192 | 
193 | /// Comment text created with `from_escaped` is expected verbatim between `<!--` and `-->`.
194 | #[test]
195 | fn comment() {
196 |     let content = BytesText::from_escaped("Kerrigan & Raynor: The Z[erg] programming language");
197 |     let mut writer = Writer::new(Vec::new());
198 |     writer
199 |         .write_event(Comment(content))
200 |         .expect("writing comment should succeed");
201 | 
202 |     let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
203 |     assert_eq!(
204 |         output,
205 |         "<!--Kerrigan & Raynor: The Z[erg] programming language-->",
206 |         "writer output (LHS)"
207 |     );
208 | }
209 | 
210 | /// The content of a `DocType` event is expected after `<!DOCTYPE ` and before `>`.
211 | #[test]
212 | fn doctype() {
213 |     let mut writer = Writer::new(Vec::new());
214 |     writer
215 |         .write_event(DocType(BytesText::new("some DTD here...")))
216 |         .expect("writing DTD should succeed");
217 |     let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
218 |     assert_eq!(
219 |         output,
220 |         "<!DOCTYPE some DTD here...>",
221 |         "writer output (LHS)"
222 |     );
223 | }
224 | 
225 | /// Writing an `Eof` event succeeds and leaves the output empty.
226 | #[test]
227 | fn eof() {
228 |     let mut writer = Writer::new(Vec::new());
229 |     writer.write_event(Eof).expect("writing EOF should succeed");
230 |     let output = String::from_utf8(writer.into_inner()).expect("utf-8 output");
231 |     assert_eq!(
232 |         output,
233 |         "",
234 |         "writer output (LHS)"
235 |     );
236 | }
237 | 


--------------------------------------------------------------------------------