├── .github
    ├── FUNDING.yml
    └── workflows
    │   └── main.yml
├── .gitignore
├── .rustfmt.toml
├── Cargo.toml
├── Changelog.md
├── LICENSE
├── README.md
├── benches
    └── bench.rs
├── examples
    ├── print_events.rs
    ├── rewrite.rs
    └── xml-analyze.rs
├── src
    ├── attribute.rs
    ├── common.rs
    ├── escape.rs
    ├── lib.rs
    ├── macros.rs
    ├── name.rs
    ├── namespace.rs
    ├── reader.rs
    ├── reader
    │   ├── config.rs
    │   ├── error.rs
    │   ├── events.rs
    │   ├── indexset.rs
    │   ├── lexer.rs
    │   ├── parser.rs
    │   └── parser
    │   │   ├── inside_cdata.rs
    │   │   ├── inside_closing_tag_name.rs
    │   │   ├── inside_comment.rs
    │   │   ├── inside_declaration.rs
    │   │   ├── inside_doctype.rs
    │   │   ├── inside_opening_tag.rs
    │   │   ├── inside_processing_instruction.rs
    │   │   ├── inside_reference.rs
    │   │   └── outside_tag.rs
    ├── util.rs
    ├── writer.rs
    └── writer
    │   ├── config.rs
    │   ├── emitter.rs
    │   └── events.rs
└── tests
    ├── cases
        ├── autosar.xml
        ├── feed.xml
        ├── quote.xml
        └── xmlnsquote.xml
    ├── documents
        ├── sample_1.xml
        ├── sample_1_full.txt
        ├── sample_1_short.txt
        ├── sample_2.xml
        ├── sample_2_full.txt
        ├── sample_2_short.txt
        ├── sample_3.xml
        ├── sample_3_full.txt
        ├── sample_3_short.txt
        ├── sample_4.xml
        ├── sample_4_full.txt
        ├── sample_4_short.txt
        ├── sample_5.xml
        ├── sample_5_short.txt
        ├── sample_6.xml
        ├── sample_6_full.txt
        ├── sample_7.xml
        ├── sample_7_full.txt
        ├── sample_8.xml
        ├── sample_8_c.txt
        ├── sample_8_coalesce_all.txt
        ├── sample_8_coalesce_cwscdch.txt
        ├── sample_8_coalesce_wscdch.txt
        ├── sample_8_full.txt
        ├── sample_8_wscdch.txt
        └── sample_8_wsch.txt
    ├── errata2e.fail.txt
    ├── errata3e.fail.txt
    ├── errata4e.fail.txt
    ├── event_reader.rs
    ├── event_writer.rs
    ├── ibm_oasis_valid.fail.txt
    ├── ibm_valid.fail.txt
    ├── oasis.fail.txt
    ├── rmt-ns10.fail.txt
    ├── rmt-ns11.fail.txt
    ├── streaming.rs
    ├── sun-not-wf.fail.txt
    ├── sun-valid.fail.txt
    ├── tests.xml
    ├── xml11.fail.txt
    ├── xmlconf.rs
    ├── xmltest.fail.txt
    └── xmlts20130923.zip


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: ["kornelski"]
2 | liberapay: ["kornel"]
3 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       matrix:
14 |         rust: [beta, "1.70.0"]
15 | 
16 |     steps:
17 |     - uses: actions/checkout@v3
18 |       
19 |     - uses: dtolnay/rust-toolchain@master
20 |       with:
21 |         toolchain: ${{ matrix.rust }}
22 |     - run: cargo test
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.swp
 2 | *.swo
 3 | /doc
 4 | *~
 5 | /target/
 6 | /Cargo.lock
 7 | .idea/
 8 | *.iml
 9 | /tests/xmlconf/
10 | .DS_Store
11 | 


--------------------------------------------------------------------------------
/.rustfmt.toml:
--------------------------------------------------------------------------------
1 | # rustfmt is too aggressive and introduces too many inconsistencies and questionable choices to be applied unconditionally
2 | # please do not use it.
3 | disable_all_formatting = true
4 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "xml"
 3 | version = "1.0.0"
 4 | authors = ["Vladimir Matveev <vmatveev@citrine.cc>", "Kornel (https://github.com/kornelski)"]
 5 | license = "MIT"
 6 | description = "An XML library in pure Rust"
 7 | repository = "https://github.com/kornelski/xml-rs"
 8 | homepage = "https://lib.rs/crates/xml-rs"
 9 | documentation = "https://docs.rs/xml-rs/"
10 | readme = "README.md"
11 | keywords = ["xml", "parser", "sax", "event-reader", "writer"]
12 | categories = ["parser-implementations"]
13 | edition = "2021"
14 | rust-version = "1.70"
15 | include = ["src/**", "LICENSE", "README.md"]
16 | 
17 | [lib]
18 | name = "xml"
19 | path = "src/lib.rs"
20 | 
21 | [badges]
22 | maintenance = { status = "actively-developed" }
23 | 
24 | [package.metadata.docs.rs]
25 | targets = ["x86_64-unknown-linux-gnu"]
26 | rustdoc-args = ["--generate-link-to-definition"]
27 | 
28 | [package.metadata.release]
29 | tag-name = "{{version}}"
30 | tag-message = ""
31 | 


--------------------------------------------------------------------------------
/Changelog.md:
--------------------------------------------------------------------------------
  1 | ## Version 1.0.0
  2 | 
  3 | * Added `Doctype` event
  4 | * Marked structs as `#[non_exhaustive]`
  5 | * Merged `ParserConfig2` back into `ParserConfig`
  6 | * Added option to the writer to pass through XML markup unmodified
  7 | * `xml-analyze` binary has been moved to examples
  8 | * Writer escapes `--` in comments and `]]>` in CDATA
  9 | 
 10 | ## Version 0.8.27
 11 | 
 12 | * Added detection of invalid `<?` in attributes
 13 | 
 14 | ## Version 0.8.26
 15 | 
 16 | * Fixed buffering of files with a broken UTF-16 encoding
 17 | 
 18 | ## Version 0.8.25
 19 | 
 20 | * `TryFrom` for converting from reader to writer events, to make `.as_writer_event()` more discoverable.
 21 | 
 22 | ## Version 0.8.24
 23 | 
 24 | * Fixed reporting of line/column position of CDATA when trimming whitespace
 25 | 
 26 | ## Version 0.8.23
 27 | 
 28 | * StartDocument event will consistently use uppercase "UTF-8" name for encoding when the document did not declare it explicitly, but beware that documents can still use lowercase encoding names, so you must always use case-insensitive comparisons.
 29 | 
 30 | ## Version 0.8.22
 31 | 
 32 | * Ability to retrieve the whole DOCTYPE. For backwards compatibility, it's a getter on the reader, not an event.
 33 | 
 34 | ## Version 0.8.21
 35 | 
 36 | * Added `EventWriter::inner_ref`
 37 | * ~15% performance improvement
 38 | 
 39 | ## Version 0.8.20
 40 | 
 41 | * Fixed escaping of literal `]]>` in CDATA
 42 | 
 43 | ## Version 0.8.19
 44 | 
 45 | * Fixed whitespace event when parsing DOCTYPE with internal subset
 46 | 
 47 | ## Version 0.8.18
 48 | 
 49 | * Option to tolerate invalid entities and chars
 50 | 
 51 | ## Version 0.8.17
 52 | 
 53 | * Added configuration for document size/complexity limits.
 54 | 
 55 | ## Version 0.8.16
 56 | 
 57 | * Fixed error line numbers when parsing CDATA as characters
 58 | 
 59 | ## Version 0.8.15
 60 | 
 61 | * Improved speed of parsing elements with a huge number of attributes
 62 | 
 63 | ## Version 0.8.14
 64 | 
 65 | * Fixed error line numbers when ignoring comments
 66 | 
 67 | ## Version 0.8.13
 68 | 
 69 | * Backward-compatibility fix
 70 | 
 71 | ## Version 0.8.12
 72 | 
 73 | * Improved conformance of parsing invalid codepoints, XML prolog
 74 | * Reduced number of allocations
 75 | 
 76 | ## Version 0.8.11
 77 | 
 78 | * Improved conformance of PI
 79 | * Forbidden invalid multiple root elements, unless an option allowing them is enabled.
 80 | 
 81 | ## Version 0.8.10
 82 | 
 83 | * Improved parsing conformance
 84 | * Internal error handling improvements
 85 | 
 86 | ## Version 0.8.9
 87 | 
 88 | * Added support for UTF-16 and ASCII
 89 | * Fixed CDATA parsing
 90 | * Added PE entities parsing
 91 | 
 92 | ## Version 0.8.8
 93 | 
 94 | * Added recursive entity expansion (with length protection)
 95 | * Expanded parsing of DTD
 96 | 
 97 | ## Version 0.8.7
 98 | 
 99 | * Basic parsing of DTD internal subset
100 | * Speed improvements
101 | 
102 | ## Version 0.8.6
103 | 
104 | * Fixed parsing of incorrectly nested comments and processing instructions
105 | 
106 | ## Version 0.8.5
107 | 
108 | * Updated source code to edition 2018 and fixed/updated some Rust idioms.
109 | 
110 | ## Version 0.8.4
111 | 
112 | * Fixed recognition of `?>`, `]]>` and `/>` tokens as characters.
113 | * Fixed writer output operations to use `write_all` to ensure that the data
114 |   is written fully.
115 | * The document declaration is now written before any characters automatically.
116 | 
117 | ## Version 0.8.3
118 | 
119 | * Added a new parser option, `ignore_root_level_whitespace`, which makes the parser
120 |   skip emitting whitespace events outside of the root element when set to `true`.
121 |   This helps with certain tasks like canonicalization.
122 | 
123 | ## Version 0.8.2
124 | 
125 | * Added a new parser option, `replace_unknown_entity_references`, which allows ignoring
126 |   invalid Unicode code points and replace them with a Unicode "replacement character"
127 |   during parsing. This can be helpful to deal with e.g. UTF-16 surrogate pairs.
128 | * Added a new emitter option, `pad_self_closing`, which determines the style of the self-closing
129 |   elements when they are emitted: `<a />` (`true`) vs `<a/>` (`false`).
130 | 
131 | ## Version 0.8.1
132 | 
133 | * Fixed various issues with tests introduced by updates in Rust.
134 | * Adjusted the lexer to ignore contents of the `<!DOCTYPE>` tag.
135 | * Removed unnecessary unsafety in tests.
136 | * Added tests for doc comments in the readme file.
137 | * Switched to GitHub Actions from Travis CI.
138 | 
139 | ## Version 0.8.0
140 | 
141 | * Same as 0.7.1, with 0.7.1 being yanked because of the incorrect semver bump.
142 | 
143 | ## Version 0.7.1
144 | 
145 | * Removed dependency on bitflags.
146 | * Added the `XmlWriter::inner_mut()` method.
147 | * Fixed some rustdoc warnings.
148 | 
149 | ## Version 0.7.0
150 | 
151 | * Same as 0.6.2, with 0.6.2 being yanked because of the incompatible bump of minimum required version of rustc.
152 | 
153 | ## Version 0.6.2
154 | 
155 | * Bumped `bitflags` to 1.0.
156 | 
157 | ## Version 0.6.1
158 | 
159 | * Fixed the writer to escape some special characters when writing attribute values.
160 | 
161 | ## Version 0.6.0
162 | 
163 | * Changed the target type of extra entities from `char` to `String`. This is an incompatible
164 |   change.
165 | 
166 | ## Version 0.5.0
167 | 
168 | * Added support for ignoring EOF errors in order to read documents from streams incrementally.
169 | * Bumped `bitflags` to 0.9.
170 | 
171 | ## Version 0.4.1
172 | 
173 | * Added missing `Debug` implementation to `xml::writer::XmlEvent`.
174 | 
175 | ## Version 0.4.0
176 | 
177 | * Bumped version number, since changes introduced in 0.3.7 break backwards compatibility.
178 | 
179 | ## Version 0.3.8
180 | 
181 | * Fixed a problem introduced in 0.3.7 with entities in attributes causing parsing errors.
182 | 
183 | ## Version 0.3.7
184 | 
185 | * Fixed the problem with parsing non-whitespace character entities as whitespace (issue #140).
186 | * Added support for configuring custom entities in the parser configuration.
187 | 
188 | ## Version 0.3.6
189 | 
190 | * Added an `Error` implementation for `EmitterError`.
191 | * Fixed escaping of strings with multi-byte code points.
192 | 
193 | ## Version 0.3.5
194 | 
195 | * Added `Debug` implementation for `XmlVersion`.
196 | * Fixed some failing tests.
197 | 
198 | ## Version 0.3.3
199 | 
200 | * Updated `bitflags` to 0.7.
201 | 
202 | ## Version 0.3.2
203 | 
204 | * Added `From<io::Error>` for `xml::reader::Error`, which improves usability of working with parsing errors.
205 | 
206 | ## Version 0.3.1
207 | 
208 | * Bumped `bitflags` dependency to 0.4, some internal warning fixes.
209 | 
210 | ## Version 0.3.0
211 | 
212 | * Changed error handling in `EventReader` - now I/O errors are properly bubbled up from the lexer.
213 | 
214 | ## Version 0.2.4
215 | 
216 | * Fixed #112 - incorrect handling of namespace redefinitions when writing a document.
217 | 
218 | ## Version 0.2.3
219 | 
220 | * Added `into_inner()` methods to `EventReader` and `EventWriter`.
221 | 
222 | ## Version 0.2.2
223 | 
224 | * Using `join` instead of the deprecated `connect`.
225 | * Added a simple XML analyzer program which demonstrates library usage and can be used to check XML documents for well-formedness.
226 | * Fixed incorrect handling of unqualified attribute names (#107).
227 | * Added this changelog.
228 | 
229 | ## Version 0.2.1
230 | 
231 | * Fixed #105 - incorrect handling of double dashes.
232 | 
233 | ## Version 0.2.0
234 | 
235 | * Major update, includes proper document writing support and significant architecture changes.
236 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014 Vladimir Matveev
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | xml-rs, an XML library for Rust
  2 | ===============================
  3 | 
  4 | [![CI](https://github.com/kornelski/xml-rs/actions/workflows/main.yml/badge.svg)](https://github.com/kornelski/xml-rs/actions/workflows/main.yml)
  5 | [![crates.io][crates-io-img]](https://lib.rs/crates/xml-rs)
  6 | [![docs][docs-img]](https://docs.rs/xml-rs/)
  7 | 
  8 | [Documentation](https://docs.rs/xml-rs/)
  9 | 
 10 |   [crates-io-img]: https://img.shields.io/crates/v/xml-rs.svg
 11 |   [docs-img]: https://img.shields.io/badge/docs-latest%20release-6495ed.svg
 12 | 
 13 | xml-rs is an XML library for the [Rust](https://www.rust-lang.org/) programming language.
 14 | It supports reading and writing of XML documents in a streaming fashion (without DOM).
 15 | 
 16 | ### Features
 17 | 
 18 | * XML spec conformance better than other pure-Rust libraries.
 19 | 
 20 | * Easy to use API based on `Iterator`s and regular `String`s without tricky lifetimes.
 21 | 
 22 | * Support for UTF-16, UTF-8, ISO-8859-1, and ASCII encodings.
 23 | 
 24 | * Written entirely in the safe Rust subset. Designed to safely handle untrusted input.
 25 | 
 26 | 
 27 | The API is heavily inspired by Java Streaming API for XML ([StAX][stax]). It contains a pull parser much like StAX event reader. It provides an iterator API, so you can leverage Rust's existing iterators library features.
 28 | 
 29 |   [stax]: https://en.wikipedia.org/wiki/StAX
 30 | 
 31 | It also provides a streaming document writer much like StAX event writer.
 32 | This writer consumes its own set of events, but reader events can be converted to
 33 | writer events easily, and so it is possible to write XML transformation chains in a pretty
 34 | clean manner.
 35 | 
 36 | This parser is mostly full-featured, however, there are limitations:
 37 | * Legacy code pages and non-Unicode encodings are not supported;
 38 | * DTD validation is not supported (but entities defined in the internal subset are supported);
 39 | * attribute value normalization is not performed, and end-of-line characters are not normalized either.
 40 | 
 41 | Other than that the parser tries to be mostly XML-1.1-compliant.
 42 | 
 43 | Writer is also mostly full-featured with the following limitations:
 44 | * no support for encodings other than UTF-8,
 45 | * no support for emitting `<!DOCTYPE>` declarations;
 46 | * more validations of input are needed, for example, checking that namespace prefixes are bound
 47 |   or comments are well-formed.
 48 | 
 49 | Building and using
 50 | ------------------
 51 | 
 52 | xml-rs uses [Cargo](https://crates.io), so add it with `cargo add xml` or modify `Cargo.toml`:
 53 | 
 54 | ```toml
 55 | [dependencies]
 56 | xml = "1.0"
 57 | ```
 58 | 
 59 | The package exposes a single crate called `xml`.
 60 | 
 61 | Reading XML documents
 62 | ---------------------
 63 | 
 64 | [`xml::reader::EventReader`][EventReader] requires a [`Read`][stdread] instance to read from. It can be a `File` wrapped in `BufReader`, or a `Vec<u8>`, or a `&[u8]` slice.
 65 | 
 66 | [EventReader]: https://docs.rs/xml-rs/latest/xml/reader/struct.EventReader.html
 67 | [stdread]: https://doc.rust-lang.org/stable/std/io/trait.Read.html
 68 | 
 69 | `EventReader` implements `IntoIterator` trait, so you can use it in a `for` loop directly:
 70 | 
 71 | ```rust,no_run
 72 | use std::fs::File;
 73 | use std::io::BufReader;
 74 | 
 75 | use xml::reader::{EventReader, XmlEvent};
 76 | 
 77 | fn main() -> std::io::Result<()> {
 78 |     let file = File::open("file.xml")?;
 79 |     let file = BufReader::new(file); // Buffering is important for performance
 80 | 
 81 |     let parser = EventReader::new(file);
 82 |     let mut depth = 0;
 83 |     for e in parser {
 84 |         match e {
 85 |             Ok(XmlEvent::StartElement { name, .. }) => {
 86 |                 println!("{:spaces$}+{name}", "", spaces = depth * 2);
 87 |                 depth += 1;
 88 |             }
 89 |             Ok(XmlEvent::EndElement { name }) => {
 90 |                 depth -= 1;
 91 |                 println!("{:spaces$}-{name}", "", spaces = depth * 2);
 92 |             }
 93 |             Err(e) => {
 94 |                 eprintln!("Error: {e}");
 95 |                 break;
 96 |             }
 97 |             // There's more: https://docs.rs/xml-rs/latest/xml/reader/enum.XmlEvent.html
 98 |             _ => {}
 99 |         }
100 |     }
101 | 
102 |     Ok(())
103 | }
104 | ```
105 | 
106 | Document parsing can end normally or with an error. Regardless of exact cause, the parsing
107 | process will be stopped, and the iterator will terminate normally.
108 | 
109 | You can also have finer control over when to pull the next event from the parser using its own
110 | `next()` method:
111 | 
112 | ```rust,ignore
113 | match parser.next() {
114 |     ...
115 | }
116 | ```
117 | 
118 | Upon the end of the document or an error, the parser will remember the last event and will always
119 | return it in the result of `next()` call afterwards. If an iterator is used, then it will yield
120 | the error or end-of-document event once and will produce `None` afterwards.
121 | 
122 | It is also possible to tweak parsing process a little using [`xml::reader::ParserConfig`][ParserConfig] structure.
123 | See its documentation for more information and examples.
124 | 
125 | [ParserConfig]: https://docs.rs/xml-rs/latest/xml/reader/struct.ParserConfig.html
126 | 
127 | You can find a more extensive example of using `EventReader` in `examples/xml-analyze.rs`, which is a
128 | small program (run it with `cargo run --example xml-analyze`) which shows various
129 | statistics about the specified XML document. It can also be used to check for well-formedness of
130 | XML documents - if a document is not well-formed, this program will exit with an error.
131 | 
132 | 
133 | ## Parsing untrusted inputs
134 | 
135 | The parser is written in the safe Rust subset, so by Rust's guarantees the worst that it can do is to cause a panic.
136 | You can use `ParserConfig` to set limits on maximum lengths of names, attributes, text, entities, etc.
137 | You should also set a maximum document size via `io::Read`'s [`take(max)`](https://doc.rust-lang.org/stable/std/io/trait.Read.html#method.take) method.
138 | 
139 | Writing XML documents
140 | ---------------------
141 | 
142 | xml-rs also provides a streaming writer much like StAX event writer. With it you can write an
143 | XML document to any `Write` implementor.
144 | 
145 | ```rust,no_run
146 | use std::io;
147 | use xml::writer::{EmitterConfig, XmlEvent};
148 | 
149 | /// A simple demo syntax where "+foo" makes `<foo>`, "-foo" makes `</foo>`
150 | fn make_event_from_line(line: &str) -> XmlEvent {
151 |     let line = line.trim();
152 |     if let Some(name) = line.strip_prefix("+") {
153 |         XmlEvent::start_element(name).into()
154 |     } else if line.starts_with("-") {
155 |         XmlEvent::end_element().into()
156 |     } else {
157 |         XmlEvent::characters(line).into()
158 |     }
159 | }
160 | 
161 | fn main() -> io::Result<()> {
162 |     let input = io::stdin();
163 |     let output = io::stdout();
164 |     let mut writer = EmitterConfig::new()
165 |         .perform_indent(true)
166 |         .create_writer(output);
167 | 
168 |     let mut line = String::new();
169 |     loop {
170 |         line.clear();
171 |         let bytes_read = input.read_line(&mut line)?;
172 |         if bytes_read == 0 {
173 |             break; // EOF
174 |         }
175 | 
176 |         let event = make_event_from_line(&line);
177 |         if let Err(e) = writer.write(event) {
178 |             panic!("Write error: {e}")
179 |         }
180 |     }
181 |     Ok(())
182 | }
183 | ```
184 | 
185 | The code example above also demonstrates how to create a writer out of its configuration.
186 | A similar approach also works with `EventReader`.
187 | 
188 | The library provides an XML event building DSL which helps to construct complex events,
189 | e.g. ones having namespace definitions. Some examples:
190 | 
191 | ```rust,ignore
192 | // <a:hello a:param="value" xmlns:a="urn:some:document">
193 | XmlEvent::start_element("a:hello").attr("a:param", "value").ns("a", "urn:some:document")
194 | 
195 | // <hello b:config="value" xmlns="urn:default:uri">
196 | XmlEvent::start_element("hello").attr("b:config", "value").default_ns("urn:default:uri")
197 | 
198 | // <![CDATA[some unescaped text]]>
199 | XmlEvent::cdata("some unescaped text")
200 | ```
201 | 
202 | Of course, one can create `XmlEvent` enum variants directly instead of using the builder DSL.
203 | There are more examples in [`xml::writer::XmlEvent`][XmlEvent] documentation.
204 | 
205 | [XmlEvent]: https://docs.rs/xml-rs/latest/xml/writer/enum.XmlEvent.html
206 | 
207 | The writer has multiple configuration options; see [`EmitterConfig`][EmitterConfig] documentation for more
208 | information.
209 | 
210 | [EmitterConfig]: https://docs.rs/xml-rs/latest/xml/writer/struct.EmitterConfig.html
211 | 
212 | Bug reports
213 | ------------
214 | 
215 | Please report issues at: <https://github.com/kornelski/xml-rs/issues>.
216 | 
217 | Before reporting issues with XML conformance, please find the relevant section in the XML spec first.
218 | 
219 | ## [Upgrading from 0.8 to 1.0](https://github.com/kornelski/xml-rs/blob/main/Changelog.md)
220 | 
221 | It should be pretty painless:
222 | 
223 | * Change `xml-rs = "0.8"` to `xml = "1.0"` in `Cargo.toml`
224 | * Add `_ => {}` to `match` statements where the compiler complains. A new `Doctype` event has been added, and error enums are non-exhaustive.
225 | * If you were creating `ParserConfig` using a struct literal, please use `ParserConfig::new()` and the setters.
226 | 


--------------------------------------------------------------------------------
/benches/bench.rs:
--------------------------------------------------------------------------------
 1 | #![feature(test)]
 2 | 
 3 | extern crate test;
 4 | use test::Bencher;
 5 | use xml::{EventReader, EventWriter};
 6 | 
 7 | #[bench]
 8 | fn read(bencher: &mut Bencher) {
 9 |     let xml = std::fs::read("tests/documents/sample_1.xml").unwrap();
10 |     bencher.iter(move || {
11 |         let parser = EventReader::new(xml.as_slice());
12 |         for e in parser {
13 |             e.unwrap();
14 |         }
15 |     });
16 | }
17 | 
18 | #[bench]
19 | fn read_lots_attrs(bencher: &mut Bencher) {
20 |     let xml = r#"<x
21 |         a0="" b0="" c0="" d0="" e0="" f0="" g0="" h0="" i0="" j0="" k0="" l0="" m0="" n0="" o0="" p0="" q0="" r0="" s0="" t0="" u0="" v0="" w0="" x0="" y0="" z0="" a1="" b1="" c1="" d1="" e1="" f1="" g1="" h1="" i1="" j1="" k1="" l1="" m1="" n1="" o1="" p1="" q1="" r1="" s1="" t1="" u1="" v1="" w1="" x1="" y1="" z1="" a2="" b2="" c2="" d2="" e2="" f2="" g2="" h2="" i2="" j2="" k2="" l2="" m2="" n2="" o2="" p2="" q2="" r2="" s2="" t2="" u2="" v2="" w2="" x2="" y2="" z2="" a3="" b3="" c3="" d3="" e3="" f3="" g3="" h3="" i3="" j3="" k3="" l3="" m3="" n3="" o3="" p3="" q3="" r3="" s3="" t3="" u3="" v3="" w3="" x3="" y3="" z3="" a4="" b4="" c4="" d4="" e4="" f4="" g4="" h4="" i4="" j4="" k4="" l4="" m4="" n4="" o4="" p4="" q4="" r4="" s4="" t4="" u4="" v4="" w4="" x4="" y4="" z4="" a5="" b5="" c5="" d5="" e5="" f5="" g5="" h5="" i5="" j5="" k5="" l5="" m5="" n5="" o5="" p5="" q5="" r5="" s5="" t5="" u5="" v5="" w5="" x5="" y5="" z5="" a6="" b6="" c6="" d6="" e6="" f6="" g6="" h6="" i6="" j6="" k6="" l6="" m6="" n6="" o6="" p6="" q6="" r6="" s6="" t6="" u6="" v6="" w6="" x6="" y6="" z6="" a7="" b7="" c7="" d7="" e7="" f7="" g7="" h7="" i7="" j7="" k7="" l7="" m7="" n7="" o7="" p7="" q7="" r7="" s7="" t7="" u7="" v7="" w7="" x7="" y7="" z7="" a8="" b8="" c8="" d8="" e8="" f8="" g8="" h8="" i8="" j8="" k8="" l8="" m8="" n8="" o8="" p8="" q8="" r8="" s8="" t8="" u8="" v8="" w8="" x8="" y8="" z8="" a9="" b9="" c9="" d9="" e9="" f9="" g9="" h9="" i9="" j9="" k9="" l9="" m9="" n9="" o9="" p9="" q9="" r9="" s9="" t9="" u9="" v9="" w9="" x9="" y9="" z9="" a10="" b10="" c10="" d10="" e10="" f10="" g10="" h10="" i10="" j10="" k10="" l10="" m10="" n10="" o10="" p10="" q10="" r10="" s10="" t10="" u10="" v10="" w10="" x10="" y10="" z10="" a11="" b11="" c11="" d11="" e11="" f11="" g11="" h11="" i11="" j11="" k11="" l11="" m11="" n11="" o11="" p11="" q11="" r11="" s11="" t11="" u11="" v11="" w11="" x11="" y11="" z11="" a12="" b12="" c12="" d12="" e12="" f12="" g12="" h12="" i12="" 
j12="" k12="" l12="" m12="" n12="" o12="" p12="" q12="" r12="" s12="" t12="" u12="" v12="" w12="" x12="" y12="" z12="" a13="" b13="" c13="" d13="" e13="" f13="" g13="" h13="" i13="" j13="" k13="" l13="" m13="" n13="" o13="" p13="" q13="" r13="" s13="" t13="" u13="" v13="" w13="" x13="" y13="" z13="" a14="" b14="" c14="" d14="" e14="" f14="" g14="" h14="" i14="" j14="" k14="" l14="" m14="" n14="" o14="" p14="" q14="" r14="" s14="" t14="" u14="" v14="" w14="" x14="" y14="" z14="" a15="" b15="" c15="" d15="" e15="" f15="" g15="" h15="" i15="" j15="" k15="" l15="" m15="" n15="" o15="" p15="" q15="" r15="" s15="" t15="" u15="" v15="" w15="" x15="" y15="" z15="" a16="" b16="" c16="" d16="" e16="" f16="" g16="" h16="" i16="" j16="" k16="" l16="" m16="" n16="" o16="" p16="" q16="" r16="" s16="" t16="" u16="" v16="" w16="" x16="" y16="" z16="" a17="" b17="" c17="" d17="" e17="" f17="" g17="" h17="" i17="" j17="" k17="" l17="" m17="" n17="" o17="" p17="" q17="" r17="" s17="" t17="" u17="" v17="" w17="" x17="" y17="" z17="" a18="" b18="" c18="" d18="" e18="" f18="" g18="" h18="" i18="" j18="" k18="" l18="" m18="" n18="" o18="" p18="" q18="" r18="" s18="" t18="" u18="" v18="" w18="" x18="" y18="" z18="" a19="" b19="" c19="" d19="" e19="" f19="" g19="" h19="" i19="" j19="" k19="" l19="" m19="" n19="" o19="" p19="" q19="" r19="" s19="" t19="" u19="" v19="" w19="" x19="" y19="" z19="" a20="" b20="" c20="" d20="" e20="" f20="" g20="" h20="" i20="" j20="" k20="" l20="" m20="" n20="" o20="" p20="" q20="" r20="" s20="" t20="" u20="" v20="" w20="" x20="" y20="" z20="" a21="" b21="" c21="" d21="" e21="" f21="" g21="" h21="" i21="" j21="" k21="" l21="" m21="" n21="" o21="" p21="" q21="" r21="" s21="" t21="" u21="" v21="" w21="" x21="" y21="" z21="" a22="" b22="" c22="" d22="" e22="" f22="" g22="" h22="" i22="" j22="" k22="" l22="" m22="" n22="" o22="" p22="" q22="" r22="" s22="" t22="" u22="" v22="" w22="" x22="" y22="" z22="" a23="" b23="" c23="" d23="" e23="" f23="" g23="" h23="" 
i23="" j23="" k23="" l23="" m23="" n23="" o23="" p23="" q23="" r23="" s23="" t23="" u23="" v23="" w23="" x23="" y23="" z23="" a24="" b24="" c24="" d24="" e24="" f24="" g24="" h24="" i24="" j24="" k24="" l24="" m24="" n24="" o24="" p24="" q24="" r24="" s24="" t24="" u24="" v24="" w24="" x24="" y24="" z24="" a25="" b25="" c25="" d25="" e25="" f25="" g25="" h25="" i25="" j25="" k25="" l25="" m25="" n25="" o25="" p25="" q25="" r25="" s25="" t25="" u25="" v25="" w25="" x25="" y25="" z25="" a26="" b26="" c26="" d26="" e26="" f26="" g26="" h26="" i26="" j26="" k26="" l26="" m26="" n26="" o26="" p26="" q26="" r26="" s26="" t26="" u26="" v26="" w26="" x26="" y26="" z26="" a27="" b27="" c27="" d27="" e27="" f27="" g27="" h27="" i27="" j27="" k27="" l27="" m27="" n27="" o27="" p27="" q27="" r27="" s27="" t27="" u27="" v27="" w27="" x27="" y27="" z27="" a28="" b28="" c28="" d28="" e28="" f28="" g28="" h28="" i28="" j28="" k28="" l28="" m28="" n28="" o28="" p28="" q28="" r28="" s28="" t28="" u28="" v28="" w28="" x28="" y28="" z28="" a29="" b29="" c29="" d29="" e29="" f29="" g29="" h29="" i29="" j29="" k29="" l29="" m29="" n29="" o29="" p29="" q29="" r29="" s29="" t29="" u29="" v29="" w29="" x29="" y29="" z29="" a30="" b30="" c30="" d30="" e30="" f30="" g30="" h30="" i30="" j30="" k30="" l30="" m30="" n30="" o30="" p30="" q30="" r30="" s30="" t30="" u30="" v30="" w30="" x30="" y30="" z30="" a31="" b31="" c31="" d31="" e31="" f31="" g31="" h31="" i31="" j31="" k31="" l31="" m31="" n31="" o31="" p31="" q31="" r31="" s31="" t31="" u31="" v31="" w31="" x31="" y31="" z31="" a32="" b32="" c32="" d32="" e32="" f32="" g32="" h32="" i32="" j32="" k32="" l32="" m32="" n32="" o32="" p32="" q32="" r32="" s32="" t32="" u32="" v32="" w32="" x32="" y32="" z32="" a33="" b33="" c33="" d33="" e33="" f33="" g33="" h33="" i33="" j33="" k33="" l33="" m33="" n33="" o33="" p33="" q33="" r33="" s33="" t33="" u33="" v33="" w33="" x33="" y33="" z33="" a34="" b34="" c34="" d34="" e34="" f34="" g34="" 
h34="" i34="" j34="" k34="" l34="" m34="" n34="" o34="" p34="" q34="" r34="" s34="" t34="" u34="" v34="" w34="" x34="" y34="" z34="" a35="" b35="" c35="" d35="" e35="" f35="" g35="" h35="" i35="" j35="" k35="" l35="" m35="" n35="" o35="" p35="" q35="" r35="" s35="" t35="" u35="" v35="" w35="" x35="" y35="" z35="" a36="" b36="" c36="" d36="" e36="" f36="" g36="" h36="" i36="" j36="" k36="" l36="" m36="" n36="" o36="" p36="" q36="" r36="" s36="" t36="" u36="" v36="" w36="" x36="" y36="" z36="" a37="" b37="" c37="" d37="" e37="" f37="" g37="" h37="" i37="" j37="" k37="" l37="" m37="" n37="" o37="" p37="" q37="" r37="" s37="" t37="" u37="" v37="" w37="" x37="" y37="" z37="" a38="" b38="" c38="" d38="" e38="" f38="" g38="" h38="" i38="" j38="" k38="" l38="" m38="" n38="" o38="" p38="" q38="" r38="" s38="" t38="" u38="" v38="" w38="" x38="" y38="" z38="" a39="" b39="" c39="" d39="" e39="" f39="" g39="" h39="" i39="" j39="" k39="" l39="" m39="" n39="" o39="" p39="" q39="" r39="" s39="" t39="" u39="" v39="" w39="" x39="" y39="" z39="" a40="" b40="" c40="" d40="" e40="" f40="" g40="" h40="" i40="" j40="" k40="" l40="" m40="" n40="" o40="" p40="" q40="" r40="" s40="" t40="" u40="" v40="" w40="" x40="" y40="" z40="" a41="" b41="" c41="" d41="" e41="" f41="" g41="" h41="" i41="" j41="" k41="" l41="" m41="" n41="" o41="" p41="" q41="" r41="" s41="" t41="" u41="" v41="" w41="" x41="" y41="" z41="" a42="" b42="" c42="" d42="" e42="" f42="" g42="" h42="" i42="" j42="" k42="" l42="" m42="" n42="" o42="" p42="" q42="" r42="" s42="" t42="" u42="" v42="" w42="" x42="" y42="" z42="" a43="" b43="" c43="" d43="" e43="" f43="" g43="" h43="" i43="" j43="" k43="" l43="" m43="" n43="" o43="" p43="" q43="" r43="" s43="" t43="" u43="" v43="" w43="" x43="" y43="" z43="" a44="" b44="" c44="" d44="" e44="" f44="" g44="" h44="" i44="" j44="" k44="" l44="" m44="" n44="" o44="" p44="" q44="" r44="" s44="" t44="" u44="" v44="" w44="" x44="" y44="" z44="" a45="" b45="" c45="" d45="" e45="" f45="" 
g45="" h45="" i45="" j45="" k45="" l45="" m45="" n45="" o45="" p45="" q45="" r45="" s45="" t45="" u45="" v45="" w45="" x45="" y45="" z45="" a46="" b46="" c46="" d46="" e46="" f46="" g46="" h46="" i46="" j46="" k46="" l46="" m46="" n46="" o46="" p46="" q46="" r46="" s46="" t46="" u46="" v46="" w46="" x46="" y46="" z46="" a47="" b47="" c47="" d47="" e47="" f47="" g47="" h47="" i47="" j47="" k47="" l47="" m47="" n47="" o47="" p47="" q47="" r47="" s47="" t47="" u47="" v47="" w47="" x47="" y47="" z47="" a48="" b48="" c48="" d48="" e48="" f48="" g48="" h48="" i48="" j48="" k48="" l48="" m48="" n48="" o48="" p48="" q48="" r48="" s48="" t48="" u48="" v48=""
22 |     />"#;
23 |     bencher.iter(move || {
24 |         let parser = EventReader::new(xml.as_bytes());
25 |         for e in parser {
26 |             e.unwrap();
27 |         }
28 |     });
29 | }
30 | 
31 | #[bench]
32 | fn write(bencher: &mut Bencher) {
33 |     // The sample is parsed once, outside the timed closure, so that only writing is measured.
34 |     let input = std::fs::read("tests/documents/sample_1.xml").unwrap();
35 |     let parsed: Vec<_> = EventReader::new(input.as_slice()).into_iter().map(|ev| ev.unwrap()).collect();
36 |     let writer_events: Vec<_> = parsed.iter().filter_map(|ev| ev.as_writer_event()).collect();
37 |     bencher.iter(move || {
38 |         let mut out = EventWriter::new(Vec::new());
39 |         // Every event is cloned because the same list is written again on each iteration.
40 |         writer_events.iter().for_each(|ev| out.write(ev.clone()).unwrap());
41 |         // The closure yields the buffer that was written to.
42 |         out.into_inner()
43 |     });
44 | }
45 | 


--------------------------------------------------------------------------------
/examples/print_events.rs:
--------------------------------------------------------------------------------
 1 | use std::fs::File;
 2 | use std::io::BufReader;
 3 | use xml::common::Position;
 4 | use xml::reader::{ParserConfig, XmlEvent};
 5 | 
 6 | /// Prints every event of the XML file named by the first command-line argument.
 7 | ///
 8 | /// Each output line holds the reader position, a tab, and a rendering of the event.
 9 | fn main() {
10 |     let path = std::env::args_os().nth(1).expect("Please specify a path to an XML file");
11 |     let input = BufReader::new(File::open(path).unwrap());
12 | 
13 |     let mut reader = ParserConfig::default()
14 |         .ignore_root_level_whitespace(false)
15 |         .create_reader(input);
16 | 
17 |     loop {
18 |         // A parse error is reported on stderr and ends the listing.
19 |         let event = match reader.next() {
20 |             Ok(event) => event,
21 |             Err(e) => {
22 |                 eprintln!("Error at {}: {e}", reader.position());
23 |                 break;
24 |             },
25 |         };
26 | 
27 |         // The position goes first, on the same line as the event itself.
28 |         print!("{}\t", reader.position());
29 |         let finished = matches!(event, XmlEvent::EndDocument);
30 | 
31 |         match event {
32 |             XmlEvent::StartDocument { version, encoding, .. } => {
33 |                 println!("StartDocument({version}, {encoding})");
34 |             },
35 |             XmlEvent::EndDocument => println!("EndDocument"),
36 |             XmlEvent::ProcessingInstruction { name, data } => {
37 |                 println!("ProcessingInstruction({name}={:?})", data.as_deref().unwrap_or_default());
38 |             },
39 |             XmlEvent::StartElement { name, attributes, .. } => {
40 |                 if attributes.is_empty() {
41 |                     println!("StartElement({name})");
42 |                 } else {
43 |                     let mut attrs = Vec::with_capacity(attributes.len());
44 |                     for a in &attributes {
45 |                         attrs.push(format!("{}={:?}", &a.name, a.value));
46 |                     }
47 |                     println!("StartElement({name} [{}])", attrs.join(", "));
48 |                 }
49 |             },
50 |             XmlEvent::EndElement { name } => println!("EndElement({name})"),
51 |             XmlEvent::Comment(data) => {
52 |                 println!(r#"Comment("{}")"#, data.escape_debug());
53 |             },
54 |             XmlEvent::CData(data) => println!(r#"CData("{}")"#, data.escape_debug()),
55 |             XmlEvent::Characters(data) => println!(r#"Characters("{}")"#, data.escape_debug()),
56 |             XmlEvent::Whitespace(data) => println!(r#"Whitespace("{}")"#, data.escape_debug()),
57 |             XmlEvent::Doctype { syntax } => println!(r#"Doctype("{}")"#, syntax.escape_debug()),
58 |         }
59 | 
60 |         // `EndDocument` is the final event, so stop once it has been printed.
61 |         if finished {
62 |             break;
63 |         }
64 |     }
65 | }
66 | 


--------------------------------------------------------------------------------
/examples/rewrite.rs:
--------------------------------------------------------------------------------
 1 | //! See <https://lib.rs/crates/svg-hush> for a real-world example.
 2 | 
 3 | use std::fs::File;
 4 | use std::io::BufReader;
 5 | use std::path::Path;
 6 | use xml::EmitterConfig;
 7 | use xml::reader::{ParserConfig, Result};
 8 | 
 9 | fn main() -> Result<(), Box<dyn std::error::Error>> {
10 |     use xml::reader::XmlEvent as Input;
11 |     use xml::writer::XmlEvent as Output;
12 | 
13 |     // Without a command-line argument, the bundled sample document is used.
14 |     let arg = std::env::args_os().nth(1);
15 |     let file_path = Path::new(arg.as_deref().unwrap_or("tests/documents/sample_1.xml".as_ref()));
16 |     let opened = File::open(file_path);
17 |     let file = opened.map_err(|e| format!("Can't open {}: {e}", file_path.display()))?;
18 | 
19 |     let mut reader = ParserConfig::default()
20 |         .ignore_root_level_whitespace(true)
21 |         .ignore_comments(false)
22 |         .cdata_to_characters(true)
23 |         .coalesce_characters(true)
24 |         .create_reader(BufReader::new(file));
25 | 
26 |     // Events are written to a locked standard output.
27 |     let mut writer = EmitterConfig::default().create_writer(std::io::stdout().lock());
28 | 
29 |     loop {
30 |         match reader.next()? {
31 |             Input::EndDocument => break,
32 |             Input::StartElement { name, mut attributes, namespace } => {
33 |                 // Recase the attribute values in place, then lend them to the writer event.
34 |                 for attr in &mut attributes {
35 |                     attr.value = alternating_caps(&attr.value);
36 |                 }
37 |                 writer.write(Output::StartElement {
38 |                     name: name.borrow(),
39 |                     namespace: namespace.borrow(),
40 |                     attributes: attributes.iter().map(|attr| attr.borrow()).collect(),
41 |                 })?;
42 |             },
43 |             Input::Characters(text) => {
44 |                 let recased = alternating_caps(&text);
45 |                 writer.write(Output::Characters(&recased))?;
46 |             },
47 |             Input::Comment(text) => {
48 |                 let recased = alternating_caps(&text);
49 |                 writer.write(Output::Comment(&recased))?;
50 |             },
51 |             // Everything else is forwarded unchanged when a writer equivalent exists.
52 |             other => {
53 |                 if let Some(writer_event) = other.as_writer_event() {
54 |                     writer.write(writer_event)?;
55 |                 }
56 |             },
57 |         }
58 |     }
59 | 
60 |     Ok(())
61 | }
62 | 
63 | /// Upper-cases characters at even positions and lower-cases the rest (ASCII letters only).
64 | fn alternating_caps(text: &str) -> String {
65 |     let recase = |(idx, c): (usize, char)| if idx % 2 == 0 { c.to_ascii_uppercase() } else { c.to_ascii_lowercase() };
66 |     text.chars().enumerate().map(recase).collect()
67 | }
68 | 


--------------------------------------------------------------------------------
/examples/xml-analyze.rs:
--------------------------------------------------------------------------------
 1 | #![forbid(unsafe_code)]
 2 | 
 3 | use std::collections::HashSet;
 4 | use std::fs::File;
 5 | use std::io::{self, BufReader, Read};
 6 | use std::{cmp, env};
 7 | 
 8 | use xml::ParserConfig;
 9 | use xml::reader::XmlEvent;
10 | 
11 | /// Reads an XML document from the file named by the first argument (or from stdin) and prints statistics.
12 | fn main() -> Result<(), Box<dyn std::error::Error>> {
13 |     let mut file;
14 |     let mut stdin;
15 |     let source: &mut dyn Read = if let Some(file_name) = env::args().nth(1) {
16 |         file = File::open(file_name).map_err(|e| format!("Cannot open input file: {e}"))?;
17 |         &mut file
18 |     } else {
19 |         stdin = io::stdin();
20 |         &mut stdin
21 |     };
22 | 
23 |     let reader = ParserConfig::new()
24 |         .whitespace_to_characters(true)
25 |         .ignore_comments(false)
26 |         .create_reader(BufReader::new(source));
27 | 
28 |     let mut processing_instructions = 0;
29 |     let mut elements = 0;
30 |     let mut character_blocks = 0;
31 |     let mut cdata_blocks = 0;
32 |     let mut characters = 0;
33 |     let mut comment_blocks = 0;
34 |     let mut comment_characters = 0;
35 |     let mut namespaces = HashSet::new();
36 |     let mut depth = 0;
37 |     let mut max_depth = 0;
38 | 
39 |     for e in reader {
40 |         let e = e.map_err(|e| format!("Error parsing XML document: {e}"))?;
41 |         match e {
42 |             XmlEvent::StartDocument { version, encoding, standalone } => {
43 |                 println!(
44 |                     "XML document version {}, encoded in {}, {}standalone",
45 |                     version, encoding, if standalone.unwrap_or(false) { "" } else { "not " }
46 |                 );
47 |             },
48 |             XmlEvent::Doctype { syntax } => println!("The Doctype is: {syntax}"),
49 |             XmlEvent::EndDocument => println!("Document finished"),
50 |             XmlEvent::ProcessingInstruction { .. } => processing_instructions += 1,
51 |             XmlEvent::Whitespace(_) => {}, // can't happen due to configuration
52 |             // `str::len` is a byte count, so the character totals below use `chars().count()` instead.
53 |             XmlEvent::Characters(s) => {
54 |                 character_blocks += 1;
55 |                 characters += s.chars().count();
56 |             },
57 |             XmlEvent::CData(s) => {
58 |                 cdata_blocks += 1;
59 |                 characters += s.chars().count();
60 |             },
61 |             XmlEvent::Comment(s) => {
62 |                 comment_blocks += 1;
63 |                 comment_characters += s.chars().count();
64 |             },
65 |             XmlEvent::StartElement { namespace, .. } => {
66 |                 depth += 1;
67 |                 max_depth = cmp::max(max_depth, depth);
68 |                 elements += 1;
69 |                 namespaces.extend(namespace.0.into_values());
70 |             },
71 |             XmlEvent::EndElement { .. } => {
72 |                 depth -= 1;
73 |             },
74 |         }
75 |     }
76 | 
77 |     namespaces.remove(xml::namespace::NS_EMPTY_URI);
78 |     namespaces.remove(xml::namespace::NS_XMLNS_URI);
79 |     namespaces.remove(xml::namespace::NS_XML_URI);
80 | 
81 |     println!("Elements: {elements}, maximum depth: {max_depth}");
82 |     println!("Namespaces (excluding built-in): {}", namespaces.len());
83 |     println!("Characters: {characters}, characters blocks: {character_blocks}, CDATA blocks: {cdata_blocks}");
84 |     println!("Comment blocks: {comment_blocks}, comment characters: {comment_characters}");
85 |     println!("Processing instructions (excluding built-in): {processing_instructions}");
86 | 
87 |     Ok(())
88 | }
89 | 


--------------------------------------------------------------------------------
/src/attribute.rs:
--------------------------------------------------------------------------------
  1 | //! Contains XML attributes manipulation types and functions.
  2 | 
  3 | use std::fmt;
  4 | 
  5 | use crate::escape::{AttributeEscapes, Escaped};
  6 | use crate::name::{Name, OwnedName};
  7 | 
  8 | /// A borrowed view of an XML attribute.
  9 | ///
 10 | /// Pairs a borrowed qualified name with a borrowed string value.
 11 | #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
 12 | pub struct Attribute<'a> {
 13 |     /// Qualified name of the attribute.
 14 |     pub name: Name<'a>,
 15 | 
 16 |     /// Value of the attribute.
 17 |     pub value: &'a str,
 18 | }
 19 | 
 20 | impl fmt::Display for Attribute<'_> {
 21 |     /// Writes `name="value"`, passing the value through `AttributeEscapes`.
 22 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 23 |         let Self { name, value } = *self;
 24 |         write!(f, "{}=\"{}\"", name, Escaped::<AttributeEscapes>::new(value))
 25 |     }
 26 | }
 27 | 
 28 | impl<'a> Attribute<'a> {
 29 |     /// Builds a borrowed attribute from a borrowed name and a borrowed string value.
 30 |     #[inline]
 31 |     #[must_use]
 32 |     pub const fn new(name: Name<'a>, value: &'a str) -> Self {
 33 |         Self { name, value }
 34 |     }
 35 | 
 36 |     /// Copies the name and the value into a new `OwnedAttribute`.
 37 |     #[inline]
 38 |     #[must_use]
 39 |     pub fn to_owned(&self) -> OwnedAttribute {
 40 |         let Self { name, value } = *self;
 41 |         OwnedAttribute { name: name.into(), value: value.into() }
 42 |     }
 43 | }
 44 | 
 45 | /// An owned counterpart of [`Attribute`].
 46 | ///
 47 | /// Holds an owned qualified name together with an owned string value.
 48 | #[derive(Clone, Eq, PartialEq, Hash, Debug)]
 49 | pub struct OwnedAttribute {
 50 |     /// Qualified name of the attribute.
 51 |     pub name: OwnedName,
 52 | 
 53 |     /// Value of the attribute.
 54 |     pub value: String,
 55 | }
 56 | 
 57 | impl OwnedAttribute {
 58 |     /// Builds an owned attribute from an owned name and anything convertible into a `String`.
 59 |     #[inline]
 60 |     pub fn new<S: Into<String>>(name: OwnedName, value: S) -> Self {
 61 |         let value = value.into();
 62 |         Self { name, value }
 63 |     }
 64 | 
 65 |     /// Lends this attribute out as a borrowed `Attribute`.
 66 |     #[must_use]
 67 |     #[inline]
 68 |     pub fn borrow(&self) -> Attribute<'_> {
 69 |         Attribute::new(self.name.borrow(), self.value.as_str())
 70 |     }
 71 | }
 72 | 
 73 | impl fmt::Display for OwnedAttribute {
 74 |     /// Writes `name="value"`, passing the value through `AttributeEscapes`.
 75 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 76 |         let Self { name, value } = self;
 77 |         write!(f, "{}=\"{}\"", name, Escaped::<AttributeEscapes>::new(value))
 78 |     }
 79 | }
 80 | 
 81 | #[cfg(test)]
 82 | mod tests {
 83 |     use super::Attribute;
 84 | 
 85 |     use crate::name::Name;
 86 | 
 87 |     /// `Display` must print the qualified name and escape the markup characters in the value.
 88 |     #[test]
 89 |     fn attribute_display() {
 90 |         let name = Name::qualified("attribute", "urn:namespace", Some("n"));
 91 |         let value = "its value with > & \" ' < weird symbols";
 92 |         let expected = "{urn:namespace}n:attribute=\"its value with &gt; &amp; &quot; &apos; &lt; weird symbols\"";
 93 | 
 94 |         let rendered = Attribute::new(name, value).to_string();
 95 | 
 96 |         // A `String` compares against a `&str` directly, so no re-borrowing is needed.
 97 |         assert_eq!(rendered, expected);
 98 |     }
 99 | }
100 | 


--------------------------------------------------------------------------------
/src/common.rs:
--------------------------------------------------------------------------------
  1 | //! Contains common types and functions used throughout the library.
  2 | 
  3 | use std::fmt;
  4 | 
  5 | /// Represents a position inside some textual document.
  6 | #[derive(Copy, Clone, PartialEq, Eq)]
  7 | pub struct TextPosition {
  8 |     #[doc(hidden)]
  9 |     pub row: u64,
 10 | 
 11 |     #[doc(hidden)]
 12 |     pub column: u64,
 13 | }
 14 | 
 15 | impl TextPosition {
 16 |     /// Creates a new position initialized to the beginning of the document.
 17 |     #[inline]
 18 |     #[must_use]
 19 |     pub const fn new() -> Self {
 20 |         Self { row: 0, column: 0 }
 21 |     }
 22 | 
 23 |     /// Advances the position within the current line by `count` columns.
 24 |     #[inline]
 25 |     pub fn advance(&mut self, count: u8) {
 26 |         self.column += u64::from(count);
 27 |     }
 28 | 
 29 |     /// Moves the column to the next multiple of `width`; a zero `width` leaves the position unchanged.
 30 |     #[doc(hidden)]
 31 |     #[deprecated]
 32 |     pub fn advance_to_tab(&mut self, width: u8) {
 33 |         let width = u64::from(width);
 34 |         // `checked_rem` yields `None` for a zero divisor, where the `%` operator would panic.
 35 |         if let Some(rem) = self.column.checked_rem(width) {
 36 |             self.column += width - rem;
 37 |         }
 38 |     }
 39 | 
 40 |     /// Advances the position to the beginning of the next line.
 41 |     #[inline]
 42 |     pub fn new_line(&mut self) {
 43 |         self.column = 0;
 44 |         self.row += 1;
 45 |     }
 46 | 
 47 |     /// Row, counting from 0. Add 1 to display as users expect!
 48 |     #[must_use]
 49 |     pub fn row(&self) -> u64 { self.row }
 50 | 
 51 |     /// Column, counting from 0. Add 1 to display as users expect!
 52 |     #[must_use]
 53 |     pub fn column(&self) -> u64 { self.column }
 54 | }
 55 | 
 56 | /// Debug output is the same `row:column` text that `Display` produces.
 57 | impl fmt::Debug for TextPosition {
 58 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { <Self as fmt::Display>::fmt(self, f) }
 59 | }
 60 | 
 61 | impl fmt::Display for TextPosition {
 62 |     /// Prints `row:column`, each shifted by one relative to the stored zero-based value.
 63 |     #[inline]
 64 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 65 |         f.write_fmt(format_args!("{}:{}", self.row + 1, self.column + 1))
 66 |     }
 67 | }
 68 | 
 69 | /// Gives access to the position in the document that corresponds to an object.
 70 | ///
 71 | /// This trait is implemented by parsers, lexers and errors.
 72 | pub trait Position {
 73 |     /// Returns the current position or a position corresponding to the object.
 74 |     fn position(&self) -> TextPosition;
 75 | }
 76 | 
 77 | impl Position for TextPosition {
 78 |     #[inline]
 79 |     fn position(&self) -> TextPosition {
 80 |         *self // a `TextPosition` is `Copy` and simply reports itself
 81 |     }
 82 | }
 83 | 
 84 | /// Versions of the XML specification.
 85 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
 86 | pub enum XmlVersion {
 87 |     /// XML version 1.0.
 88 |     Version10,
 89 | 
 90 |     /// XML version 1.1.
 91 |     Version11,
 92 | }
 93 | 
 94 | impl fmt::Display for XmlVersion {
 95 |     /// Writes the version number, `1.0` or `1.1`.
 96 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 97 |         let number = match self { Self::Version10 => "1.0", Self::Version11 => "1.1" };
 98 |         fmt::Display::fmt(number, f)
 99 |     }
100 | }
101 | 
102 | impl fmt::Debug for XmlVersion {
103 |     /// Delegates to `Display`, so debug output is the bare version number as well.
104 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105 |         <Self as fmt::Display>::fmt(self, f)
106 |     }
107 | }
108 | 
109 | /// Tells whether `c` is one of the four white space characters (`S`) of the
110 | /// XML 1.1 specification, [section 2.3][1]: space, tab, line feed or carriage return.
111 | ///
112 | /// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
113 | #[must_use]
114 | #[inline]
115 | pub const fn is_whitespace_char(c: char) -> bool {
116 |     c == ' ' || c == '\t' || c == '\n' || c == '\r'
117 | }
118 | 
119 | /// Tells whether the given string consists solely of white space characters (`S`),
120 | /// as judged by `is_whitespace_char` for each character.
121 | /// An empty string counts as white space.
122 | pub fn is_whitespace_str(s: &str) -> bool {
123 |     !s.chars().any(|c| !is_whitespace_char(c))
124 | }
125 | 
126 | /// Tells whether `c` is a character allowed in an XML 1.0 document.
127 | #[must_use]
128 | pub const fn is_xml10_char(c: char) -> bool {
129 |     matches!(c as u32, 0x09 | 0x0A | 0x0D | 0x20..=0xD7FF | 0xE000..=0xFFFD | 0x1_0000..=0x10_FFFF)
130 | }
131 | 
132 | /// Tells whether `c` is a character allowed in an XML 1.1 document.
133 | #[must_use]
134 | pub const fn is_xml11_char(c: char) -> bool {
135 |     matches!(c as u32, 0x01..=0xD7FF | 0xE000..=0xFFFD | 0x1_0000..=0x10_FFFF)
136 | }
137 | 
138 | /// Tells whether `c` is allowed in XML 1.1 and lies outside the restricted character ranges.
139 | #[must_use]
140 | pub const fn is_xml11_char_not_restricted(c: char) -> bool {
141 |     let restricted = matches!(c as u32, 0x01..=0x08 | 0x0B..=0x0C | 0x0E..=0x1F | 0x7F..=0x84 | 0x86..=0x9F);
142 |     !restricted && is_xml11_char(c)
143 | }
144 | 
145 | /// Tells whether `c` may begin an XML name (`NameStartChar`),
146 | /// as defined by the XML 1.1 specification, [section 2.3][1].
147 | ///
148 | /// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
149 | #[must_use]
150 | pub const fn is_name_start_char(c: char) -> bool {
151 |     // ASCII letters, the colon and the underscore come first; the rest are the non-ASCII ranges.
152 |     let ascii = matches!(c, ':' | '_' | 'A'..='Z' | 'a'..='z');
153 |     ascii || matches!(c as u32,
154 |         0xC0..=0xD6 | 0xD8..=0xF6 | 0xF8..=0x2FF |
155 |         0x370..=0x37D | 0x37F..=0x1FFF |
156 |         0x200C..=0x200D | 0x2070..=0x218F |
157 |         0x2C00..=0x2FEF | 0x3001..=0xD7FF |
158 |         0xF900..=0xFDCF | 0xFDF0..=0xFFFD | 0x1_0000..=0xE_FFFF
159 |     )
160 | }
161 | 
162 | /// Tells whether `c` may appear inside an XML name (`NameChar`),
163 | /// as defined by the XML 1.1 specification, [section 2.3][1].
164 | ///
165 | /// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
166 | #[must_use]
167 | pub const fn is_name_char(c: char) -> bool {
168 |     // Every name start character is also a name character.
169 |     let start = is_name_start_char(c);
170 | 
171 |     // On top of those, a name may contain `-`, `.`, ASCII digits, U+00B7
172 |     // and the two ranges U+0300..=U+036F and U+203F..=U+2040.
173 |     let extra = matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{300}'..='\u{36F}' | '\u{203F}'..='\u{2040}');
174 |     start || extra
175 | }
176 | 


--------------------------------------------------------------------------------
/src/escape.rs:
--------------------------------------------------------------------------------
  1 | //! Contains functions for performing XML special characters escaping.
  2 | 
  3 | use std::borrow::Cow;
  4 | use std::fmt::{Display, Formatter, Result};
  5 | use std::marker::PhantomData;
  6 | 
  7 | /// A table mapping single bytes to the text that replaces them in escaped output.
  8 | pub(crate) trait Escapes {
  9 |     /// Returns the replacement for `c`, or `None` when the byte is kept as is.
 10 |     fn escape(c: u8) -> Option<&'static str>;
 11 | 
 12 |     /// Tells whether the byte `c` has a replacement.
 13 |     fn byte_needs_escaping(c: u8) -> bool { matches!(Self::escape(c), Some(_)) }
 14 | 
 15 |     /// Tells whether at least one byte of `s` has a replacement.
 16 |     fn str_needs_escaping(s: &str) -> bool { s.bytes().any(|b| matches!(Self::escape(b), Some(_))) }
 17 | }
 18 | 
 19 | /// A string paired with the escaping rules `E`.
 20 | /// Its `Display` implementation writes the string with those rules applied.
 21 | pub(crate) struct Escaped<'a, E: Escapes> {
 22 |     _escape_phantom: PhantomData<E>,
 23 |     to_escape: &'a str,
 24 | }
 25 | 
 26 | impl<'a, E: Escapes> Escaped<'a, E> {
 27 |     /// Wraps `s` without copying it; the escaping happens when the value is formatted.
 28 |     pub const fn new(s: &'a str) -> Self {
 29 |         Self { to_escape: s, _escape_phantom: PhantomData }
 30 |     }
 31 | }
 32 | 
 33 | impl<E: Escapes> Display for Escaped<'_, E> {
 34 |     /// Writes the wrapped string, substituting every byte for which `E` has a replacement.
 35 |     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
 36 |         let mut rest = self.to_escape;
 37 | 
 38 |         // Repeatedly locate the next byte that needs escaping within what is left.
 39 |         while let Some(at) = rest.bytes().position(E::byte_needs_escaping) {
 40 |             // Everything before that byte is copied through verbatim.
 41 |             f.write_str(&rest[..at])?;
 42 | 
 43 |             // `at` came from `position`, so it is always in bounds here.
 44 |             let replacement = E::escape(rest.as_bytes()[at]).unwrap_or("unexpected token");
 45 |             f.write_str(replacement)?;
 46 | 
 47 |             // Resume right after the single byte that was replaced.
 48 |             rest = &rest[at + 1..];
 49 |         }
 50 | 
 51 |         f.write_str(rest)
 52 |     }
 53 | }
 54 | 
 55 | /// Escapes `s` according to `E`, borrowing the input when nothing has to be replaced.
 56 | fn escape_str<E: Escapes>(s: &str) -> Cow<'_, str> {
 57 |     if !E::str_needs_escaping(s) {
 58 |         return Cow::Borrowed(s);
 59 |     }
 60 |     Escaped::<E>::new(s).to_string().into()
 61 | }
 62 | 
 63 | macro_rules! escapes { // defines a unit struct plus its `Escapes` impl from a `byte => replacement` table
 64 |     {
 65 |         $name: ident,
 66 |         $($k: expr => $v: expr),* $(,)?
 67 |     } => {
 68 |         pub(crate) struct $name;
 69 | 
 70 |         impl Escapes for $name {
 71 |             fn escape(c: u8) -> Option<&'static str> {
 72 |                 match c {
 73 |                     $( $k => Some($v),)*
 74 |                     _ => None // any byte not listed in the table has no replacement
 75 |                 }
 76 |             }
 77 |         }
 78 |     };
 79 | }
 80 | 
 81 | escapes!(
 82 |     AttributeEscapes, // table for attribute values: markup characters, both quotes, and line breaks
 83 |     b'<'  => "&lt;",
 84 |     b'>'  => "&gt;",
 85 |     b'"'  => "&quot;",
 86 |     b'\'' => "&apos;",
 87 |     b'&'  => "&amp;",
 88 |     b'\n' => "&#xA;",
 89 |     b'\r' => "&#xD;",
 90 | );
 91 | 
 92 | escapes!(
 93 |     PcDataEscapes, // table for character data: only the three markup characters below
 94 |     b'<' => "&lt;",
 95 |     b'>' => "&gt;",
 96 |     b'&' => "&amp;",
 97 | );
 98 | 
 99 | /// Performs escaping of common XML characters inside an attribute value.
100 | ///
101 | /// This function replaces several important markup characters with their
102 | /// entity equivalents:
103 | ///
104 | /// * `<` → `&lt;`
105 | /// * `>` → `&gt;`
106 | /// * `"` → `&quot;`
107 | /// * `'` → `&apos;`
108 | /// * `&` → `&amp;`
109 | ///
110 | /// The following characters are escaped so that attributes are printed on
111 | /// a single line:
112 | /// * `\n` → `&#xA;`
113 | /// * `\r` → `&#xD;`
114 | ///
115 | /// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
116 | ///
117 | /// Returns the input borrowed, without allocating, when it contains none of the characters above.
118 | #[inline]
119 | #[must_use]
120 | pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
121 |     escape_str::<AttributeEscapes>(s) // the `AttributeEscapes` table decides which bytes are replaced
122 | }
123 | 
124 | /// Performs escaping of common XML characters inside PCDATA.
125 | ///
126 | /// This function replaces several important markup characters with their entity equivalents:
127 | ///
128 | /// * `<` → `&lt;`
129 | /// * `>` → `&gt;`
130 | /// * `&` → `&amp;`
131 | ///
132 | /// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
133 | ///
134 | /// Does not perform allocations if the given string does not contain escapable characters.
135 | #[inline]
136 | #[must_use]
137 | pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
138 |     escape_str::<PcDataEscapes>(s)
139 | }
140 | 
141 | #[cfg(test)]
142 | mod tests {
143 |     use super::{escape_str_attribute, escape_str_pcdata};
144 | 
145 |     #[test]
146 |     fn test_escape_str_attribute() {
147 |         assert_eq!(escape_str_attribute("<>'\"&\n\r"), "&lt;&gt;&apos;&quot;&amp;&#xA;&#xD;"); // all seven escaped characters
148 |         assert_eq!(escape_str_attribute("no_escapes"), "no_escapes"); // nothing to escape: text comes back unchanged
149 |     }
150 | 
151 |     #[test]
152 |     fn test_escape_str_pcdata() {
153 |         assert_eq!(escape_str_pcdata("<>&"), "&lt;&gt;&amp;"); // all three escaped characters
154 |         assert_eq!(escape_str_pcdata("no_escapes"), "no_escapes"); // nothing to escape: text comes back unchanged
155 |     }
156 | 
157 |     #[test]
158 |     fn test_escape_multibyte_code_points() {
159 |         assert_eq!(escape_str_attribute("☃<"), "☃&lt;"); // a multi-byte character before the escaped byte stays intact
160 |         assert_eq!(escape_str_pcdata("☃<"), "☃&lt;"); // same check for the PCDATA table
161 |     }
162 | }
163 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | #![warn(missing_docs)]
 2 | #![forbid(non_camel_case_types)]
 3 | #![forbid(unsafe_code)]
 4 | #![allow(clippy::redundant_closure_for_method_calls)]
 5 | #![allow(clippy::module_name_repetitions)]
 6 | 
 7 | //! This crate currently provides an almost XML 1.0/1.1-compliant pull parser.
 8 | //!
 9 | //! Please note that functions of this parser may panic.
10 | //! If a panic could cause a Denial Of Service in your codebase, *you're* responsible for wrapping access to this library in `catch_unwind`.
11 | 
12 | #![cfg_attr(doctest, doc = include_str!("../README.md"))]
13 | 
14 | pub use crate::reader::{EventReader, ParserConfig};
15 | pub use crate::util::Encoding;
16 | pub use crate::writer::{EmitterConfig, EventWriter};
17 | 
18 | pub mod attribute;
19 | pub mod common;
20 | pub mod escape;
21 | #[doc(hidden)] // FIXME: not supposed to be public
22 | pub mod macros;
23 | pub mod name;
24 | pub mod namespace;
25 | pub mod reader;
26 | mod util;
27 | pub mod writer;
28 | 


--------------------------------------------------------------------------------
/src/macros.rs:
--------------------------------------------------------------------------------
 1 | #![macro_use]
 2 | 
 3 | //! Contains several macros used in this crate.
 4 | 
 5 | macro_rules! gen_setter { // expands to one builder-style setter method named after the field
 6 |     ($(#[$comments:meta])* $field:ident : into $t:ty) => { // `into` form: the setter accepts any value convertible into the field type
 7 | 
 8 |             $(#[$comments])*
 9 |             ///
10 |             /// <small>See [`ParserConfig`][crate::ParserConfig] fields docs for details</small>
11 |             #[inline]
12 |             #[must_use]
13 |             pub fn $field<T: Into<$t>>(mut self, value: T) -> Self {
14 |                 self.$field = value.into();
15 |                 self
16 |             }
17 |     };
18 |     ($(#[$comments:meta])* $field:ident : val $t:ty) => { // `val` form: the setter takes the field type itself and is a `const fn`
19 |             $(#[$comments])*
20 |             ///
21 |             /// <small>See [`ParserConfig`][crate::ParserConfig] fields docs for details</small>
22 |             #[inline]
23 |             #[must_use]
24 |             pub const fn $field(mut self, value: $t) -> Self {
25 |                 self.$field = value;
26 |                 self
27 |             }
28 |     };
29 | }
30 | 
31 | macro_rules! gen_setters { // expands to an `impl` block for the target type holding one setter per listed field
32 |     ($target:ident, $($(#[$comments:meta])* $field:ident : $k:tt $tpe:ty),+) => (
33 |         impl $target {$(
34 | 
35 |             gen_setter! { $(#[$comments])* $field : $k $tpe } // the `into`/`val` keyword is forwarded as given
36 |         )+
37 |     })
38 | }
39 | 


--------------------------------------------------------------------------------
/src/reader.rs:
--------------------------------------------------------------------------------
  1 | //! Contains high-level interface for a pull-based XML parser.
  2 | //!
  3 | //! The most important type in this module is `EventReader`, which provides an iterator
  4 | //! view for events in XML document.
  5 | 
  6 | use std::io::Read;
  7 | use std::iter::FusedIterator;
  8 | use std::result;
  9 | 
 10 | use crate::common::{Position, TextPosition};
 11 | 
 12 | pub use self::config::ParserConfig;
 13 | pub use self::error::{Error, ErrorKind};
 14 | pub use self::events::XmlEvent;
 15 | 
 16 | // Backward-compatibility alias: the old name still resolves, but is flagged as deprecated.
 17 | #[doc(hidden)]
 18 | #[deprecated(note = "Merged into ParserConfig")]
 19 | pub type ParserConfig2 = ParserConfig;
 20 | 
 21 | use self::parser::PullParser;
 22 | 
 23 | mod config;
 24 | mod error;
 25 | mod events;
 26 | mod indexset;
 27 | mod lexer;
 28 | mod parser;
 29 | 
 30 | /// A result type yielded by the reader API, such as [`EventReader::next`]; the error type defaults to [`Error`].
 31 | pub type Result<T, E = Error> = result::Result<T, E>;
 32 | 
 33 | /// Pull-based XML parser layered over any `std::io::Read` source.
 34 | ///
 35 | /// Wrap the source in a `BufReader` first; an unbuffered source may make parsing very slow.
 36 | pub struct EventReader<R: Read> {
 37 |     source: R,
 38 |     parser: PullParser,
 39 | }
 40 | 
 41 | impl<R: Read> EventReader<R> {
 42 |     /// Creates a reader with the default configuration, consuming the given stream. Wrap the stream in a `BufReader`, otherwise parsing may be very slow.
 43 |     #[inline]
 44 |     pub fn new(source: R) -> Self {
 45 |         let default_config = ParserConfig::new();
 46 |         Self::new_with_config(source, default_config)
 47 |     }
 48 | 
 49 |     /// Creates a reader with the provided configuration, consuming the given stream. Wrap the stream in a `BufReader`, otherwise parsing may be very slow.
 50 |     #[inline]
 51 |     pub fn new_with_config(source: R, config: impl Into<ParserConfig>) -> Self {
 52 |         let parser = PullParser::new(config);
 53 |         Self { source, parser }
 54 |     }
 55 | 
 56 |     /// Pulls the next XML event from the stream and returns it.
 57 |     ///
 58 |     /// Once this has returned [Err] or [`XmlEvent::EndDocument`], further calls
 59 |     /// to this method return that same result again.
 60 |     #[inline]
 61 |     #[allow(clippy::should_implement_trait)]
 62 |     pub fn next(&mut self) -> Result<XmlEvent> {
 63 |         let Self { source, parser } = self;
 64 |         parser.next(source)
 65 |     }
 66 | 
 67 |     /// Skips all XML events up to and including the end tag that closes the current level.
 68 |     ///
 69 |     /// Handy after a start tag of no interest: the whole subtree beneath it is read and
 70 |     /// discarded. Reaching the end of the document first yields an `UnexpectedEof` error.
 71 |     #[inline]
 72 |     pub fn skip(&mut self) -> Result<()> {
 73 |         // Number of elements still open, counting the one being skipped.
 74 |         let mut open = 1;
 75 |         loop {
 76 |             match self.next()? {
 77 |                 XmlEvent::StartElement { .. } => open += 1,
 78 |                 XmlEvent::EndElement { .. } => {
 79 |                     open -= 1;
 80 |                     if open == 0 { return Ok(()); }
 81 |                 },
 82 |                 XmlEvent::EndDocument => {
 83 |                     let pos = self.parser.position();
 84 |                     return Err(Error { kind: ErrorKind::UnexpectedEof, pos });
 85 |                 },
 86 |                 _ => {},
 87 |             }
 88 |         }
 89 |     }
 90 | 
 91 |     /// Shared access to the underlying reader.
 92 |     ///
 93 |     /// Using it directly while the event reader is parsing is not recommended.
 94 |     pub fn source(&self) -> &R { &self.source }
 95 | 
 96 |     /// Mutable access to the underlying reader.
 97 |     ///
 98 |     /// Using it directly while the event reader is parsing is not recommended.
 99 |     pub fn source_mut(&mut self) -> &mut R { &mut self.source }
100 | 
101 |     /// Consumes this `EventReader` and hands back the underlying reader.
102 |     ///
103 |     /// This is destructive: wrapping the returned reader in `EventReader::new()`
104 |     /// again creates a fresh reader, which will attempt to parse an XML document
105 |     /// from the beginning.
106 |     pub fn into_inner(self) -> R {
107 |         self.source
108 |     }
109 | 
110 |     /// Returns the DOCTYPE of the document if it has already been seen.
111 |     ///
112 |     /// Available only after the root `StartElement` event.
113 |     #[inline]
114 |     #[deprecated(note = "there is `XmlEvent::Doctype` now")]
115 |     #[allow(deprecated)]
116 |     pub fn doctype(&self) -> Option<&str> {
117 |         self.parser.doctype()
118 |     }
119 | }
120 | 
121 | impl<B: Read> Position for EventReader<B> {
122 |     /// Reports the position of the last event this reader produced.
123 |     #[inline]
124 |     fn position(&self) -> TextPosition {
125 |         self.parser.position()
126 |     }
127 | }
128 | 
129 | impl<R: Read> IntoIterator for EventReader<R> {
130 |     type IntoIter = Events<R>;
131 |     type Item = Result<XmlEvent>;
132 | 
133 |     fn into_iter(self) -> Events<R> {
134 |         Events { reader: self, finished: false }
135 |     }
136 | }
137 | 
/// An iterator over XML events created from some type implementing `Read`.
///
/// When the next event is an `Err` or `XmlEvent::EndDocument`, it will be returned
/// by the iterator once, and then the iterator will stop producing events —
/// unless the parser reports `is_ignoring_end_of_stream()`, in which case the
/// iterator keeps polling the reader.
pub struct Events<R: Read> {
    /// The wrapped reader that actually produces the events.
    reader: EventReader<R>,
    /// Set once an `Err` or `XmlEvent::EndDocument` has been yielded.
    finished: bool,
}
146 | 
147 | impl<R: Read> Events<R> {
148 |     /// Unwraps the iterator, returning the internal `EventReader`.
149 |     #[inline]
150 |     pub fn into_inner(self) -> EventReader<R> {
151 |         self.reader
152 |     }
153 | 
154 |     /// Access the underlying reader
155 |     ///
156 |     /// It's not recommended to use it while the events are still being parsed
157 |     pub fn source(&self) -> &R { &self.reader.source }
158 | 
159 |     /// Access the underlying reader
160 |     ///
161 |     /// It's not recommended to use it while the events are still being parsed
162 |     pub fn source_mut(&mut self) -> &mut R { &mut self.reader.source }
163 | }
164 | 
165 | impl<R: Read> FusedIterator for Events<R> {
166 | }
167 | 
168 | impl<R: Read> Iterator for Events<R> {
169 |     type Item = Result<XmlEvent>;
170 | 
171 |     #[inline]
172 |     fn next(&mut self) -> Option<Result<XmlEvent>> {
173 |         if self.finished && !self.reader.parser.is_ignoring_end_of_stream() {
174 |             None
175 |         } else {
176 |             let ev = self.reader.next();
177 |             if let Ok(XmlEvent::EndDocument) | Err(_) = ev {
178 |                 self.finished = true;
179 |             }
180 |             Some(ev)
181 |         }
182 |     }
183 | }
184 | 
185 | impl<'r> EventReader<&'r [u8]> {
186 |     /// A convenience method to create an `XmlReader` from a string slice.
187 |     #[inline]
188 |     #[must_use]
189 |     #[allow(clippy::should_implement_trait)]
190 |     pub fn from_str(source: &'r str) -> Self {
191 |         EventReader::new(source.as_bytes())
192 |     }
193 | }
194 | 


--------------------------------------------------------------------------------
/src/reader/events.rs:
--------------------------------------------------------------------------------
  1 | //! Contains `XmlEvent` datatype, instances of which are emitted by the parser.
  2 | 
  3 | use crate::attribute::OwnedAttribute;
  4 | use crate::common::XmlVersion;
  5 | use crate::name::OwnedName;
  6 | use crate::namespace::Namespace;
  7 | use std::fmt;
  8 | 
/// An element of an XML input stream.
///
/// Items of this enum are emitted by `reader::EventReader`. They correspond to different
/// elements of an XML document.
#[derive(PartialEq, Clone)]
pub enum XmlEvent {
    /// Corresponds to XML document declaration.
    ///
    /// This event is always emitted before any other event. It is emitted
    /// even if the actual declaration is not present in the document.
    StartDocument {
        /// XML version.
        ///
        /// If XML declaration is not present, defaults to `Version10`.
        version: XmlVersion,

        /// XML document encoding.
        ///
        /// If XML declaration is not present or does not contain `encoding` attribute,
        /// defaults to `"UTF-8"`. This field is currently used for no other purpose than
        /// informational.
        encoding: String,

        /// XML standalone declaration.
        ///
        /// If XML declaration is not present or does not contain `standalone` attribute,
        /// defaults to `None`. This field is currently used for no other purpose than
        /// informational.
        standalone: Option<bool>,
    },

    /// Denotes the end of the document stream.
    ///
    /// This event is always emitted after any other event (except `Error`). After it
    /// is emitted for the first time, it will always be emitted on next event pull attempts.
    EndDocument,

    /// Denotes an XML processing instruction.
    ///
    /// This event contains a processing instruction target (`name`) and opaque `data`. It
    /// is up to the application to process them.
    ProcessingInstruction {
        /// Processing instruction target.
        name: String,

        /// Processing instruction content.
        data: Option<String>,
    },

    /// Denotes a beginning of an XML element.
    ///
    /// This event is emitted after parsing opening tags or after parsing bodiless tags. In the
    /// latter case `EndElement` event immediately follows.
    StartElement {
        /// Qualified name of the element.
        name: OwnedName,

        /// A list of attributes associated with the element.
        ///
        /// Currently attributes are not checked for duplicates (TODO)
        // NOTE(review): the reader has an `AttributesSet` with a `contains()` lookup, so the
        // TODO above may be stale — confirm against the opening-tag parser.
        attributes: Vec<OwnedAttribute>,

        /// Contents of the namespace mapping at this point of the document.
        namespace: Namespace,
    },

    /// Denotes an end of an XML element.
    ///
    /// This event is emitted after parsing closing tags or after parsing bodiless tags. In the
    /// latter case it is emitted immediately after corresponding `StartElement` event.
    EndElement {
        /// Qualified name of the element.
        name: OwnedName,
    },

    /// Denotes CDATA content.
    ///
    /// This event contains unparsed data. No unescaping will be performed.
    ///
    /// It is possible to configure a parser to emit `Characters` event instead of `CData`. See
    /// the `ParserConfig` structure (`cdata_to_characters`) for more information.
    CData(String),

    /// Denotes a comment.
    ///
    /// It is possible to configure a parser to ignore comments, so this event will never be emitted.
    /// See the `ParserConfig` structure (`ignore_comments`) for more information.
    Comment(String),

    /// Denotes character data outside of tags.
    ///
    /// Contents of this event will always be unescaped, so no entities like `&lt;` or `&amp;` or `&#123;`
    /// will appear in it.
    ///
    /// It is possible to configure a parser to trim leading and trailing whitespace for this event.
    /// See the `ParserConfig` structure for more information.
    Characters(String),

    /// Denotes a chunk of whitespace outside of tags.
    ///
    /// It is possible to configure a parser to emit `Characters` event instead of `Whitespace`.
    /// See the `ParserConfig` structure for more information. When combined with whitespace
    /// trimming, it will eliminate standalone whitespace from the event stream completely.
    Whitespace(String),

    /// The whole DOCTYPE markup
    Doctype {
        /// Everything including `<` and `>`
        syntax: String,
    },
}
119 | 
120 | impl fmt::Debug for XmlEvent {
121 |     #[cold]
122 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123 |         match self {
124 |             Self::StartDocument { version, encoding, standalone } =>
125 |                 write!(f, "StartDocument({}, {}, {:?})", version, *encoding, standalone),
126 |             Self::EndDocument =>
127 |                 write!(f, "EndDocument"),
128 |             Self::ProcessingInstruction { name, data } =>
129 |                 write!(f, "ProcessingInstruction({}{})", *name, match data {
130 |                     Some(data) => format!(", {data}"),
131 |                     None       => String::new()
132 |                 }),
133 |             Self::StartElement { name, attributes, namespace: Namespace(namespace) } =>
134 |                 write!(f, "StartElement({}, {:?}{})", name, namespace, if attributes.is_empty() {
135 |                     String::new()
136 |                 } else {
137 |                     let attributes: Vec<String> = attributes.iter().map(
138 |                         |a| format!("{} -> {}", a.name, a.value)
139 |                     ).collect();
140 |                     format!(", [{}]", attributes.join(", "))
141 |                 }),
142 |             Self::EndElement { name } =>
143 |                 write!(f, "EndElement({name})"),
144 |             Self::Comment(data) =>
145 |                 write!(f, "Comment({data})"),
146 |             Self::CData(data) =>
147 |                 write!(f, "CData({data})"),
148 |             Self::Characters(data) =>
149 |                 write!(f, "Characters({data})"),
150 |             Self::Whitespace(data) =>
151 |                 write!(f, "Whitespace({data})"),
152 |             Self::Doctype { syntax } =>
153 |                 write!(f, "Doctype({syntax})"),
154 |         }
155 |     }
156 | }
157 | 
158 | impl XmlEvent {
159 |     /// Obtains a writer event from this reader event.
160 |     ///
161 |     /// This method is useful for streaming processing of XML documents where the output
162 |     /// is also an XML document. With this method it is possible to process some events
163 |     /// while passing other events through to the writer unchanged:
164 |     ///
165 |     /// ```rust
166 |     /// use std::str;
167 |     ///
168 |     /// use xml::reader::XmlEvent as ReaderEvent;
169 |     /// use xml::writer::XmlEvent as WriterEvent;
170 |     /// use xml::{EventReader, EventWriter};
171 |     ///
172 |     /// let mut input: &[u8] = b"<hello>world</hello>";
173 |     /// let mut output: Vec<u8> = Vec::new();
174 |     ///
175 |     /// {
176 |     ///     let mut reader = EventReader::new(&mut input);
177 |     ///     let mut writer = EventWriter::new(&mut output);
178 |     ///
179 |     ///     for e in reader {
180 |     ///         match e.unwrap() {
181 |     ///             ReaderEvent::Characters(s) =>
182 |     ///                 writer.write(WriterEvent::characters(&s.to_uppercase())).unwrap(),
183 |     ///             e => if let Some(e) = e.as_writer_event() {
184 |     ///                 writer.write(e).unwrap()
185 |     ///             }
186 |     ///         }
187 |     ///     }
188 |     /// }
189 |     ///
190 |     /// assert_eq!(
191 |     ///     str::from_utf8(&output).unwrap(),
192 |     ///     r#"<?xml version="1.0" encoding="UTF-8"?><hello>WORLD</hello>"#
193 |     /// );
194 |     /// ```
195 |     ///
196 |     /// Note that this API may change or get additions in future to improve its ergonomics.
197 |     #[must_use]
198 |     pub fn as_writer_event(&self) -> Option<crate::writer::events::XmlEvent<'_>> {
199 |         match self {
200 |             Self::StartDocument { version, encoding, standalone } =>
201 |                 Some(crate::writer::events::XmlEvent::StartDocument {
202 |                     version: *version,
203 |                     encoding: Some(encoding),
204 |                     standalone: *standalone
205 |                 }),
206 |             Self::ProcessingInstruction { name, data } =>
207 |                 Some(crate::writer::events::XmlEvent::ProcessingInstruction {
208 |                     name,
209 |                     data: data.as_ref().map(|s| &**s)
210 |                 }),
211 |             Self::StartElement { name, attributes, namespace } =>
212 |                 Some(crate::writer::events::XmlEvent::StartElement {
213 |                     name: name.borrow(),
214 |                     attributes: attributes.iter().map(|a| a.borrow()).collect(),
215 |                     namespace: namespace.borrow(),
216 |                 }),
217 |             Self::EndElement { name } =>
218 |                 Some(crate::writer::events::XmlEvent::EndElement { name: Some(name.borrow()) }),
219 |             Self::Comment(data) => Some(crate::writer::events::XmlEvent::Comment(data)),
220 |             Self::CData(data) => Some(crate::writer::events::XmlEvent::CData(data)),
221 |             Self::Characters(data) |
222 |             Self::Whitespace(data) => Some(crate::writer::events::XmlEvent::Characters(data)),
223 |             Self::Doctype { syntax } => Some(crate::writer::events::XmlEvent::Doctype(syntax)),
224 |             Self::EndDocument => None,
225 |         }
226 |     }
227 | }
228 | 


--------------------------------------------------------------------------------
/src/reader/indexset.rs:
--------------------------------------------------------------------------------
  1 | use crate::attribute::OwnedAttribute;
  2 | use crate::name::OwnedName;
  3 | 
  4 | use std::collections::hash_map::RandomState;
  5 | use std::collections::HashSet;
  6 | use std::hash::{BuildHasher, Hash, Hasher};
  7 | 
/// An ordered set of attributes, keyed by attribute name.
///
/// Attributes are kept in insertion order in a `Vec`; a set of name hashes is
/// maintained alongside it to speed up membership tests once the set grows large.
pub struct AttributesSet {
    /// The attributes themselves, in the order they were pushed.
    vec: Vec<OwnedAttribute>,
    /// Uses a no-op hasher, because these u64s are hashes already
    may_contain: HashSet<u64, U64HasherBuilder>,
    /// This is real hasher for the `OwnedName`
    hasher: RandomState,
}

/// Use linear search and don't allocate `HashSet` if there are few attributes,
/// because allocation costs more than a few comparisons.
const HASH_THRESHOLD: usize = 8;
 20 | 
 21 | impl AttributesSet {
 22 |     pub fn new() -> Self {
 23 |         Self {
 24 |             vec: Vec::new(),
 25 |             hasher: RandomState::new(),
 26 |             may_contain: HashSet::default(),
 27 |         }
 28 |     }
 29 | 
 30 |     fn hash(&self, val: &OwnedName) -> u64 {
 31 |         let mut h = self.hasher.build_hasher();
 32 |         val.hash(&mut h);
 33 |         h.finish()
 34 |     }
 35 | 
 36 |     pub fn len(&self) -> usize {
 37 |         self.vec.len()
 38 |     }
 39 | 
 40 |     pub fn contains(&self, name: &OwnedName) -> bool {
 41 |         // fall back to linear search only on duplicate or hash collision
 42 |         (self.vec.len() < HASH_THRESHOLD || self.may_contain.contains(&self.hash(name))) &&
 43 |             self.vec.iter().any(move |a| &a.name == name)
 44 |     }
 45 | 
 46 |     pub fn push(&mut self, attr: OwnedAttribute) {
 47 |         if self.vec.len() >= HASH_THRESHOLD {
 48 |             if self.vec.len() == HASH_THRESHOLD {
 49 |                 self.may_contain.reserve(HASH_THRESHOLD * 2);
 50 |                 for attr in &self.vec {
 51 |                     self.may_contain.insert(self.hash(&attr.name));
 52 |                 }
 53 |             }
 54 |             self.may_contain.insert(self.hash(&attr.name));
 55 |         }
 56 |         self.vec.push(attr);
 57 |     }
 58 | 
 59 |     pub fn into_vec(self) -> Vec<OwnedAttribute> {
 60 |         self.vec
 61 |     }
 62 | }
 63 | 
 64 | #[test]
 65 | fn indexset() {
 66 |     let mut s = AttributesSet::new();
 67 |     let not_here = OwnedName {
 68 |         local_name: "attr1000".into(),
 69 |         namespace: Some("test".into()),
 70 |         prefix: None,
 71 |     };
 72 | 
 73 |     // this test will take a lot of time if the `contains()` is linear, and the loop is quadratic
 74 |     for i in 0..50000 {
 75 |         let name = OwnedName {
 76 |             local_name: format!("attr{i}"), namespace: None, prefix: None,
 77 |         };
 78 |         assert!(!s.contains(&name));
 79 | 
 80 |         s.push(OwnedAttribute { name, value: String::new() });
 81 |         assert!(!s.contains(&not_here));
 82 |     }
 83 | 
 84 |     assert!(s.contains(&OwnedName {
 85 |         local_name: "attr1234".into(), namespace: None, prefix: None,
 86 |     }));
 87 |     assert!(s.contains(&OwnedName {
 88 |         local_name: "attr0".into(), namespace: None, prefix: None,
 89 |     }));
 90 |     assert!(s.contains(&OwnedName {
 91 |         local_name: "attr49999".into(), namespace: None, prefix: None,
 92 |     }));
 93 | }
 94 | 
 95 | /// Hashser that does nothing except passing u64 through
 96 | struct U64Hasher(u64);
 97 | 
 98 | impl Hasher for U64Hasher {
 99 |     fn finish(&self) -> u64 { self.0 }
100 |     fn write(&mut self, slice: &[u8]) {
101 |         for &v in slice { self.0 ^= u64::from(v) } // unused in practice
102 |     }
103 |     fn write_u64(&mut self, i: u64) {
104 |         self.0 ^= i;
105 |     }
106 | }
107 | 
108 | #[derive(Default)]
109 | struct U64HasherBuilder;
110 | 
111 | impl BuildHasher for U64HasherBuilder {
112 |     type Hasher = U64Hasher;
113 |     fn build_hasher(&self) -> U64Hasher { U64Hasher(0) }
114 | }
115 | 


--------------------------------------------------------------------------------
/src/reader/parser/inside_cdata.rs:
--------------------------------------------------------------------------------
 1 | use crate::common::is_whitespace_char;
 2 | use crate::reader::error::SyntaxError;
 3 | use crate::reader::events::XmlEvent;
 4 | use crate::reader::lexer::Token;
 5 | 
 6 | use super::{PullParser, Result, State};
 7 | 
 8 | impl PullParser {
 9 |     pub fn inside_cdata(&mut self, t: Token) -> Option<Result> {
10 |         match t {
11 |             Token::CDataEnd => {
12 |                 let event = if self.config.cdata_to_characters {
13 |                     // start called push_pos, but there will be no event to pop it
14 |                     if self.buf.is_empty() {
15 |                         self.next_pos();
16 |                     }
17 |                     None
18 |                 } else {
19 |                     let data = self.take_buf();
20 |                     Some(Ok(XmlEvent::CData(data)))
21 |                 };
22 |                 self.into_state(State::OutsideTag, event)
23 |             },
24 | 
25 |             Token::Character(c) if !self.is_valid_xml_char(c) => {
26 |                 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
27 |             },
28 |             Token::Character(c) => {
29 |                 if !is_whitespace_char(c) {
30 |                     self.inside_whitespace = false;
31 |                 }
32 |                 self.buf.push(c);
33 |                 None
34 |             },
35 |             _ => {
36 |                 debug_assert!(false, "unreachable");
37 |                 None
38 |             },
39 |         }
40 |     }
41 | }
42 | 


--------------------------------------------------------------------------------
/src/reader/parser/inside_closing_tag_name.rs:
--------------------------------------------------------------------------------
 1 | use super::{ClosingTagSubstate, PullParser, QualifiedNameTarget, Result, State};
 2 | use crate::common::is_whitespace_char;
 3 | use crate::namespace;
 4 | use crate::reader::error::SyntaxError;
 5 | use crate::reader::lexer::Token;
 6 | 
impl PullParser {
    /// Handles one token while the parser is inside a closing tag.
    ///
    /// `s` selects the sub-state: `CTInsideName` while the qualified name is still being
    /// read, `CTAfterName` once the name is complete and only whitespace or the tag end
    /// may follow.
    ///
    /// Returns the result of the helper that was invoked (an error, a state transition,
    /// or the end-element emission); `None` is returned when trailing whitespace is
    /// skipped — presumably meaning "no event yet", confirm against the caller.
    pub fn inside_closing_tag_name(&mut self, t: Token, s: ClosingTagSubstate) -> Option<Result> {
        match s {
            // The callback runs with the parsed name and the token that was passed along with it.
            ClosingTagSubstate::CTInsideName => self.read_qualified_name(t, QualifiedNameTarget::ClosingTagNameTarget, |this, token, name| {
                match name.prefix_ref() {
                    // Closing tags using the `NS_XML_PREFIX` / `NS_XMLNS_PREFIX` prefixes are rejected.
                    Some(prefix) if prefix == namespace::NS_XML_PREFIX ||
                                    prefix == namespace::NS_XMLNS_PREFIX =>
                        Some(this.error(SyntaxError::InvalidNamePrefix(prefix.into()))),
                    _ => {
                        // Remember the name before acting on the token.
                        this.data.element_name = Some(name.clone());
                        match token {
                            Token::TagEnd => this.emit_end_element(),
                            // Whitespace after the name: wait for the tag end in `CTAfterName`.
                            Token::Character(c) if is_whitespace_char(c) => this.into_state_continue(State::InsideClosingTag(ClosingTagSubstate::CTAfterName)),
                            _ => Some(this.error(SyntaxError::UnexpectedTokenInClosingTag(token))),
                        }
                    }
                }
            }),
            ClosingTagSubstate::CTAfterName => match t {
                Token::TagEnd => self.emit_end_element(),
                Token::Character(c) if is_whitespace_char(c) => None, //  Skip whitespace
                // Anything other than whitespace or the tag end is a syntax error here.
                _ => Some(self.error(SyntaxError::UnexpectedTokenInClosingTag(t))),
            },
        }
    }
}
33 | 


--------------------------------------------------------------------------------
/src/reader/parser/inside_comment.rs:
--------------------------------------------------------------------------------
 1 | use crate::reader::error::SyntaxError;
 2 | use crate::reader::events::XmlEvent;
 3 | use crate::reader::lexer::Token;
 4 | 
 5 | use super::{PullParser, Result, State};
 6 | 
 7 | impl PullParser {
 8 |     pub fn inside_comment(&mut self, t: Token) -> Option<Result> {
 9 |         match t {
10 |             Token::CommentEnd if self.config.ignore_comments => {
11 |                 self.into_state_continue(State::OutsideTag)
12 |             },
13 | 
14 |             Token::CommentEnd => {
15 |                 let data = self.take_buf();
16 |                 self.into_state_emit(State::OutsideTag, Ok(XmlEvent::Comment(data)))
17 |             },
18 | 
19 |             Token::Character(c) if !self.is_valid_xml_char(c) => {
20 |                 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
21 |             },
22 | 
23 |             _ if self.config.ignore_comments => None, // Do not modify buffer if ignoring the comment
24 | 
25 |             _ => {
26 |                 if self.buf.len() > self.config.max_data_length {
27 |                     return Some(self.error(SyntaxError::ExceededConfiguredLimit));
28 |                 }
29 |                 t.push_to_string(&mut self.buf);
30 |                 None
31 |             },
32 |         }
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/src/reader/parser/inside_declaration.rs:
--------------------------------------------------------------------------------
  1 | use crate::common::{is_whitespace_char, XmlVersion};
  2 | use crate::reader::error::SyntaxError;
  3 | use crate::reader::events::XmlEvent;
  4 | use crate::reader::lexer::Token;
  5 | use crate::util::Encoding;
  6 | 
  7 | use super::{
  8 |     DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State,
  9 |     DEFAULT_VERSION,
 10 | };
 11 | 
 12 | impl PullParser {
    /// Finishes the XML declaration: records that it was seen, reconciles the declared
    /// encoding with the lexer's current one, and emits `XmlEvent::StartDocument` while
    /// switching to `State::OutsideTag`.
    ///
    /// Returns an error result instead when the declared encoding cannot be parsed or
    /// conflicts with the lexer's encoding, unless
    /// `config.ignore_invalid_encoding_declarations` is set.
    #[inline(never)]
    fn emit_start_document(&mut self) -> Option<Result> {
        debug_assert!(self.encountered == Encountered::None);
        self.encountered = Encountered::Declaration;

        let version = self.data.version;
        let encoding = self.data.take_encoding();
        let standalone = self.data.standalone;

        if let Some(new_encoding) = encoding.as_deref() {
            let new_encoding = match new_encoding.parse() {
                Ok(e) => e,
                // Unparseable name, but lenient mode: fall back to Latin-1.
                Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1,
                Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))),
            };
            let current_encoding = self.lexer.encoding();
            if current_encoding != new_encoding {
                // Arm order matters: the first matching rule decides which encoding the lexer uses.
                let set = match (current_encoding, new_encoding) {
                    // Lexer has no definite encoding yet: adopt the declared one, except bare UTF-16.
                    (Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new,
                    // Declaration says UTF-16 and the lexer already uses a specific byte order: keep it.
                    (Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding,
                    // Any other mismatch: keep the lexer's encoding in lenient mode...
                    _ if self.config.ignore_invalid_encoding_declarations => current_encoding,
                    // ...otherwise report the conflict.
                    _ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))),
                };
                self.lexer.set_encoding(set);
            }
        }

        // Re-read the lexer's encoding: it may have been changed above.
        let current_encoding = self.lexer.encoding();
        self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
            version: version.unwrap_or(DEFAULT_VERSION),
            // The event carries the declared encoding string verbatim when there is one;
            // otherwise the name of the encoding the lexer is using.
            encoding: encoding.unwrap_or_else(move || current_encoding.to_string()),
            standalone
        }))
    }
 47 | 
 48 |     // TODO: remove redundancy via macros or extra methods
 49 |     pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> {
 50 | 
 51 |         match s {
 52 |             DeclarationSubstate::BeforeVersion => match t {
 53 |                 Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)),
 54 |                 Token::Character(c) if is_whitespace_char(c) => None,  // continue
 55 |                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
 56 |             },
 57 | 
 58 |             DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
 59 |                 match &*name.local_name {
 60 |                     "ersion" if name.namespace.is_none() =>
 61 |                         this.into_state_continue(State::InsideDeclaration(
 62 |                             if token == Token::EqualsSign {
 63 |                                 DeclarationSubstate::InsideVersionValue
 64 |                             } else {
 65 |                                 DeclarationSubstate::AfterVersion
 66 |                             }
 67 |                         )),
 68 |                     _ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))),
 69 |                 }
 70 |             }),
 71 | 
 72 |             DeclarationSubstate::AfterVersion => match t {
 73 |                 Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)),
 74 |                 Token::Character(c) if is_whitespace_char(c) => None,
 75 |                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
 76 |             },
 77 | 
 78 |             DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| {
 79 |                 this.data.version = match &*value {
 80 |                     "1.0" => Some(XmlVersion::Version10),
 81 |                     "1.1" => Some(XmlVersion::Version11),
 82 |                     _     => None
 83 |                 };
 84 |                 if this.data.version.is_some() {
 85 |                     this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue))
 86 |                 } else {
 87 |                     Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into())))
 88 |                 }
 89 |             }),
 90 | 
 91 |             DeclarationSubstate::AfterVersionValue => match t {
 92 |                 Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)),
 93 |                 Token::ProcessingInstructionEnd => self.emit_start_document(),
 94 |                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
 95 |             },
 96 | 
 97 |             DeclarationSubstate::BeforeEncoding => match t {
 98 |                 Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)),
 99 |                 Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
100 |                 Token::ProcessingInstructionEnd => self.emit_start_document(),
101 |                 Token::Character(c) if is_whitespace_char(c) => None,  // skip whitespace
102 |                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
103 |             },
104 | 
105 |             DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
106 |                 match &*name.local_name {
107 |                     "ncoding" if name.namespace.is_none() =>
108 |                         this.into_state_continue(State::InsideDeclaration(
109 |                             if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding }
110 |                         )),
111 |                     _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into())))
112 |                 }
113 |             }),
114 | 
115 |             DeclarationSubstate::AfterEncoding => match t {
116 |                 Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)),
117 |                 Token::Character(c) if is_whitespace_char(c) => None,
118 |                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
119 |             },
120 | 
121 |             DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
122 |                 this.data.encoding = Some(value);
123 |                 this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue))
124 |             }),
125 | 
126 |             DeclarationSubstate::AfterEncodingValue => match t {
127 |                 Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)),
128 |                 Token::ProcessingInstructionEnd => self.emit_start_document(),
129 |                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
130 |             },
131 | 
132 |             DeclarationSubstate::BeforeStandaloneDecl => match t {
133 |                 Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
134 |                 Token::ProcessingInstructionEnd => self.emit_start_document(),
135 |                 Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
136 |                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
137 |             },
138 | 
139 |             DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
140 |                 match &*name.local_name {
141 |                     "tandalone" if name.namespace.is_none() =>
142 |                         this.into_state_continue(State::InsideDeclaration(
143 |                             if token == Token::EqualsSign {
144 |                                 DeclarationSubstate::InsideStandaloneDeclValue
145 |                             } else {
146 |                                 DeclarationSubstate::AfterStandaloneDecl
147 |                             }
148 |                         )),
149 |                     _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))),
150 |                 }
151 |             }),
152 | 
153 |             DeclarationSubstate::AfterStandaloneDecl => match t {
154 |                 Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)),
155 |                 Token::Character(c) if is_whitespace_char(c) => None,
156 |                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
157 |             },
158 | 
159 |             DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| {
160 |                 let standalone = match &*value {
161 |                     "yes" => Some(true),
162 |                     "no"  => Some(false),
163 |                     _     => None
164 |                 };
165 |                 if standalone.is_some() {
166 |                     this.data.standalone = standalone;
167 |                     this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue))
168 |                 } else {
169 |                     Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into())))
170 |                 }
171 |             }),
172 | 
173 |             DeclarationSubstate::AfterStandaloneDeclValue => match t {
174 |                 Token::ProcessingInstructionEnd => self.emit_start_document(),
175 |                 Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
176 |                 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
177 |             },
178 |         }
179 |     }
180 | }
181 | 


--------------------------------------------------------------------------------
/src/reader/parser/inside_opening_tag.rs:
--------------------------------------------------------------------------------
  1 | use crate::attribute::OwnedAttribute;
  2 | use crate::common::{is_name_start_char, is_whitespace_char};
  3 | use crate::namespace;
  4 | use crate::reader::error::SyntaxError;
  5 | 
  6 | use crate::reader::lexer::Token;
  7 | 
  8 | use super::{OpeningTagSubstate, PullParser, QualifiedNameTarget, Result, State};
  9 | 
 10 | impl PullParser {
 11 |     /// Handles token `t` while the parser is inside an opening tag, in substate `s`.
 12 |     ///
 13 |     /// Whitespace-skipping arms (and the arm marked unreachable) return `None`; the remaining
 14 |     /// arms return whatever the transition, emit or error helper they call produces.
 15 |     pub fn inside_opening_tag(&mut self, t: Token, s: OpeningTagSubstate) -> Option<Result> {
 16 |         let max_attrs = self.config.max_attributes;
 17 |         match s {
 18 |             OpeningTagSubstate::InsideName => self.read_qualified_name(t, QualifiedNameTarget::OpeningTagNameTarget, |this, token, name| {
 19 |                 match name.prefix_ref() {
 20 |                     Some(prefix) if prefix == namespace::NS_XML_PREFIX || prefix == namespace::NS_XMLNS_PREFIX =>
 21 |                         Some(this.error(SyntaxError::InvalidNamePrefix(prefix.into()))),
 22 |                     _ => {
 23 |                         this.data.element_name = Some(name); // last use of `name`, so it is moved rather than cloned
 24 |                         match token {
 25 |                             Token::TagEnd => this.emit_start_element(false),
 26 |                             Token::EmptyTagEnd => this.emit_start_element(true),
 27 |                             Token::Character(c) if is_whitespace_char(c) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag)),
 28 |                             _ => {
 29 |                                 debug_assert!(false, "unreachable");
 30 |                                 None
 31 |                             },
 32 |                         }
 33 |                     }
 34 |                 }
 35 |             }),
 36 | 
 37 |             OpeningTagSubstate::InsideTag => match t {
 38 |                 Token::TagEnd => self.emit_start_element(false),
 39 |                 Token::EmptyTagEnd => self.emit_start_element(true),
 40 |                 Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
 41 |                 Token::Character(c) if is_name_start_char(c) => {
 42 |                     if self.buf.len() > self.config.max_name_length {
 43 |                         return Some(self.error(SyntaxError::ExceededConfiguredLimit));
 44 |                     }
 45 |                     self.buf.push(c);
 46 |                     self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeName))
 47 |                 },
 48 |                 _ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t))),
 49 |             },
 50 | 
 51 |             OpeningTagSubstate::InsideAttributeName => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
 52 |                 // an attribute name may appear only once per tag;
 53 |                 // a repeated name means the XML is not well-formed
 54 |                 if this.data.attributes.contains(&name) {
 55 |                     return Some(this.error(SyntaxError::RedefinedAttribute(name.to_string().into())))
 56 |                 }
 57 | 
 58 |                 this.data.attr_name = Some(name);
 59 |                 match token {
 60 |                     Token::EqualsSign => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
 61 |                     Token::Character(c) if is_whitespace_char(c) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeName)),
 62 |                     _ => Some(this.error(SyntaxError::UnexpectedTokenInOpeningTag(token))) // likely unreachable; reports the token matched on
 63 |                 }
 64 |             }),
 65 | 
 66 |             OpeningTagSubstate::AfterAttributeName => match t {
 67 |                 Token::EqualsSign => self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
 68 |                 Token::Character(c) if is_whitespace_char(c) => None,
 69 |                 _ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t)))
 70 |             },
 71 | 
 72 |             OpeningTagSubstate::InsideAttributeValue => self.read_attribute_value(t, |this, value| {
 73 |                 let name = this.data.take_attr_name()?;  // stored by the InsideAttributeName arm above
 74 |                 match name.prefix_ref() {
 75 |                     // declaring a new prefix; it is sufficient to check prefix only
 76 |                     // because "xmlns" prefix is reserved
 77 |                     Some(namespace::NS_XMLNS_PREFIX) => {
 78 |                         let ln = &*name.local_name;
 79 |                         if ln == namespace::NS_XMLNS_PREFIX {
 80 |                             Some(this.error(SyntaxError::CannotRedefineXmlnsPrefix))
 81 |                         } else if ln == namespace::NS_XML_PREFIX && &*value != namespace::NS_XML_URI {
 82 |                             Some(this.error(SyntaxError::CannotRedefineXmlPrefix))
 83 |                         } else if value.is_empty() {
 84 |                             Some(this.error(SyntaxError::CannotUndefinePrefix(ln.into())))
 85 |                         } else {
 86 |                             this.nst.put(name.local_name.clone(), value);
 87 |                             this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeValue))
 88 |                         }
 89 |                     },
 90 | 
 91 |                     // declaring default namespace
 92 |                     None if &*name.local_name == namespace::NS_XMLNS_PREFIX => match &*value {
 93 |                         namespace::NS_XMLNS_PREFIX | namespace::NS_XML_PREFIX | namespace::NS_XML_URI | namespace::NS_XMLNS_URI =>
 94 |                             Some(this.error(SyntaxError::InvalidDefaultNamespace(value.into()))),
 95 |                         _ => {
 96 |                             this.nst.put(namespace::NS_NO_PREFIX, value); // `value` is owned and unused afterwards: no clone
 97 |                             this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeValue))
 98 |                         }
 99 |                     },
100 | 
101 |                     // regular attribute; only these count towards `max_attributes`
102 |                     _ => {
103 |                         if this.data.attributes.len() >= max_attrs {
104 |                             return Some(this.error(SyntaxError::ExceededConfiguredLimit));
105 |                         }
106 |                         this.data.attributes.push(OwnedAttribute { name, value });
107 |                         this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeValue))
108 |                     },
109 |                 }
110 |             }),
111 | 
112 |             OpeningTagSubstate::AfterAttributeValue => match t {
113 |                 Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag)),
114 |                 Token::TagEnd => self.emit_start_element(false),
115 |                 Token::EmptyTagEnd => self.emit_start_element(true),
116 |                 _ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t))),
117 |             },
118 |         }
119 |     }
120 | }
121 | 


--------------------------------------------------------------------------------
/src/reader/parser/inside_processing_instruction.rs:
--------------------------------------------------------------------------------
  1 | use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
  2 | use crate::reader::error::SyntaxError;
  3 | 
  4 | use crate::reader::events::XmlEvent;
  5 | use crate::reader::lexer::Token;
  6 | 
  7 | use super::{DeclarationSubstate, Encountered, ProcessingInstructionSubstate, PullParser, Result, State};
  8 | 
  9 | impl PullParser {
 10 |     /// Feeds token `t` to the processing-instruction part of the parser, in substate `s`.
 11 |     ///
 12 |     /// While in `PIInsideName` the instruction's name is collected in `self.buf`. When a
 13 |     /// whitespace character ends the name, it is stored in `self.data.name` and `self.buf`
 14 |     /// then collects the instruction's data (`PIInsideData`) until
 15 |     /// `Token::ProcessingInstructionEnd` arrives.
 16 |     pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> {
 17 |         match s {
 18 |             ProcessingInstructionSubstate::PIInsideName => match t {
 19 |                 // the first character must start a name, the following ones must continue it
 20 |                 Token::Character(c) if (self.buf.is_empty() && is_name_start_char(c)) ||
 21 |                                  (self.buf_has_data() && is_name_char(c)) => {
 22 |                     // enforce the configured limit on the name's length
 23 |                     let name_len = self.buf.len();
 24 |                     if name_len > self.config.max_name_length {
 25 |                         return Some(self.error(SyntaxError::ExceededConfiguredLimit));
 26 |                     }
 27 |                     self.buf.push(c);
 28 |                     None
 29 |                 },
 30 | 
 31 |                 // the instruction ended right after its name, so it carries no data
 32 |                 Token::ProcessingInstructionEnd => {
 33 |                     let target = self.take_buf();
 34 |                     if target.is_empty() {
 35 |                         return Some(self.error(SyntaxError::ProcessingInstructionWithoutName));
 36 |                     }
 37 |                     // <?xml-like PI (any letter case): it can't be a declaration, which has mandatory
 38 |                     // attributes, so it is an error - see section 2.6 of XML 1.1 spec
 39 |                     if "xml".eq_ignore_ascii_case(&target) {
 40 |                         return Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(target.into())));
 41 |                     }
 42 | 
 43 |                     debug_assert!(self.next_event.is_none(), "{:?}", self.next_event);
 44 |                     // can't have a PI before `<?xml`
 45 |                     let pending = self.set_encountered(Encountered::Declaration);
 46 |                     let instruction = Some(Ok(XmlEvent::ProcessingInstruction { name: target, data: None }));
 47 |                     // Only one event is handed to `into_state`: if `set_encountered` produced
 48 |                     // one it goes first, and the PI itself is parked in `self.next_event`.
 49 |                     let first = if pending.is_none() {
 50 |                         instruction
 51 |                     } else {
 52 |                         self.next_event = instruction;
 53 |                         pending
 54 |                     };
 55 |                     self.into_state(State::OutsideTag, first)
 56 |                 },
 57 | 
 58 |                 // whitespace terminates the name
 59 |                 Token::Character(c) if is_whitespace_char(c) => {
 60 |                     let target = self.take_buf();
 61 | 
 62 |                     if &*target == "xml" && self.encountered == Encountered::None {
 63 |                         // nothing has been encountered so far, so this starts the XML declaration
 64 |                         self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion))
 65 |                     } else if "xml".eq_ignore_ascii_case(&target) {
 66 |                         // Found <?xml-like PI after the beginning of a document,
 67 |                         // it is an error - see section 2.6 of XML 1.1 spec
 68 |                         Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(target.into())))
 69 |                     } else {
 70 |                         // an ordinary PI: keep its name and go on to read its data
 71 |                         self.data.name = target;
 72 |                         // can't have a PI before `<?xml`
 73 |                         let next_event = self.set_encountered(Encountered::Declaration);
 74 |                         self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData), next_event)
 75 |                     }
 76 |                 },
 77 | 
 78 |                 // no other token is accepted while the name is being read
 79 |                 _ => {
 80 |                     let partial_name = self.take_buf();
 81 |                     Some(self.error(SyntaxError::UnexpectedProcessingInstruction(partial_name.into(), t)))
 82 |                 },
 83 |             },
 84 | 
 85 |             ProcessingInstructionSubstate::PIInsideData => match t {
 86 |                 // end of the instruction: emit it together with the collected data
 87 |                 Token::ProcessingInstructionEnd => {
 88 |                     let name = self.data.take_name();
 89 |                     let data = Some(self.take_buf());
 90 |                     let event = XmlEvent::ProcessingInstruction { name, data };
 91 |                     self.into_state_emit(State::OutsideTag, Ok(event))
 92 |                 },
 93 | 
 94 |                 // characters rejected by `is_valid_xml_char` are reported as `InvalidCharacterEntity`
 95 |                 Token::Character(c) if !self.is_valid_xml_char(c) => {
 96 |                     let code_point = c as u32;
 97 |                     Some(self.error(SyntaxError::InvalidCharacterEntity(code_point)))
 98 |                 },
 99 | 
100 |                 // Any other token should be treated as plain characters
101 |                 _ => {
102 |                     let data_len = self.buf.len();
103 |                     if data_len > self.config.max_data_length {
104 |                         return Some(self.error(SyntaxError::ExceededConfiguredLimit));
105 |                     }
106 |                     // the limit is checked before this token's text is appended
107 |                     t.push_to_string(&mut self.buf);
108 |                     None
109 |                 },
110 |             },
111 |         }
112 |     }
113 | }
114 | 


--------------------------------------------------------------------------------
/src/reader/parser/inside_reference.rs:
--------------------------------------------------------------------------------
 1 | use super::{PullParser, Result, State};
 2 | use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
 3 | use crate::reader::error::SyntaxError;
 4 | use crate::reader::lexer::Token;
 5 | use std::char;
 6 | 
 7 | impl PullParser {
 8 |     /// Consumes token `t` while a reference is being read: characters are gathered in
 9 |     /// `self.data.ref_data` and `Token::ReferenceEnd` resolves what has been gathered.
10 |     pub fn inside_reference(&mut self, t: Token) -> Option<Result> {
11 |         match t {
12 |             Token::Character(c) if (self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#')) || (!self.data.ref_data.is_empty() && is_name_char(c)) => {
13 |                 self.data.ref_data.push(c);
14 |                 None
15 |             },
16 | 
17 |             Token::ReferenceEnd => {
18 |                 let entity = self.data.take_ref_data();
19 |                 if entity.is_empty() {
20 |                     return Some(self.error(SyntaxError::EmptyEntity));
21 |                 }
22 | 
23 |                 let single = match &*entity {
24 |                     "lt"   => Some('<'),
25 |                     "gt"   => Some('>'),
26 |                     "amp"  => Some('&'),
27 |                     "apos" => Some('\''),
28 |                     "quot" => Some('"'),
29 |                     other => match other.strip_prefix('#').map(|digits| self.numeric_reference_from_str(digits)) {
30 |                         Some(Ok(c)) => Some(c),
31 |                         Some(Err(e)) => return Some(self.error(e)),
32 |                         None => None,
33 |                     },
34 |                 };
35 |                 if let Some(c) = single {
36 |                     self.buf.push(c);
37 |                 } else if let Some(text) = self.config.extra_entities.get(&entity) {
38 |                     self.buf.push_str(text);
39 |                 } else if let Some(text) = self.entities.get(&entity) {
40 |                     // An entity can expand to *elements*, so outside of a tag it is handed to the lexer for a
41 |                     // full reparse; elsewhere it must not affect attribute quoting, so its text is buffered.
42 |                     if self.state_after_reference != State::OutsideTag {
43 |                         self.buf.push_str(text);
44 |                     } else if let Err(e) = self.lexer.reparse(text) {
45 |                         return Some(Err(e));
46 |                     }
47 |                 } else {
48 |                     return Some(self.error(SyntaxError::UnexpectedEntity(entity.into())));
49 |                 }
50 |                 let resume = self.state_after_reference;
51 |                 let last = self.buf.chars().next_back().unwrap_or('\0');
52 |                 if resume == State::OutsideTag && !is_whitespace_char(last) {
53 |                     self.inside_whitespace = false;
54 |                 }
55 |                 self.into_state_continue(resume)
56 |             },
57 | 
58 |             _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
59 |         }
60 |     }
61 | 
62 |     /// Converts the text after `#` of a numeric reference (decimal, or hexadecimal after an `x`
63 |     /// prefix) into the `char` it denotes.
64 |     pub(crate) fn numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError> {
65 |         let parsed = match num_str.strip_prefix('x') {
66 |             Some(hex) => u32::from_str_radix(hex, 16),
67 |             None => num_str.parse::<u32>(),
68 |         };
69 |         let val = parsed.map_err(|_| SyntaxError::InvalidNumericEntity(num_str.into()))?;
70 |         match char::from_u32(val) {
71 |             Some(c) if self.is_valid_xml_char(c) => Ok(c),
72 |             _ if self.config.replace_unknown_entity_references => Ok('\u{fffd}'),
73 |             _ => Err(SyntaxError::InvalidCharacterEntity(val)),
74 |         }
75 |     }
76 | }
77 | 


--------------------------------------------------------------------------------
/src/reader/parser/outside_tag.rs:
--------------------------------------------------------------------------------
  1 | use crate::common::is_whitespace_char;
  2 | use crate::reader::error::SyntaxError;
  3 | use crate::reader::events::XmlEvent;
  4 | use crate::reader::lexer::Token;
  5 | 
  6 | use super::{
  7 |     ClosingTagSubstate, DoctypeSubstate, Encountered, OpeningTagSubstate,
  8 |     ProcessingInstructionSubstate, PullParser, Result, State,
  9 | };
 10 | 
 11 | impl PullParser {
 12 |     pub fn outside_tag(&mut self, t: Token) -> Option<Result> { // handles token `t` outside of any tag: text is buffered in `self.buf`, markup-start tokens flush it and change state
 13 |         match t {
 14 |             Token::Character(c) => {
 15 |                 if is_whitespace_char(c) {
 16 |                     // drop leading whitespace when trimming, and root-level whitespace when configured to ignore it
 17 |                     if (self.config.trim_whitespace && self.buf.is_empty()) ||
 18 |                         (self.depth() == 0 && self.config.ignore_root_level_whitespace) {
 19 |                             return None;
 20 |                     }
 21 |                 } else {
 22 |                     self.inside_whitespace = false; // a non-whitespace char has been seen
 23 |                     if self.depth() == 0 { // such a char is an error at depth 0
 24 |                         return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
 25 |                     }
 26 |                 }
 27 | 
 28 |                 if !self.is_valid_xml_char_not_restricted(c) {
 29 |                     return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
 30 |                 }
 31 | 
 32 |                 if self.buf.is_empty() {
 33 |                     self.push_pos(); // buffering starts here; the flush code in the `_` arm relies on this push
 34 |                 } else if self.buf.len() > self.config.max_data_length {
 35 |                     return Some(self.error(SyntaxError::ExceededConfiguredLimit));
 36 |                 }
 37 |                 self.buf.push(c);
 38 |                 None
 39 |             },
 40 | 
 41 |             Token::CommentEnd | Token::TagEnd | Token::EqualsSign |
 42 |             Token::DoubleQuote | Token::SingleQuote |
 43 |             Token::ProcessingInstructionEnd | Token::EmptyTagEnd => { // these tokens are treated as text here (an error at depth 0)
 44 |                 if self.depth() == 0 {
 45 |                     return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
 46 |                 }
 47 |                 self.inside_whitespace = false;
 48 | 
 49 |                 if let Some(s) = t.as_static_str() { // the token's text, when it has one, is appended to the buffer
 50 |                     if self.buf.is_empty() {
 51 |                         self.push_pos();
 52 |                     } else if self.buf.len() > self.config.max_data_length {
 53 |                         return Some(self.error(SyntaxError::ExceededConfiguredLimit));
 54 |                     }
 55 | 
 56 |                     self.buf.push_str(s);
 57 |                 }
 58 |                 None
 59 |             },
 60 | 
 61 |             Token::ReferenceStart if self.depth() > 0 => {
 62 |                 self.state_after_reference = State::OutsideTag; // `inside_reference` switches back to this state once the reference ends
 63 |                 self.into_state_continue(State::InsideReference)
 64 |             },
 65 | 
 66 |             Token::ReferenceEnd if self.depth() > 0 => { // Semi-colon in a text outside an entity
 67 |                 self.inside_whitespace = false;
 68 |                 if self.buf.len() > self.config.max_data_length {
 69 |                     return Some(self.error(SyntaxError::ExceededConfiguredLimit));
 70 |                 }
 71 |                 Token::ReferenceEnd.push_to_string(&mut self.buf);
 72 |                 None
 73 |             },
 74 | 
 75 |             Token::CommentStart if self.config.coalesce_characters && self.config.ignore_comments => { // buffered text is kept here, not flushed
 76 |                 let next_event = self.set_encountered(Encountered::Comment);
 77 |                 // We need to switch the lexer into a comment mode inside comments
 78 |                 self.into_state(State::InsideComment, next_event)
 79 |             }
 80 | 
 81 |             Token::CDataStart if self.depth() > 0 && self.config.coalesce_characters && self.config.cdata_to_characters => {
 82 |                 if self.buf.is_empty() {
 83 |                     self.push_pos(); // CDataEnd will pop pos if the buffer remains empty
 84 |                 }
 85 |                 // if coalescing chars, continue without event
 86 |                 self.into_state_continue(State::InsideCData)
 87 |             },
 88 | 
 89 |             _ => {
 90 |                 // Encountered some markup event, flush the buffer as characters
 91 |                 // or a whitespace
 92 |                 let mut next_event = if self.buf_has_data() {
 93 |                     let buf = self.take_buf();
 94 |                     if self.inside_whitespace && self.config.trim_whitespace {
 95 |                         // there will be no event emitted for this, but start of buffering has pushed a pos
 96 |                         self.next_pos();
 97 |                         None
 98 |                     } else if self.inside_whitespace && !self.config.whitespace_to_characters {
 99 |                         debug_assert!(buf.chars().all(|ch| ch.is_whitespace()), "ws={buf:?}");
100 |                         Some(Ok(XmlEvent::Whitespace(buf)))
101 |                     } else if self.config.trim_whitespace {
102 |                         Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
103 |                     } else {
104 |                         Some(Ok(XmlEvent::Characters(buf)))
105 |                     }
106 |                 } else { None };
107 |                 self.inside_whitespace = true;  // Reset inside_whitespace flag
108 | 
109 |                 // pos is popped whenever an event is emitted, so pushes must happen only if there will be an event to balance it
110 |                 // and ignored comments don't pop
111 |                 if t != Token::CommentStart || !self.config.ignore_comments {
112 |                     self.push_pos();
113 |                 }
114 |                 match t {
115 |                     Token::OpeningTagStart if self.depth() > 0 || self.encountered < Encountered::Element || self.config.allow_multiple_root_elements => { // a further root-level element is accepted only when configured
116 |                         if let Some(e) = self.set_encountered(Encountered::Element) {
117 |                             next_event = Some(e); // replaces whatever the flush above produced
118 |                         }
119 |                         self.nst.push_empty();
120 |                         self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
121 |                     },
122 | 
123 |                     Token::ClosingTagStart if self.depth() > 0 =>
124 |                         self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
125 | 
126 |                     Token::CommentStart => {
127 |                         if let Some(e) = self.set_encountered(Encountered::Comment) {
128 |                             next_event = Some(e);
129 |                         }
130 |                         // We need to switch the lexer into a comment mode inside comments
131 |                         self.into_state(State::InsideComment, next_event)
132 |                     },
133 | 
134 |                     Token::DoctypeStart if self.encountered < Encountered::Doctype => {
135 |                         if let Some(e) = self.set_encountered(Encountered::Doctype) {
136 |                             next_event = Some(e);
137 |                         }
138 |                         self.data.doctype = Some(Token::DoctypeStart.to_string());
139 | 
140 |                         self.push_pos(); // NOTE(review): second push on this path (one happened above) - presumably balanced by the doctype handling; confirm
141 |                         self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
142 |                     },
143 | 
144 |                     Token::ProcessingInstructionStart =>
145 |                         self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
146 | 
147 |                     Token::CDataStart if self.depth() > 0 => {
148 |                         self.into_state(State::InsideCData, next_event)
149 |                     },
150 | 
151 |                     _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
152 |                 }
153 |             },
154 |         }
155 |     }
156 | 
157 |     pub fn document_start(&mut self, t: Token) -> Option<Result> { // handles token `t` while nothing at or past the declaration has been encountered (see the assertion below)
158 |         debug_assert!(self.encountered < Encountered::Declaration);
159 | 
160 |         match t {
161 |             Token::Character(c) => {
162 |                 let next_event = self.set_encountered(Encountered::AnyChars);
163 | 
164 |                 if !is_whitespace_char(c) { // only whitespace characters are accepted here
165 |                     return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
166 |                 }
167 |                 self.inside_whitespace = true;
168 | 
169 |                 // drop leading whitespace when trimming, and root-level whitespace when configured to ignore it
170 |                 if (self.config.trim_whitespace && self.buf.is_empty()) ||
171 |                     (self.depth() == 0 && self.config.ignore_root_level_whitespace) {
172 |                         return self.into_state(State::OutsideTag, next_event);
173 |                 }
174 | 
175 |                 self.push_pos();
176 |                 self.buf.push(c); // the whitespace is kept; `outside_tag` continues buffering from here
177 |                 self.into_state(State::OutsideTag, next_event)
178 |             },
179 | 
180 |             Token::CommentStart => {
181 |                 let next_event = self.set_encountered(Encountered::Comment);
182 |                 self.into_state(State::InsideComment, next_event)
183 |             },
184 | 
185 |             Token::OpeningTagStart => {
186 |                 let next_event = self.set_encountered(Encountered::Element);
187 |                 self.nst.push_empty();
188 |                 self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
189 |             },
190 | 
191 |             Token::DoctypeStart => {
192 |                 let next_event = self.set_encountered(Encountered::Doctype);
193 |                 self.data.doctype = Some(Token::DoctypeStart.to_string());
194 | 
195 |                 self.push_pos();
196 |                 self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
197 |             },
198 | 
199 |             Token::ProcessingInstructionStart => {
200 |                 self.push_pos();
201 |                 self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName)) // `encountered` is left untouched: this may turn out to be the `<?xml` declaration
202 |             },
203 | 
204 |             _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
205 |         }
206 |     }
207 | }
208 | 


--------------------------------------------------------------------------------
/src/writer.rs:
--------------------------------------------------------------------------------
 1 | //! Contains a high-level interface for an event-based XML emitter.
 2 | //!
 3 | //! The most important type in this module is `EventWriter` which allows writing an XML document
 4 | //! to some output stream.
 5 | 
 6 | pub use self::config::EmitterConfig;
 7 | pub use self::emitter::EmitterError as Error;
 8 | pub use self::emitter::Result;
 9 | pub use self::events::XmlEvent;
10 | 
11 | use self::emitter::Emitter;
12 | 
13 | use std::io::prelude::*;
14 | 
15 | mod config;
16 | mod emitter;
17 | pub mod events;
18 | 
19 | /// A wrapper around an `std::io::Write` instance which emits an XML document according to the
20 | /// events written to it.
21 | pub struct EventWriter<W> {
22 |     sink: W, // destination handed to the emitter on every write
23 |     emitter: Emitter, // emission state, created from an `EmitterConfig`
24 | }
25 | 
26 | impl<W: Write> EventWriter<W> {
27 |     /// Builds an `EventWriter` over the given `std::io::Write` sink with the default
28 |     /// `EmitterConfig`.
29 |     #[inline]
30 |     pub fn new(sink: W) -> Self {
31 |         Self { sink, emitter: Emitter::new(EmitterConfig::new()) }
32 |     }
33 | 
34 |     /// Builds an `EventWriter` over the given `std::io::Write` sink; the emitter is
35 |     /// constructed from the supplied `config`.
36 |     #[inline]
37 |     pub fn new_with_config(sink: W, config: EmitterConfig) -> Self {
38 |         // The configuration is consumed by the emitter; the writer itself only
39 |         // stores the sink next to it.
40 |         let emitter = Emitter::new(config);
41 |         Self { sink, emitter }
42 |     }
43 | 
44 |     /// Emits the part of the XML document described by `event` into the sink.
45 |     ///
46 |     /// The output need not mirror the event one-to-one, because configuration
47 |     /// options influence what is written. For example, `XmlEvent::EndElement` may
48 |     /// correspond to a separate closing element or it may cause writing an empty element,
49 |     /// and `XmlEvent::CData` may be represented as characters in
50 |     /// the output stream.
51 |     pub fn write<'a, E>(&mut self, event: E) -> Result<()> where E: Into<XmlEvent<'a>> {
52 |         let Self { sink, emitter } = self;
53 |         match event.into() {
54 |             XmlEvent::Characters(text) => emitter.emit_characters(sink, text),
55 |             XmlEvent::RawCharacters(text) => emitter.emit_raw_characters(sink, text),
56 |             XmlEvent::Doctype(text) => emitter.emit_raw_characters(sink, text),
57 |             XmlEvent::CData(text) => emitter.emit_cdata(sink, text),
58 |             XmlEvent::Comment(text) => emitter.emit_comment(sink, text),
59 |             XmlEvent::ProcessingInstruction { name, data } => emitter.emit_processing_instruction(sink, name, data),
60 |             XmlEvent::StartDocument { version, encoding, standalone } =>
61 |                 emitter.emit_start_document(sink, version, encoding.unwrap_or("UTF-8"), standalone),
62 |             XmlEvent::StartElement { name, attributes, namespace } => {
63 |                 emitter.namespace_stack_mut().push_empty().checked_target().extend(namespace.as_ref());
64 |                 emitter.emit_start_element(sink, name, &attributes)
65 |             },
66 |             XmlEvent::EndElement { name } => {
67 |                 let outcome = emitter.emit_end_element(sink, name);
68 |                 emitter.namespace_stack_mut().try_pop();
69 |                 outcome
70 |             },
71 |         }
72 |     }
73 | 
74 |     /// Gives mutable access to the wrapped sink.
75 |     ///
76 |     /// Writing to the sink directly makes it very easy to emit an invalid XML document, so
77 |     /// use this method with care. Valid use cases include calling methods like
78 |     /// `Write::flush`, which do not emit new data but rather change the state of the stream.
79 |     pub fn inner_mut(&mut self) -> &mut W {
80 |         let Self { sink, .. } = self;
81 |         sink
82 |     }
83 | 
84 |     /// Gives shared (read-only) access to the wrapped sink.
85 |     pub fn inner_ref(&self) -> &W {
86 |         &self.sink
87 |     }
88 | 
89 |     /// Consumes this `EventWriter` and hands back the wrapped sink.
90 |     ///
91 |     /// This is destructive: the emitter state is dropped, so wrapping the sink again with
92 |     /// `EventWriter::new()` starts from a blank state (e.g. accumulated namespaces are reset).
93 |     pub fn into_inner(self) -> W {
94 |         let Self { sink, .. } = self;
95 |         sink
96 |     }
97 | }
98 | 


--------------------------------------------------------------------------------
/src/writer/config.rs:
--------------------------------------------------------------------------------
  1 | //! Contains emitter configuration structure.
  2 | 
  3 | use crate::writer::EventWriter;
  4 | use std::borrow::Cow;
  5 | use std::io::Write;
  6 | 
  7 | /// Emitter configuration structure.
  8 | ///
  9 | /// This structure contains various options which control XML document emitter behavior.
 10 | #[derive(Clone, PartialEq, Eq, Debug)]
 11 | pub struct EmitterConfig {
 12 |     /// Line separator used to separate lines in formatted output. Default is `"\n"`.
 13 |     pub line_separator: Cow<'static, str>,
 14 | 
 15 |     /// A string which will be used for a single level of indentation. Default is `"  "`
 16 |     /// (two spaces).
 17 |     pub indent_string: Cow<'static, str>,
 18 | 
 19 |     /// Whether or not the emitted document should be indented. Default is false.
 20 |     ///
 21 |     /// The emitter is capable of performing automatic indentation of the emitted XML document.
 22 |     /// It is done in stream-like fashion and does not require the knowledge of the whole
 23 |     /// document in advance.
 24 |     ///
 25 |     /// Sometimes, however, automatic indentation is undesirable, e.g. when you want to keep
 26 |     /// existing layout when processing an existing XML document. Also the indentation algorithm
 27 |     /// is not thoroughly tested. Hence by default it is disabled.
 28 |     pub perform_indent: bool,
 29 | 
 30 |     /// Whether or not characters in output events will be escaped. Default is true.
 31 |     ///
 32 |     /// The emitter can automatically escape characters which can't appear in PCDATA sections
 33 |     /// or element attributes of an XML document, like `<` or `"` (in attributes). This may
 34 |     /// introduce some overhead because then every corresponding piece of character data
 35 |     /// should be scanned for invalid characters.
 36 |     ///
 37 |     /// If this option is disabled, the XML writer may produce non-well-formed documents, so
 38 |     /// use `false` value for this option with care.
 39 |     pub perform_escaping: bool,
 40 | 
 41 |     /// Whether or not to write XML document declaration at the beginning of a document.
 42 |     /// Default is true.
 43 |     ///
 44 |     /// This option controls whether the document declaration should be emitted automatically
 45 |     /// before a root element is written if it was not emitted explicitly by the user.
 46 |     pub write_document_declaration: bool,
 47 | 
 48 |     /// Whether or not to convert elements with empty content to empty elements. Default is true.
 49 |     ///
 50 |     /// This option allows turning elements like `<a></a>` (an element with empty content)
 51 |     /// into `<a />` (an empty element).
 52 |     pub normalize_empty_elements: bool,
 53 | 
 54 |     /// Whether or not to emit CDATA events as plain characters. Default is false.
 55 |     ///
 56 |     /// This option forces the emitter to convert CDATA events into regular character events,
 57 |     /// performing all the necessary escaping beforehand. This may be occasionally useful
 58 |     /// for feeding the document into incorrect parsers which do not support CDATA.
 59 |     pub cdata_to_characters: bool,
 60 | 
 61 |     /// Whether or not to keep element names to support `EndElement` events without explicit names.
 62 |     /// Default is true.
 63 |     ///
 64 |     /// This option makes the emitter keep names of written elements in order to allow
 65 |     /// omitting names when writing closing element tags. This could incur some memory overhead.
 66 |     pub keep_element_names_stack: bool,
 67 | 
 68 |     /// Whether or not to automatically insert leading and trailing spaces in emitted comments,
 69 |     /// if necessary. Default is true.
 70 |     ///
 71 |     /// This is a convenience option, so that the user need not add spaces before and after
 72 |     /// the comment text to get prettier comments: `<!-- something -->` instead of
 73 |     /// `<!--something-->`.
 74 |     pub autopad_comments: bool,
 75 | 
 76 |     /// Whether or not to automatically insert spaces before the trailing `/>` in self-closing
 77 |     /// elements. Default is true.
 78 |     ///
 79 |     /// This option is only meaningful if `normalize_empty_elements` is true. For example, the
 80 |     /// element `<a></a>` would be unaffected. When `normalize_empty_elements` is true, then when
 81 |     /// this option is also true, the same element would appear `<a />`. If this option is false,
 82 |     /// then the same element would appear `<a/>`.
 83 |     pub pad_self_closing: bool,
 84 | }
 85 | 
 86 | impl EmitterConfig {
 87 |     /// Returns a configuration in which every option has its default value.
 88 |     ///
 89 |     /// Individual options can then be adjusted in a builder-like chain:
 90 |     ///
 91 |     /// ```rust
 92 |     /// use xml::writer::EmitterConfig;
 93 |     ///
 94 |     /// let config = EmitterConfig::new()
 95 |     ///     .line_separator("\r\n")
 96 |     ///     .perform_indent(true)
 97 |     ///     .normalize_empty_elements(false);
 98 |     /// ```
 99 |     #[inline]
100 |     #[must_use]
101 |     pub fn new() -> Self {
102 |         Self {
103 |             perform_indent: false,
104 |             cdata_to_characters: false,
105 |             perform_escaping: true,
106 |             write_document_declaration: true,
107 |             normalize_empty_elements: true,
108 |             keep_element_names_stack: true,
109 |             autopad_comments: true,
110 |             pad_self_closing: true,
111 |             line_separator: Cow::Borrowed("\n"),
112 |             indent_string: Cow::Borrowed("  "), // two spaces
113 |         }
114 |     }
115 | 
116 |     /// Consumes this configuration and wraps `sink` in an `EventWriter` that uses it.
117 |     ///
118 |     /// This allows a writer to be configured and created in a single expression:
119 |     ///
120 |     /// ```rust
121 |     /// use xml::writer::EmitterConfig;
122 |     ///
123 |     /// let mut target: Vec<u8> = Vec::new();
124 |     ///
125 |     /// let writer = EmitterConfig::new()
126 |     ///     .line_separator("\r\n")
127 |     ///     .perform_indent(true)
128 |     ///     .normalize_empty_elements(false)
129 |     ///     .create_writer(&mut target);
130 |     /// ```
131 |     ///
132 |     /// The call simply forwards to `EventWriter::new_with_config()`, passing the sink and
133 |     /// this configuration object.
134 |     #[inline]
135 |     pub fn create_writer<W: Write>(self, sink: W) -> EventWriter<W> {
136 |         EventWriter::new_with_config(sink, self)
137 |     }
138 | }
139 | 
140 | impl Default for EmitterConfig {
141 |     #[inline]
142 |     fn default() -> Self {
143 |         EmitterConfig::new() // same values as the explicit constructor
144 |     }
145 | }
146 | 
147 | // Generates the builder-style setters used as `EmitterConfig::new().perform_indent(true)`.
148 | // `perform_escaping` is listed too, so every public option can be set in a builder chain.
149 | gen_setters!(EmitterConfig,
150 |     line_separator: into Cow<'static, str>,
151 |     indent_string: into Cow<'static, str>,
152 |     perform_indent: val bool,
153 |     perform_escaping: val bool,
154 |     write_document_declaration: val bool,
155 |     normalize_empty_elements: val bool,
156 |     cdata_to_characters: val bool,
157 |     keep_element_names_stack: val bool,
158 |     autopad_comments: val bool,
159 |     pad_self_closing: val bool
160 | );
161 | 


--------------------------------------------------------------------------------
/src/writer/events.rs:
--------------------------------------------------------------------------------
  1 | //! Contains `XmlEvent` datatype, instances of which are consumed by the writer.
  2 | 
  3 | use std::borrow::Cow;
  4 | 
  5 | use crate::attribute::Attribute;
  6 | use crate::common::XmlVersion;
  7 | use crate::name::Name;
  8 | use crate::namespace::{Namespace, NS_NO_PREFIX};
  9 | use crate::reader::ErrorKind;
 10 | 
 11 | /// A part of an XML output stream.
 12 | ///
 13 | /// Objects of this enum are consumed by `EventWriter`. They correspond to different parts of
 14 | /// an XML document.
 15 | #[derive(Debug, Clone)]
 16 | #[non_exhaustive]
 17 | pub enum XmlEvent<'a> {
 18 |     /// Corresponds to XML document declaration.
 19 |     ///
 20 |     /// This event should always be written before any other event. If it is not written
 21 |     /// at all, a default XML declaration will be outputted if the corresponding option
 22 |     /// is set in the configuration. Otherwise an error will be returned.
 23 |     StartDocument {
 24 |         /// XML version.
 25 |         ///
 26 |         /// Defaults to `XmlVersion::Version10`.
 27 |         version: XmlVersion,
 28 | 
 29 |         /// XML document encoding.
 30 |         ///
 31 |         /// Defaults to `Some("UTF-8")`.
 32 |         encoding: Option<&'a str>,
 33 | 
 34 |         /// XML standalone declaration.
 35 |         ///
 36 |         /// Defaults to `None`.
 37 |         standalone: Option<bool>,
 38 |     },
 39 | 
 40 |     /// Denotes an XML processing instruction.
 41 |     ProcessingInstruction {
 42 |         /// Processing instruction target.
 43 |         name: &'a str,
 44 | 
 45 |         /// Processing instruction content.
 46 |         data: Option<&'a str>,
 47 |     },
 48 | 
 49 |     /// Denotes a beginning of an XML element.
 50 |     StartElement {
 51 |         /// Qualified name of the element.
 52 |         name: Name<'a>,
 53 | 
 54 |         /// A list of attributes associated with the element.
 55 |         ///
 56 |         /// Currently attributes are not checked for duplicates (TODO). Attribute values
 57 |         /// will be escaped, and all characters invalid for attribute values like `"` or `<`
 58 |         /// will be changed into character entities.
 59 |         attributes: Cow<'a, [Attribute<'a>]>,
 60 | 
 61 |         /// Contents of the namespace mapping at this point of the document.
 62 |         ///
 63 |         /// This mapping will be inspected for "new" entries, and if at this point of the document
 64 |         /// a particular pair of prefix and namespace URI is already defined, no namespace
 65 |         /// attributes will be emitted.
 66 |         namespace: Cow<'a, Namespace>,
 67 |     },
 68 | 
 69 |     /// Denotes an end of an XML element.
 70 |     EndElement {
 71 |         /// Optional qualified name of the element.
 72 |         ///
 73 |         /// If `None`, then it is assumed that the element name should be the last valid one.
 74 |         /// If `Some` and element names tracking is enabled, then the writer will check it for
 75 |         /// correctness.
 76 |         name: Option<Name<'a>>,
 77 |     },
 78 | 
 79 |     /// Denotes CDATA content.
 80 |     ///
 81 |     /// This event contains unparsed data, and no escaping will be performed when writing it
 82 |     /// to the output stream.
 83 |     CData(&'a str),
 84 | 
 85 |     /// Denotes a comment.
 86 |     ///
 87 |     /// The string will be checked for invalid sequences and an error will be returned by the
 88 |     /// write operation.
 89 |     Comment(&'a str),
 90 | 
 91 |     /// Denotes character data outside of tags.
 92 |     ///
 93 |     /// Contents of this event will be escaped if `perform_escaping` option is enabled,
 94 |     /// that is, every character invalid for PCDATA will appear as a character entity.
 95 |     Characters(&'a str),
 96 | 
 97 |     /// Emits raw characters which will never be escaped.
 98 |     ///
 99 |     /// This event is only used for writing to an output stream, there is no equivalent
100 |     /// reader event. Care must be taken when using this event, as it can easily result
101 |     /// in non-well-formed documents.
102 |     RawCharacters(&'a str),
103 | 
104 |     /// Syntax of the `DOCTYPE`, everything including `<` and `>`
105 |     Doctype(&'a str),
106 | }
107 | 
108 | impl<'a> XmlEvent<'a> {
109 |     /// Creates a writer event for a processing instruction.
110 |     #[inline]
111 |     #[must_use]
112 |     pub const fn processing_instruction(name: &'a str, data: Option<&'a str>) -> Self {
113 |         Self::ProcessingInstruction { name, data }
114 |     }
115 | 
116 |     /// Creates a builder for a starting element with the given name.
117 |     ///
118 |     /// The builder starts without attributes and with an empty namespace; both can be
119 |     /// filled in through the builder's methods.
120 |     #[inline]
121 |     pub fn start_element<S>(name: S) -> StartElementBuilder<'a> where S: Into<Name<'a>> {
122 |         // Only the name is known at this point; everything else starts out empty.
123 |         let name = name.into();
124 |         let attributes = Vec::new();
125 |         let namespace = Namespace::empty();
126 |         StartElementBuilder { name, attributes, namespace }
127 |     }
128 | 
129 |     /// Creates a builder for a closing element.
130 |     ///
131 |     /// Unlike `start_element()`, this method does not accept a name because by default
132 |     /// the writer is able to determine it automatically. However, when this functionality
133 |     /// is disabled, the name can be specified with the `name()` method on the builder.
134 |     #[inline]
135 |     #[must_use]
136 |     pub const fn end_element() -> EndElementBuilder<'a> {
137 |         EndElementBuilder { name: None }
138 |     }
139 | 
140 |     /// Creates a CDATA event.
141 |     ///
142 |     /// Naturally, the provided string won't be escaped, except for closing CDATA token `]]>`
143 |     /// (depending on the configuration).
144 |     #[inline]
145 |     #[must_use]
146 |     pub const fn cdata(data: &'a str) -> Self {
147 |         Self::CData(data)
148 |     }
149 | 
150 |     /// Creates a regular characters (PCDATA) event.
151 |     ///
152 |     /// All offending symbols, in particular, `&` and `<`, will be escaped by the writer.
153 |     #[inline]
154 |     #[must_use]
155 |     pub const fn characters(data: &'a str) -> Self {
156 |         Self::Characters(data)
157 |     }
158 | 
159 |     /// Creates a raw characters event.
160 |     ///
161 |     /// No escaping takes place.
162 |     /// This event is only used for writing to an output stream, there is no equivalent
163 |     /// reader event. Care must be taken when using this event, as it can easily result
164 |     /// in non-well-formed documents.
165 |     #[inline]
166 |     #[must_use]
167 |     pub const fn raw_characters(data: &'a str) -> Self {
168 |         Self::RawCharacters(data)
169 |     }
170 | 
171 |     /// Creates a comment event.
172 |     #[inline]
173 |     #[must_use]
174 |     pub const fn comment(data: &'a str) -> Self {
175 |         Self::Comment(data)
176 |     }
177 | }
178 | 
179 | impl<'a> From<&'a str> for XmlEvent<'a> {
180 |     #[inline]
181 |     fn from(text: &'a str) -> Self {
182 |         Self::Characters(text) // a plain string becomes a `Characters` event
183 |     }
184 | }
185 | 
186 | /// A builder for a closing element event.
187 | pub struct EndElementBuilder<'a> {
188 |     name: Option<Name<'a>>, // `None` until `name()` is called on the builder
189 | }
190 | 
191 | /// Methods for configuring a closing element event before it is written.
192 | impl<'a> EndElementBuilder<'a> {
193 |     /// Gives this closing element an explicit name.
194 |     ///
195 |     /// Usually the writer is able to determine closing element names automatically. If that
196 |     /// functionality is enabled (the default), this name is checked for correctness. If it
197 |     /// is disabled, the user must ensure manually that the closing element name is correct.
198 |     #[inline]
199 |     #[must_use]
200 |     pub fn name<N: Into<Name<'a>>>(mut self, name: N) -> Self {
201 |         let explicit = name.into();
202 |         self.name = Some(explicit);
203 |         self
204 |     }
205 | }
206 | 
207 | impl<'a> From<EndElementBuilder<'a>> for XmlEvent<'a> {
208 |     fn from(EndElementBuilder { name }: EndElementBuilder<'a>) -> Self {
209 |         Self::EndElement { name }
210 |     }
211 | }
212 | 
213 | /// A builder for a starting element event.
214 | pub struct StartElementBuilder<'a> {
215 |     name: Name<'a>, // element name passed to `XmlEvent::start_element()`
216 |     attributes: Vec<Attribute<'a>>, // attributes pushed by `attr()`, in call order
217 |     namespace: Namespace, // mappings added through `ns()` and `default_ns()`
218 | }
219 | 
220 | impl<'a> StartElementBuilder<'a> {
221 |     /// Appends an attribute with the given name and string value to this element.
222 |     ///
223 |     /// The name is a qualified name; its namespace is ignored, but its prefix is checked for
224 |     /// correctness, that is, it is checked that the prefix is bound to some namespace in the
225 |     /// current context.
226 |     ///
227 |     /// Currently attributes are not checked for duplicates. Note that duplicate attributes
228 |     /// are a violation of XML document well-formedness.
229 |     ///
230 |     /// The writer checks that you don't specify reserved prefix names, for example `xmlns`.
231 |     #[inline]
232 |     #[must_use]
233 |     pub fn attr<N: Into<Name<'a>>>(mut self, name: N, value: &'a str) -> Self {
234 |         let attribute = Attribute::new(name.into(), value);
235 |         self.attributes.push(attribute);
236 |         self
237 |     }
238 | 
239 |     /// Binds `prefix` to the namespace `uri` in the namespace context of this element.
240 |     ///
241 |     /// If no namespace URI was bound to the provided prefix at this point of the document,
242 |     /// then the mapping from the prefix to the provided namespace URI will be written as
243 |     /// a part of this element attribute set.
244 |     ///
245 |     /// If the same namespace URI was bound to the provided prefix at this point of the document,
246 |     /// then no namespace attributes will be emitted.
247 |     ///
248 |     /// If some other namespace URI was bound to the provided prefix at this point of the document,
249 |     /// then another binding will be added as a part of this element attribute set, shadowing
250 |     /// the outer binding.
251 |     #[inline]
252 |     #[must_use]
253 |     pub fn ns<S1: Into<String>, S2: Into<String>>(mut self, prefix: S1, uri: S2) -> Self {
254 |         // The mapping is only recorded on the builder here; it travels with the
255 |         // `StartElement` event once the builder is converted.
256 |         self.namespace.put(prefix, uri);
257 |         self
258 |     }
259 | 
260 |     /// Binds the default (prefix-less) namespace to `uri` for this element.
261 |     ///
262 |     /// Same rules as for `ns()` are also valid for the default namespace mapping.
263 |     #[inline]
264 |     #[must_use]
265 |     pub fn default_ns<S: Into<String>>(mut self, uri: S) -> Self {
266 |         // `NS_NO_PREFIX` is the key under which the default namespace is stored.
267 |         self.namespace.put(NS_NO_PREFIX, uri);
268 |         self
269 |     }
270 | }
271 | 
272 | impl<'a> From<StartElementBuilder<'a>> for XmlEvent<'a> {
273 |     #[inline]
274 |     fn from(builder: StartElementBuilder<'a>) -> Self {
275 |         let StartElementBuilder { name, attributes, namespace } = builder;
276 |         // The builder owns its data, so both collections go into the event as owned values.
277 |         let attributes: Cow<'a, [Attribute<'a>]> = Cow::Owned(attributes);
278 |         let namespace: Cow<'a, Namespace> = Cow::Owned(namespace);
279 |         Self::StartElement { name, attributes, namespace }
280 |     }
281 | }
282 | 
283 | impl<'a> TryFrom<&'a crate::reader::XmlEvent> for XmlEvent<'a> {
284 |     type Error = crate::reader::Error;
285 |     // When `as_writer_event()` yields `None`, the failure is reported as `UnexpectedEof`.
286 |     fn try_from(event: &crate::reader::XmlEvent) -> Result<XmlEvent<'_>, Self::Error> {
287 |         event.as_writer_event().ok_or_else(|| ErrorKind::UnexpectedEof.into())
288 |     }
289 | }
290 | 


--------------------------------------------------------------------------------
/tests/cases/autosar.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>
 2 | <!DOCTYPE DCF [
 3 |         <!ELEMENT DCF ((NAME,ATTRDEF?,PROFILESETTINGS?,FILEREF*)?)>
 4 |         <!ATTLIST DCF
 5 |         ARSCHEMA (21XSDREV0017 | 30XSDREV0003) "21XSDREV0017">
 6 |         <!ELEMENT NAME (#PCDATA)>
 7 |         <!ELEMENT ATTRDEF (#PCDATA)>
 8 |         <!ELEMENT FILEREF (ARXML, DCB?, ECUC?, GENATTR?)>
 9 |         <!ELEMENT ARXML (#PCDATA)>
10 |         <!ATTLIST ARXML TYPE CDATA ""
11 |     ROOTITEM (CONSTANT|DATATYPE|PORTINTERFACE|SIGNAL|COMPONENTTYPE|ECUPROJECT|VEHICLEPROJECT) #REQUIRED >
12 |         <!ELEMENT DCB (#PCDATA)>
13 |         <!ELEMENT ECUC (#PCDATA)>
14 |   <!ELEMENT GENATTR (#PCDATA)>
15 |   <!ELEMENT PROFILESETTINGS (#PCDATA)>
16 | ]>
17 | <DCF />
18 | 


--------------------------------------------------------------------------------
/tests/cases/quote.xml:
--------------------------------------------------------------------------------
1 | <x arg=unquoted"" />
2 | 


--------------------------------------------------------------------------------
/tests/cases/xmlnsquote.xml:
--------------------------------------------------------------------------------
1 | <message xmlns=invalid "jabber:client"/>
2 | 


--------------------------------------------------------------------------------
/tests/documents/sample_1.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="iso-8859-1" standalone="yes"?>
 2 | <project name="project-name">
 3 |     <libraries>
 4 |         <library groupId="org.example" artifactId="&lt;name&gt;" version="0.1"/>
 5 |         <library groupId="com.example" artifactId="&quot;cool-lib&amp;" version="999"/>
 6 |     </libraries>
 7 |     <module name="module-1">
 8 |         <files>
 9 |             <file name="somefile.java" type="java">
10 |                 Some &lt;java&gt; class
11 |             </file>
12 |             <file name="another_file.java" type="java">
13 |                 Another &quot;java&quot; class
14 |             </file>
15 |             <file name="config.xml" type="xml">
16 |                 Weird &apos;XML&apos; config
17 |             </file>
18 |         </files>
19 |         <libraries>
20 |             <library groupId="junit" artifactId="junit" version="1.9.5"/>
21 |         </libraries>
22 |     </module>
23 |     <module name="module-2">
24 |         <files>
25 |             <file name="program.js" type="javascript">
26 |                 JavaScript &amp; program
27 |             </file>
28 |             <file name="style.css" type="css">
29 |                 Cascading style sheet: &#xA9; - &#1161;
30 |             </file>
31 |         </files>
32 |     </module>
33 | </project>
34 | 
35 | 


--------------------------------------------------------------------------------
/tests/documents/sample_1_full.txt:
--------------------------------------------------------------------------------
 1 | StartDocument(1.0, iso-8859-1)
 2 | StartElement(project [name="project-name"])
 3 | Whitespace("\n    ")
 4 | StartElement(libraries)
 5 | Whitespace("\n        ")
 6 | StartElement(library [groupId="org.example", artifactId="<name>", version="0.1"])
 7 | EndElement(library)
 8 | Whitespace("\n        ")
 9 | StartElement(library [groupId="com.example", artifactId="\"cool-lib&", version="999"])
10 | EndElement(library)
11 | Whitespace("\n    ")
12 | EndElement(libraries)
13 | Whitespace("\n    ")
14 | StartElement(module [name="module-1"])
15 | Whitespace("\n        ")
16 | StartElement(files)
17 | Whitespace("\n            ")
18 | StartElement(file [name="somefile.java", type="java"])
19 | Characters("\n                Some <java> class\n            ")
20 | EndElement(file)
21 | Whitespace("\n            ")
22 | StartElement(file [name="another_file.java", type="java"])
23 | Characters("\n                Another \"java\" class\n            ")
24 | EndElement(file)
25 | Whitespace("\n            ")
26 | StartElement(file [name="config.xml", type="xml"])
27 | Characters("\n                Weird \'XML\' config\n            ")
28 | EndElement(file)
29 | Whitespace("\n        ")
30 | EndElement(files)
31 | Whitespace("\n        ")
32 | StartElement(libraries)
33 | Whitespace("\n            ")
34 | StartElement(library [groupId="junit", artifactId="junit", version="1.9.5"])
35 | EndElement(library)
36 | Whitespace("\n        ")
37 | EndElement(libraries)
38 | Whitespace("\n    ")
39 | EndElement(module)
40 | Whitespace("\n    ")
41 | StartElement(module [name="module-2"])
42 | Whitespace("\n        ")
43 | StartElement(files)
44 | Whitespace("\n            ")
45 | StartElement(file [name="program.js", type="javascript"])
46 | Characters("\n                JavaScript & program\n            ")
47 | EndElement(file)
48 | Whitespace("\n            ")
49 | StartElement(file [name="style.css", type="css"])
50 | Characters("\n                Cascading style sheet: © - ҉\n            ")
51 | EndElement(file)
52 | Whitespace("\n        ")
53 | EndElement(files)
54 | Whitespace("\n    ")
55 | EndElement(module)
56 | Whitespace("\n")
57 | EndElement(project)
58 | EndDocument
59 | 


--------------------------------------------------------------------------------
/tests/documents/sample_1_short.txt:
--------------------------------------------------------------------------------
 1 | StartDocument(1.0, iso-8859-1)
 2 | StartElement(project [name="project-name"])
 3 | StartElement(libraries)
 4 | StartElement(library [groupId="org.example", artifactId="<name>", version="0.1"])
 5 | EndElement(library)
 6 | StartElement(library [groupId="com.example", artifactId="\"cool-lib&", version="999"])
 7 | EndElement(library)
 8 | EndElement(libraries)
 9 | StartElement(module [name="module-1"])
10 | StartElement(files)
11 | StartElement(file [name="somefile.java", type="java"])
12 | Characters("Some <java> class")
13 | EndElement(file)
14 | StartElement(file [name="another_file.java", type="java"])
15 | Characters("Another \"java\" class")
16 | EndElement(file)
17 | StartElement(file [name="config.xml", type="xml"])
18 | Characters("Weird \'XML\' config")
19 | EndElement(file)
20 | EndElement(files)
21 | StartElement(libraries)
22 | StartElement(library [groupId="junit", artifactId="junit", version="1.9.5"])
23 | EndElement(library)
24 | EndElement(libraries)
25 | EndElement(module)
26 | StartElement(module [name="module-2"])
27 | StartElement(files)
28 | StartElement(file [name="program.js", type="javascript"])
29 | Characters("JavaScript & program")
30 | EndElement(file)
31 | StartElement(file [name="style.css", type="css"])
32 | Characters("Cascading style sheet: © - ҉")
33 | EndElement(file)
34 | EndElement(files)
35 | EndElement(module)
36 | EndElement(project)
37 | EndDocument
38 | 


--------------------------------------------------------------------------------
/tests/documents/sample_2.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <p:data xmlns:d="urn:example:double" xmlns:h="urn:example:header" xmlns:p="urn:example:namespace">
 3 |   <p:datum id="34">
 4 |     <p:name>Name</p:name>
 5 |     <d:name>Another name</d:name>
 6 |     <d:arg>0.3</d:arg>
 7 |     <d:arg>0.2</d:arg>
 8 |     <p:arg>0.1</p:arg>
 9 |     <p:arg>0.01</p:arg>
10 |     <h:header name="Header-1">header 1 value</h:header>
11 |     <h:header name="Header-2">
12 |       Some bigger value
13 |     </h:header>
14 |   </p:datum>
15 | </p:data>
16 | 


--------------------------------------------------------------------------------
/tests/documents/sample_2_full.txt:
--------------------------------------------------------------------------------
 1 | StartDocument(1.0, utf-8)
 2 | StartElement({urn:example:namespace}p:data)
 3 | Whitespace("\n  ")
 4 | StartElement({urn:example:namespace}p:datum [id="34"])
 5 | Whitespace("\n    ")
 6 | StartElement({urn:example:namespace}p:name)
 7 | Characters("Name")
 8 | EndElement({urn:example:namespace}p:name)
 9 | Whitespace("\n    ")
10 | StartElement({urn:example:double}d:name)
11 | Characters("Another name")
12 | EndElement({urn:example:double}d:name)
13 | Whitespace("\n    ")
14 | StartElement({urn:example:double}d:arg)
15 | Characters("0.3")
16 | EndElement({urn:example:double}d:arg)
17 | Whitespace("\n    ")
18 | StartElement({urn:example:double}d:arg)
19 | Characters("0.2")
20 | EndElement({urn:example:double}d:arg)
21 | Whitespace("\n    ")
22 | StartElement({urn:example:namespace}p:arg)
23 | Characters("0.1")
24 | EndElement({urn:example:namespace}p:arg)
25 | Whitespace("\n    ")
26 | StartElement({urn:example:namespace}p:arg)
27 | Characters("0.01")
28 | EndElement({urn:example:namespace}p:arg)
29 | Whitespace("\n    ")
30 | StartElement({urn:example:header}h:header [name="Header-1"])
31 | Characters("header 1 value")
32 | EndElement({urn:example:header}h:header)
33 | Whitespace("\n    ")
34 | StartElement({urn:example:header}h:header [name="Header-2"])
35 | Characters("\n      Some bigger value\n    ")
36 | EndElement({urn:example:header}h:header)
37 | Whitespace("\n  ")
38 | EndElement({urn:example:namespace}p:datum)
39 | Whitespace("\n")
40 | EndElement({urn:example:namespace}p:data)
41 | EndDocument
42 | 


--------------------------------------------------------------------------------
/tests/documents/sample_2_short.txt:
--------------------------------------------------------------------------------
 1 | StartDocument(1.0, utf-8)
 2 | StartElement({urn:example:namespace}p:data)
 3 | StartElement({urn:example:namespace}p:datum [id="34"])
 4 | StartElement({urn:example:namespace}p:name)
 5 | Characters("Name")
 6 | EndElement({urn:example:namespace}p:name)
 7 | StartElement({urn:example:double}d:name)
 8 | Characters("Another name")
 9 | EndElement({urn:example:double}d:name)
10 | StartElement({urn:example:double}d:arg)
11 | Characters("0.3")
12 | EndElement({urn:example:double}d:arg)
13 | StartElement({urn:example:double}d:arg)
14 | Characters("0.2")
15 | EndElement({urn:example:double}d:arg)
16 | StartElement({urn:example:namespace}p:arg)
17 | Characters("0.1")
18 | EndElement({urn:example:namespace}p:arg)
19 | StartElement({urn:example:namespace}p:arg)
20 | Characters("0.01")
21 | EndElement({urn:example:namespace}p:arg)
22 | StartElement({urn:example:header}h:header [name="Header-1"])
23 | Characters("header 1 value")
24 | EndElement({urn:example:header}h:header)
25 | StartElement({urn:example:header}h:header [name="Header-2"])
26 | Characters("Some bigger value")
27 | EndElement({urn:example:header}h:header)
28 | EndElement({urn:example:namespace}p:datum)
29 | EndElement({urn:example:namespace}p:data)
30 | EndDocument
31 | 


--------------------------------------------------------------------------------
/tests/documents/sample_3.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <p:data xmlns:p="urn:x" z=">">
 3 |     <!-- abcd &lt; &gt; &amp; -->
 4 |     <a>test</a>
 5 |     <b>kkss" = ddd' ></b>
 6 |     <![CDATA[
 7 |             <a>ddddd</b>!e3--><!-- ddckx
 8 |     ]]>
 9 |     <c/>
10 |     <![CDATA[
11 |     <![CDATA[zzzz]]]]><![CDATA[>]]>
12 | </p:data>
13 | 
14 | 


--------------------------------------------------------------------------------
/tests/documents/sample_3_full.txt:
--------------------------------------------------------------------------------
 1 | 1:1 StartDocument(1.0, utf-8)
 2 | 2:1 StartElement({urn:x}p:data [z=">"])
 3 | 2:31 Whitespace("\n    ")
 4 | 3:5 Comment(" abcd &lt; &gt; &amp; ")
 5 | 3:34 Whitespace("\n    ")
 6 | 4:5 StartElement(a)
 7 | 4:8 Characters("test")
 8 | 4:12 EndElement(a)
 9 | 4:16 Whitespace("\n    ")
10 | 5:5 StartElement(b)
11 | 5:8 Characters("kkss\" = ddd\' >")
12 | 5:22 EndElement(b)
13 | 5:26 Whitespace("\n    ")
14 | 6:5 CData("\n            <a>ddddd</b>!e3--><!-- ddckx\n    ")
15 | 8:8 Characters("\n    ")
16 | 9:5 StartElement(c)
17 | 9:5 EndElement(c)
18 | 9:9 Whitespace("\n    ")
19 | 10:5 CData("\n    <![CDATA[zzzz]]")
20 | 11:23 CData(">")
21 | 11:36 Characters("\n")
22 | 12:1 EndElement({urn:x}p:data)
23 | 14:1 EndDocument
24 | 


--------------------------------------------------------------------------------
/tests/documents/sample_3_short.txt:
--------------------------------------------------------------------------------
 1 | 1:1 StartDocument(1.0, utf-8)
 2 | 2:1 StartElement({urn:x}p:data [z=">"])
 3 | 4:5 StartElement(a)
 4 | 4:8 Characters("test")
 5 | 4:12 EndElement(a)
 6 | 5:5 StartElement(b)
 7 | 5:8 Characters("kkss\" = ddd\' >")
 8 | 5:22 EndElement(b)
 9 | 6:5 Characters("<a>ddddd</b>!e3--><!-- ddckx")
10 | 9:5 StartElement(c)
11 | 9:5 EndElement(c)
12 | 10:5 Characters("<![CDATA[zzzz]]>")
13 | 12:1 EndElement({urn:x}p:data)
14 | 14:1 EndDocument
15 | 


--------------------------------------------------------------------------------
/tests/documents/sample_4.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!DOCTYPE data SYSTEM "abcd.dtd">
 3 | <p:data xmlns:p="urn:x" z=">">
 4 |     <!-- abcd &lt; &gt; &amp; -->
 5 |     <a>test</a>
 6 |     <b>kkss" = ddd' ></b>
 7 |     <![CDATA[
 8 |             <a>ddddd</b>!e3--><!-- ddckx
 9 |     ]]>
10 |     <c/>
11 |     <![CDATA[
12 |     <![CDATA[zzzz]]]]><![CDATA[>]]>
13 | </p:data>
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/tests/documents/sample_4_full.txt:
--------------------------------------------------------------------------------
 1 | StartDocument(1.0, utf-8)
 2 | Doctype("<!DOCTYPE data SYSTEM \"abcd.dtd\">")
 3 | StartElement({urn:x}p:data [z=">"])
 4 | Whitespace("\n    ")
 5 | Comment(" abcd &lt; &gt; &amp; ")
 6 | Whitespace("\n    ")
 7 | StartElement(a)
 8 | Characters("test")
 9 | EndElement(a)
10 | Whitespace("\n    ")
11 | StartElement(b)
12 | Characters("kkss\" = ddd\' >")
13 | EndElement(b)
14 | Whitespace("\n    ")
15 | CData("\n            <a>ddddd</b>!e3--><!-- ddckx\n    ")
16 | Characters("\n    ")
17 | StartElement(c)
18 | EndElement(c)
19 | Whitespace("\n    ")
20 | CData("\n    <![CDATA[zzzz]]")
21 | CData(">")
22 | Characters("\n")
23 | EndElement({urn:x}p:data)
24 | EndDocument
25 | 


--------------------------------------------------------------------------------
/tests/documents/sample_4_short.txt:
--------------------------------------------------------------------------------
 1 | StartDocument(1.0, utf-8)
 2 | Doctype("<!DOCTYPE data SYSTEM \"abcd.dtd\">")
 3 | StartElement({urn:x}p:data [z=">"])
 4 | StartElement(a)
 5 | Characters("test")
 6 | EndElement(a)
 7 | StartElement(b)
 8 | Characters("kkss\" = ddd\' >")
 9 | EndElement(b)
10 | Characters("<a>ddddd</b>!e3--><!-- ddckx")
11 | StartElement(c)
12 | EndElement(c)
13 | Characters("<![CDATA[zzzz]]>")
14 | EndElement({urn:x}p:data)
15 | EndDocument
16 | 


--------------------------------------------------------------------------------
/tests/documents/sample_5.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <!DOCTYPE data SYSTEM "abcd.dtd">
3 | <p>
4 |     <a>test&nbsp;&copy;&NotEqualTilde;</a>
5 | </p>
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/tests/documents/sample_5_short.txt:
--------------------------------------------------------------------------------
1 | StartDocument(1.0, utf-8)
2 | Doctype("<!DOCTYPE data SYSTEM \"abcd.dtd\">")
3 | StartElement(p)
4 | StartElement(a)
5 | Characters("test ©≂̸")
6 | EndElement(a)
7 | EndElement(p)
8 | EndDocument
9 | 


--------------------------------------------------------------------------------
/tests/documents/sample_6.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-stylesheet href="doc.xsl"?>
3 | 
4 | <doc>Hello</doc>
5 | 


--------------------------------------------------------------------------------
/tests/documents/sample_6_full.txt:
--------------------------------------------------------------------------------
1 | StartDocument(1.0, UTF-8)
2 | Whitespace("\n")
3 | ProcessingInstruction(xml-stylesheet="href=\"doc.xsl\"")
4 | Whitespace("\n\n")
5 | StartElement(doc)
6 | Characters("Hello")
7 | EndElement(doc)
8 | EndDocument
9 | 


--------------------------------------------------------------------------------
/tests/documents/sample_7.xml:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE foo [
 2 |     <!ENTITY ampersand "&amp;">
 3 |     <!ENTITY rsq "&#x5d;">
 4 |     <!ENTITY lb "late &ampersand; bound">
 5 |     <!ENTITY omg "omg <wat/> why">
 6 | ]>
 7 | <foo>
 8 |     omg <wat/> why
 9 |     &ampersand;
10 |     &rsq;
11 |     &lb;
12 |     &omg;
13 | </foo>
14 | 


--------------------------------------------------------------------------------
/tests/documents/sample_7_full.txt:
--------------------------------------------------------------------------------
 1 | StartDocument(1.0, UTF-8)
 2 | Doctype("<!DOCTYPE foo [\n    <!ENTITY ampersand \"&amp;\">\n    <!ENTITY rsq \"&#x5d;\">\n    <!ENTITY lb \"late &ampersand; bound\">\n    <!ENTITY omg \"omg <wat/> why\">\n]>")
 3 | Whitespace("\n")
 4 | StartElement(foo)
 5 | Characters("\n    omg ")
 6 | StartElement(wat)
 7 | EndElement(wat)
 8 | Characters(" why\n    &\n    ]\n    late & bound\n    omg ")
 9 | StartElement(wat)
10 | EndElement(wat)
11 | Characters(" why\n")
12 | EndElement(foo)
13 | EndDocument
14 | 


--------------------------------------------------------------------------------
/tests/documents/sample_8.xml:
--------------------------------------------------------------------------------
 1 | <el>
 2 | <!--ws--><![CDATA[]]>
 3 | <![CDATA[]]>
 4 | 
 5 | <!--ws--><!--ws-->
 6 | <!--ws--><!--ws-->
 7 | <!--ws--><![CDATA[]]><!--ws-->
 8 | <!--ws--><br/><!--ws-->
 9 | <!--ws--><s></s><!--ws-->
10 | <!--ws--><s></s><!--ws-->
11 | <!--ws--><s><![CDATA[]]></s><!--ws-->
12 | 
13 | 
14 | <![CDATA[]]><!--ws-->
15 | <!--ws-->
16 | 
17 | <![CDATA[]]><![CDATA[]]>
18 | <![CDATA[]]><![CDATA[]]>
19 | <![CDATA[]]><!--ws--><![CDATA[]]>
20 | <![CDATA[]]><br/><![CDATA[]]>
21 | <![CDATA[]]><s></s><![CDATA[]]>
22 | <![CDATA[]]><s></s><![CDATA[]]>
23 | <![CDATA[]]><s><!--ws--></s><![CDATA[]]>
24 | 
25 | <!--ws--><![CDATA[]]>
26 | <![CDATA[]]>
27 | 
28 | <!--ws--><!--ws-->
29 | <!--ws--><!--ws-->
30 | <!--ws--><![CDATA[]]><!--ws-->
31 | <!--ws--><br/><!--ws-->
32 | <!--ws--><s></s><!--ws-->
33 | <!--ws--><s></s><!--ws-->
34 | <!--ws--><s><![CDATA[]]></s><!--ws-->
35 | 
36 | 
37 | <![CDATA[]]><!--ws-->
38 | <!--ws-->
39 | 
40 | <![CDATA[]]><![CDATA[]]>
41 | <![CDATA[]]><![CDATA[]]>
42 | <![CDATA[]]><!--ws--><![CDATA[]]>
43 | <![CDATA[]]><br/><![CDATA[]]>
44 | <![CDATA[]]><s></s><![CDATA[]]>
45 | <![CDATA[]]><s></s><![CDATA[]]>
46 | <![CDATA[]]><s><!--ws--></s><![CDATA[]]>
47 | 
48 | 
49 | <!--noWS-->
50 | 
51 | <!--ws--><![CDATA[]]>
52 | <![CDATA[]]>
53 | 
54 | <!--ws--><!--ws-->
55 | <!--ws--><!--ws-->
56 | <!--ws--><![CDATA[]]><!--ws-->
57 | <!--ws--><br/><!--ws-->
58 | <!--ws--><s></s><!--ws-->
59 | <!--ws--><s></s><!--ws-->
60 | <!--ws--><s><![CDATA[]]></s><!--ws-->
61 | 
62 | 
63 | <![CDATA[]]><!--ws-->
64 | <!--ws-->
65 | 
66 | <![CDATA[]]><![CDATA[]]>
67 | <![CDATA[]]><![CDATA[]]>
68 | <![CDATA[]]><!--ws--><![CDATA[]]>
69 | <![CDATA[]]><br/><![CDATA[]]>
70 | <![CDATA[]]><s></s><![CDATA[]]>
71 | <![CDATA[]]><s></s><![CDATA[]]>
72 | <![CDATA[]]><s><!--ws--></s><![CDATA[]]>
73 | 
74 | <!--ws--><![CDATA[]]>
75 | 
76 | <![CDATA[]]><!--ws--><!--ws--><!--ws--><!--ws--><!--ws--><![CDATA[]]><!--ws--><!--ws--><br/><!--ws--><!--ws--><s></s><!--ws--><!--ws--><s></s><!--ws--><!--ws--><s><![CDATA[]]></s><!--ws--><![CDATA[]]><!--ws--><!--ws--><![CDATA[]]><![CDATA[]]><![CDATA[]]><![CDATA[]]><![CDATA[]]><!--ws--><![CDATA[]]><![CDATA[]]><br/><![CDATA[]]><![CDATA[]]><s></s><![CDATA[]]><![CDATA[]]><s></s><![CDATA[]]><![CDATA[]]><s><!--ws--></s><![CDATA[]]>
77 | </el>
78 | 


--------------------------------------------------------------------------------
/tests/documents/sample_8_c.txt:
--------------------------------------------------------------------------------
  1 | StartDocument(1.0, UTF-8)
  2 | StartElement(el)
  3 | Whitespace("\n")
  4 | CData("")
  5 | Whitespace("\n")
  6 | CData("")
  7 | Whitespace("\n\n")
  8 | Whitespace("\n")
  9 | Whitespace("\n")
 10 | CData("")
 11 | Whitespace("\n")
 12 | StartElement(br)
 13 | EndElement(br)
 14 | Whitespace("\n")
 15 | StartElement(s)
 16 | EndElement(s)
 17 | Whitespace("\n")
 18 | StartElement(s)
 19 | EndElement(s)
 20 | Whitespace("\n")
 21 | StartElement(s)
 22 | CData("")
 23 | EndElement(s)
 24 | Whitespace("\n\n\n")
 25 | CData("")
 26 | Whitespace("\n")
 27 | Whitespace("\n\n")
 28 | CData("")
 29 | CData("")
 30 | Whitespace("\n")
 31 | CData("")
 32 | CData("")
 33 | Whitespace("\n")
 34 | CData("")
 35 | CData("")
 36 | Whitespace("\n")
 37 | CData("")
 38 | StartElement(br)
 39 | EndElement(br)
 40 | CData("")
 41 | Whitespace("\n")
 42 | CData("")
 43 | StartElement(s)
 44 | EndElement(s)
 45 | CData("")
 46 | Whitespace("\n")
 47 | CData("")
 48 | StartElement(s)
 49 | EndElement(s)
 50 | CData("")
 51 | Whitespace("\n")
 52 | CData("")
 53 | StartElement(s)
 54 | EndElement(s)
 55 | CData("")
 56 | Whitespace("\n\n")
 57 | CData("")
 58 | Whitespace("\n")
 59 | CData("")
 60 | Whitespace("\n\n")
 61 | Whitespace("\n")
 62 | Whitespace("\n")
 63 | CData("")
 64 | Whitespace("\n")
 65 | StartElement(br)
 66 | EndElement(br)
 67 | Whitespace("\n")
 68 | StartElement(s)
 69 | EndElement(s)
 70 | Whitespace("\n")
 71 | StartElement(s)
 72 | EndElement(s)
 73 | Whitespace("\n")
 74 | StartElement(s)
 75 | CData("")
 76 | EndElement(s)
 77 | Whitespace("\n\n\n")
 78 | CData("")
 79 | Whitespace("\n")
 80 | Whitespace("\n\n")
 81 | CData("")
 82 | CData("")
 83 | Whitespace("\n")
 84 | CData("")
 85 | CData("")
 86 | Whitespace("\n")
 87 | CData("")
 88 | CData("")
 89 | Whitespace("\n")
 90 | CData("")
 91 | StartElement(br)
 92 | EndElement(br)
 93 | CData("")
 94 | Whitespace("\n")
 95 | CData("")
 96 | StartElement(s)
 97 | EndElement(s)
 98 | CData("")
 99 | Whitespace("\n")
100 | CData("")
101 | StartElement(s)
102 | EndElement(s)
103 | CData("")
104 | Whitespace("\n")
105 | CData("")
106 | StartElement(s)
107 | EndElement(s)
108 | CData("")
109 | Whitespace("\n\n\n")
110 | Whitespace("\n\n")
111 | CData("")
112 | Whitespace("\n")
113 | CData("")
114 | Whitespace("\n\n")
115 | Whitespace("\n")
116 | Whitespace("\n")
117 | CData("")
118 | Whitespace("\n")
119 | StartElement(br)
120 | EndElement(br)
121 | Whitespace("\n")
122 | StartElement(s)
123 | EndElement(s)
124 | Whitespace("\n")
125 | StartElement(s)
126 | EndElement(s)
127 | Whitespace("\n")
128 | StartElement(s)
129 | CData("")
130 | EndElement(s)
131 | Whitespace("\n\n\n")
132 | CData("")
133 | Whitespace("\n")
134 | Whitespace("\n\n")
135 | CData("")
136 | CData("")
137 | Whitespace("\n")
138 | CData("")
139 | CData("")
140 | Whitespace("\n")
141 | CData("")
142 | CData("")
143 | Whitespace("\n")
144 | CData("")
145 | StartElement(br)
146 | EndElement(br)
147 | CData("")
148 | Whitespace("\n")
149 | CData("")
150 | StartElement(s)
151 | EndElement(s)
152 | CData("")
153 | Whitespace("\n")
154 | CData("")
155 | StartElement(s)
156 | EndElement(s)
157 | CData("")
158 | Whitespace("\n")
159 | CData("")
160 | StartElement(s)
161 | EndElement(s)
162 | CData("")
163 | Whitespace("\n\n")
164 | CData("")
165 | Whitespace("\n\n")
166 | CData("")
167 | CData("")
168 | StartElement(br)
169 | EndElement(br)
170 | StartElement(s)
171 | EndElement(s)
172 | StartElement(s)
173 | EndElement(s)
174 | StartElement(s)
175 | CData("")
176 | EndElement(s)
177 | CData("")
178 | CData("")
179 | CData("")
180 | CData("")
181 | CData("")
182 | CData("")
183 | CData("")
184 | CData("")
185 | StartElement(br)
186 | EndElement(br)
187 | CData("")
188 | CData("")
189 | StartElement(s)
190 | EndElement(s)
191 | CData("")
192 | CData("")
193 | StartElement(s)
194 | EndElement(s)
195 | CData("")
196 | CData("")
197 | StartElement(s)
198 | EndElement(s)
199 | CData("")
200 | Whitespace("\n")
201 | EndElement(el)
202 | EndDocument
203 | 


--------------------------------------------------------------------------------
/tests/documents/sample_8_coalesce_all.txt:
--------------------------------------------------------------------------------
 1 | 1:1 StartDocument(1.0, UTF-8)
 2 | 1:4 StartElement(el)
 3 | 8:10 StartElement(br)
 4 | 8:10 EndElement(br)
 5 | 9:10 StartElement(s)
 6 | 9:13 EndElement(s)
 7 | 10:10 StartElement(s)
 8 | 10:13 EndElement(s)
 9 | 11:10 StartElement(s)
10 | 11:25 EndElement(s)
11 | 20:13 StartElement(br)
12 | 20:13 EndElement(br)
13 | 21:13 StartElement(s)
14 | 21:16 EndElement(s)
15 | 22:13 StartElement(s)
16 | 22:16 EndElement(s)
17 | 23:13 StartElement(s)
18 | 23:25 EndElement(s)
19 | 31:10 StartElement(br)
20 | 31:10 EndElement(br)
21 | 32:10 StartElement(s)
22 | 32:13 EndElement(s)
23 | 33:10 StartElement(s)
24 | 33:13 EndElement(s)
25 | 34:10 StartElement(s)
26 | 34:25 EndElement(s)
27 | 43:13 StartElement(br)
28 | 43:13 EndElement(br)
29 | 44:13 StartElement(s)
30 | 44:16 EndElement(s)
31 | 45:13 StartElement(s)
32 | 45:16 EndElement(s)
33 | 46:13 StartElement(s)
34 | 46:25 EndElement(s)
35 | 57:10 StartElement(br)
36 | 57:10 EndElement(br)
37 | 58:10 StartElement(s)
38 | 58:13 EndElement(s)
39 | 59:10 StartElement(s)
40 | 59:13 EndElement(s)
41 | 60:10 StartElement(s)
42 | 60:25 EndElement(s)
43 | 69:13 StartElement(br)
44 | 69:13 EndElement(br)
45 | 70:13 StartElement(s)
46 | 70:16 EndElement(s)
47 | 71:13 StartElement(s)
48 | 71:16 EndElement(s)
49 | 72:13 StartElement(s)
50 | 72:25 EndElement(s)
51 | 76:88 StartElement(br)
52 | 76:88 EndElement(br)
53 | 76:111 StartElement(s)
54 | 76:114 EndElement(s)
55 | 76:136 StartElement(s)
56 | 76:139 EndElement(s)
57 | 76:161 StartElement(s)
58 | 76:176 EndElement(s)
59 | 76:312 StartElement(br)
60 | 76:312 EndElement(br)
61 | 76:341 StartElement(s)
62 | 76:344 EndElement(s)
63 | 76:372 StartElement(s)
64 | 76:375 EndElement(s)
65 | 76:403 StartElement(s)
66 | 76:415 EndElement(s)
67 | 77:1 EndElement(el)
68 | 78:1 EndDocument
69 | 


--------------------------------------------------------------------------------
/tests/documents/sample_8_coalesce_cwscdch.txt:
--------------------------------------------------------------------------------
 1 | StartDocument(1.0, UTF-8)
 2 | StartElement(el)
 3 | Characters("\n\n\n\n\n\n\n")
 4 | StartElement(br)
 5 | EndElement(br)
 6 | Characters("\n")
 7 | StartElement(s)
 8 | EndElement(s)
 9 | Characters("\n")
10 | StartElement(s)
11 | EndElement(s)
12 | Characters("\n")
13 | StartElement(s)
14 | EndElement(s)
15 | Characters("\n\n\n\n\n\n\n\n\n")
16 | StartElement(br)
17 | EndElement(br)
18 | Characters("\n")
19 | StartElement(s)
20 | EndElement(s)
21 | Characters("\n")
22 | StartElement(s)
23 | EndElement(s)
24 | Characters("\n")
25 | StartElement(s)
26 | EndElement(s)
27 | Characters("\n\n\n\n\n\n\n\n")
28 | StartElement(br)
29 | EndElement(br)
30 | Characters("\n")
31 | StartElement(s)
32 | EndElement(s)
33 | Characters("\n")
34 | StartElement(s)
35 | EndElement(s)
36 | Characters("\n")
37 | StartElement(s)
38 | EndElement(s)
39 | Characters("\n\n\n\n\n\n\n\n\n")
40 | StartElement(br)
41 | EndElement(br)
42 | Characters("\n")
43 | StartElement(s)
44 | EndElement(s)
45 | Characters("\n")
46 | StartElement(s)
47 | EndElement(s)
48 | Characters("\n")
49 | StartElement(s)
50 | EndElement(s)
51 | Characters("\n\n\n\n\n\n\n\n\n\n\n")
52 | StartElement(br)
53 | EndElement(br)
54 | Characters("\n")
55 | StartElement(s)
56 | EndElement(s)
57 | Characters("\n")
58 | StartElement(s)
59 | EndElement(s)
60 | Characters("\n")
61 | StartElement(s)
62 | EndElement(s)
63 | Characters("\n\n\n\n\n\n\n\n\n")
64 | StartElement(br)
65 | EndElement(br)
66 | Characters("\n")
67 | StartElement(s)
68 | EndElement(s)
69 | Characters("\n")
70 | StartElement(s)
71 | EndElement(s)
72 | Characters("\n")
73 | StartElement(s)
74 | EndElement(s)
75 | Characters("\n\n\n\n")
76 | StartElement(br)
77 | EndElement(br)
78 | StartElement(s)
79 | EndElement(s)
80 | StartElement(s)
81 | EndElement(s)
82 | StartElement(s)
83 | EndElement(s)
84 | StartElement(br)
85 | EndElement(br)
86 | StartElement(s)
87 | EndElement(s)
88 | StartElement(s)
89 | EndElement(s)
90 | StartElement(s)
91 | EndElement(s)
92 | Characters("\n")
93 | EndElement(el)
94 | EndDocument
95 | 


--------------------------------------------------------------------------------
/tests/documents/sample_8_coalesce_wscdch.txt:
--------------------------------------------------------------------------------
  1 | StartDocument(1.0, UTF-8)
  2 | StartElement(el)
  3 | Characters("\n")
  4 | Comment("ws")
  5 | Characters("\n\n\n")
  6 | Comment("ws")
  7 | Comment("ws")
  8 | Characters("\n")
  9 | Comment("ws")
 10 | Comment("ws")
 11 | Characters("\n")
 12 | Comment("ws")
 13 | Comment("ws")
 14 | Characters("\n")
 15 | Comment("ws")
 16 | StartElement(br)
 17 | EndElement(br)
 18 | Comment("ws")
 19 | Characters("\n")
 20 | Comment("ws")
 21 | StartElement(s)
 22 | EndElement(s)
 23 | Comment("ws")
 24 | Characters("\n")
 25 | Comment("ws")
 26 | StartElement(s)
 27 | EndElement(s)
 28 | Comment("ws")
 29 | Characters("\n")
 30 | Comment("ws")
 31 | StartElement(s)
 32 | EndElement(s)
 33 | Comment("ws")
 34 | Characters("\n\n\n")
 35 | Comment("ws")
 36 | Characters("\n")
 37 | Comment("ws")
 38 | Characters("\n\n\n\n")
 39 | Comment("ws")
 40 | Characters("\n")
 41 | StartElement(br)
 42 | EndElement(br)
 43 | Characters("\n")
 44 | StartElement(s)
 45 | EndElement(s)
 46 | Characters("\n")
 47 | StartElement(s)
 48 | EndElement(s)
 49 | Characters("\n")
 50 | StartElement(s)
 51 | Comment("ws")
 52 | EndElement(s)
 53 | Characters("\n\n")
 54 | Comment("ws")
 55 | Characters("\n\n\n")
 56 | Comment("ws")
 57 | Comment("ws")
 58 | Characters("\n")
 59 | Comment("ws")
 60 | Comment("ws")
 61 | Characters("\n")
 62 | Comment("ws")
 63 | Comment("ws")
 64 | Characters("\n")
 65 | Comment("ws")
 66 | StartElement(br)
 67 | EndElement(br)
 68 | Comment("ws")
 69 | Characters("\n")
 70 | Comment("ws")
 71 | StartElement(s)
 72 | EndElement(s)
 73 | Comment("ws")
 74 | Characters("\n")
 75 | Comment("ws")
 76 | StartElement(s)
 77 | EndElement(s)
 78 | Comment("ws")
 79 | Characters("\n")
 80 | Comment("ws")
 81 | StartElement(s)
 82 | EndElement(s)
 83 | Comment("ws")
 84 | Characters("\n\n\n")
 85 | Comment("ws")
 86 | Characters("\n")
 87 | Comment("ws")
 88 | Characters("\n\n\n\n")
 89 | Comment("ws")
 90 | Characters("\n")
 91 | StartElement(br)
 92 | EndElement(br)
 93 | Characters("\n")
 94 | StartElement(s)
 95 | EndElement(s)
 96 | Characters("\n")
 97 | StartElement(s)
 98 | EndElement(s)
 99 | Characters("\n")
100 | StartElement(s)
101 | Comment("ws")
102 | EndElement(s)
103 | Characters("\n\n\n")
104 | Comment("noWS")
105 | Characters("\n\n")
106 | Comment("ws")
107 | Characters("\n\n\n")
108 | Comment("ws")
109 | Comment("ws")
110 | Characters("\n")
111 | Comment("ws")
112 | Comment("ws")
113 | Characters("\n")
114 | Comment("ws")
115 | Comment("ws")
116 | Characters("\n")
117 | Comment("ws")
118 | StartElement(br)
119 | EndElement(br)
120 | Comment("ws")
121 | Characters("\n")
122 | Comment("ws")
123 | StartElement(s)
124 | EndElement(s)
125 | Comment("ws")
126 | Characters("\n")
127 | Comment("ws")
128 | StartElement(s)
129 | EndElement(s)
130 | Comment("ws")
131 | Characters("\n")
132 | Comment("ws")
133 | StartElement(s)
134 | EndElement(s)
135 | Comment("ws")
136 | Characters("\n\n\n")
137 | Comment("ws")
138 | Characters("\n")
139 | Comment("ws")
140 | Characters("\n\n\n\n")
141 | Comment("ws")
142 | Characters("\n")
143 | StartElement(br)
144 | EndElement(br)
145 | Characters("\n")
146 | StartElement(s)
147 | EndElement(s)
148 | Characters("\n")
149 | StartElement(s)
150 | EndElement(s)
151 | Characters("\n")
152 | StartElement(s)
153 | Comment("ws")
154 | EndElement(s)
155 | Characters("\n\n")
156 | Comment("ws")
157 | Characters("\n\n")
158 | Comment("ws")
159 | Comment("ws")
160 | Comment("ws")
161 | Comment("ws")
162 | Comment("ws")
163 | Comment("ws")
164 | Comment("ws")
165 | StartElement(br)
166 | EndElement(br)
167 | Comment("ws")
168 | Comment("ws")
169 | StartElement(s)
170 | EndElement(s)
171 | Comment("ws")
172 | Comment("ws")
173 | StartElement(s)
174 | EndElement(s)
175 | Comment("ws")
176 | Comment("ws")
177 | StartElement(s)
178 | EndElement(s)
179 | Comment("ws")
180 | Comment("ws")
181 | Comment("ws")
182 | Comment("ws")
183 | StartElement(br)
184 | EndElement(br)
185 | StartElement(s)
186 | EndElement(s)
187 | StartElement(s)
188 | EndElement(s)
189 | StartElement(s)
190 | Comment("ws")
191 | EndElement(s)
192 | Characters("\n")
193 | EndElement(el)
194 | EndDocument
195 | 


--------------------------------------------------------------------------------
/tests/documents/sample_8_full.txt:
--------------------------------------------------------------------------------
  1 | StartDocument(1.0, UTF-8)
  2 | StartElement(el)
  3 | Whitespace("\n")
  4 | Comment("ws")
  5 | CData("")
  6 | Whitespace("\n")
  7 | CData("")
  8 | Whitespace("\n\n")
  9 | Comment("ws")
 10 | Comment("ws")
 11 | Whitespace("\n")
 12 | Comment("ws")
 13 | Comment("ws")
 14 | Whitespace("\n")
 15 | Comment("ws")
 16 | CData("")
 17 | Comment("ws")
 18 | Whitespace("\n")
 19 | Comment("ws")
 20 | StartElement(br)
 21 | EndElement(br)
 22 | Comment("ws")
 23 | Whitespace("\n")
 24 | Comment("ws")
 25 | StartElement(s)
 26 | EndElement(s)
 27 | Comment("ws")
 28 | Whitespace("\n")
 29 | Comment("ws")
 30 | StartElement(s)
 31 | EndElement(s)
 32 | Comment("ws")
 33 | Whitespace("\n")
 34 | Comment("ws")
 35 | StartElement(s)
 36 | CData("")
 37 | EndElement(s)
 38 | Comment("ws")
 39 | Whitespace("\n\n\n")
 40 | CData("")
 41 | Comment("ws")
 42 | Whitespace("\n")
 43 | Comment("ws")
 44 | Whitespace("\n\n")
 45 | CData("")
 46 | CData("")
 47 | Whitespace("\n")
 48 | CData("")
 49 | CData("")
 50 | Whitespace("\n")
 51 | CData("")
 52 | Comment("ws")
 53 | CData("")
 54 | Whitespace("\n")
 55 | CData("")
 56 | StartElement(br)
 57 | EndElement(br)
 58 | CData("")
 59 | Whitespace("\n")
 60 | CData("")
 61 | StartElement(s)
 62 | EndElement(s)
 63 | CData("")
 64 | Whitespace("\n")
 65 | CData("")
 66 | StartElement(s)
 67 | EndElement(s)
 68 | CData("")
 69 | Whitespace("\n")
 70 | CData("")
 71 | StartElement(s)
 72 | Comment("ws")
 73 | EndElement(s)
 74 | CData("")
 75 | Whitespace("\n\n")
 76 | Comment("ws")
 77 | CData("")
 78 | Whitespace("\n")
 79 | CData("")
 80 | Whitespace("\n\n")
 81 | Comment("ws")
 82 | Comment("ws")
 83 | Whitespace("\n")
 84 | Comment("ws")
 85 | Comment("ws")
 86 | Whitespace("\n")
 87 | Comment("ws")
 88 | CData("")
 89 | Comment("ws")
 90 | Whitespace("\n")
 91 | Comment("ws")
 92 | StartElement(br)
 93 | EndElement(br)
 94 | Comment("ws")
 95 | Whitespace("\n")
 96 | Comment("ws")
 97 | StartElement(s)
 98 | EndElement(s)
 99 | Comment("ws")
100 | Whitespace("\n")
101 | Comment("ws")
102 | StartElement(s)
103 | EndElement(s)
104 | Comment("ws")
105 | Whitespace("\n")
106 | Comment("ws")
107 | StartElement(s)
108 | CData("")
109 | EndElement(s)
110 | Comment("ws")
111 | Whitespace("\n\n\n")
112 | CData("")
113 | Comment("ws")
114 | Whitespace("\n")
115 | Comment("ws")
116 | Whitespace("\n\n")
117 | CData("")
118 | CData("")
119 | Whitespace("\n")
120 | CData("")
121 | CData("")
122 | Whitespace("\n")
123 | CData("")
124 | Comment("ws")
125 | CData("")
126 | Whitespace("\n")
127 | CData("")
128 | StartElement(br)
129 | EndElement(br)
130 | CData("")
131 | Whitespace("\n")
132 | CData("")
133 | StartElement(s)
134 | EndElement(s)
135 | CData("")
136 | Whitespace("\n")
137 | CData("")
138 | StartElement(s)
139 | EndElement(s)
140 | CData("")
141 | Whitespace("\n")
142 | CData("")
143 | StartElement(s)
144 | Comment("ws")
145 | EndElement(s)
146 | CData("")
147 | Whitespace("\n\n\n")
148 | Comment("noWS")
149 | Whitespace("\n\n")
150 | Comment("ws")
151 | CData("")
152 | Whitespace("\n")
153 | CData("")
154 | Whitespace("\n\n")
155 | Comment("ws")
156 | Comment("ws")
157 | Whitespace("\n")
158 | Comment("ws")
159 | Comment("ws")
160 | Whitespace("\n")
161 | Comment("ws")
162 | CData("")
163 | Comment("ws")
164 | Whitespace("\n")
165 | Comment("ws")
166 | StartElement(br)
167 | EndElement(br)
168 | Comment("ws")
169 | Whitespace("\n")
170 | Comment("ws")
171 | StartElement(s)
172 | EndElement(s)
173 | Comment("ws")
174 | Whitespace("\n")
175 | Comment("ws")
176 | StartElement(s)
177 | EndElement(s)
178 | Comment("ws")
179 | Whitespace("\n")
180 | Comment("ws")
181 | StartElement(s)
182 | CData("")
183 | EndElement(s)
184 | Comment("ws")
185 | Whitespace("\n\n\n")
186 | CData("")
187 | Comment("ws")
188 | Whitespace("\n")
189 | Comment("ws")
190 | Whitespace("\n\n")
191 | CData("")
192 | CData("")
193 | Whitespace("\n")
194 | CData("")
195 | CData("")
196 | Whitespace("\n")
197 | CData("")
198 | Comment("ws")
199 | CData("")
200 | Whitespace("\n")
201 | CData("")
202 | StartElement(br)
203 | EndElement(br)
204 | CData("")
205 | Whitespace("\n")
206 | CData("")
207 | StartElement(s)
208 | EndElement(s)
209 | CData("")
210 | Whitespace("\n")
211 | CData("")
212 | StartElement(s)
213 | EndElement(s)
214 | CData("")
215 | Whitespace("\n")
216 | CData("")
217 | StartElement(s)
218 | Comment("ws")
219 | EndElement(s)
220 | CData("")
221 | Whitespace("\n\n")
222 | Comment("ws")
223 | CData("")
224 | Whitespace("\n\n")
225 | CData("")
226 | Comment("ws")
227 | Comment("ws")
228 | Comment("ws")
229 | Comment("ws")
230 | Comment("ws")
231 | CData("")
232 | Comment("ws")
233 | Comment("ws")
234 | StartElement(br)
235 | EndElement(br)
236 | Comment("ws")
237 | Comment("ws")
238 | StartElement(s)
239 | EndElement(s)
240 | Comment("ws")
241 | Comment("ws")
242 | StartElement(s)
243 | EndElement(s)
244 | Comment("ws")
245 | Comment("ws")
246 | StartElement(s)
247 | CData("")
248 | EndElement(s)
249 | Comment("ws")
250 | CData("")
251 | Comment("ws")
252 | Comment("ws")
253 | CData("")
254 | CData("")
255 | CData("")
256 | CData("")
257 | CData("")
258 | Comment("ws")
259 | CData("")
260 | CData("")
261 | StartElement(br)
262 | EndElement(br)
263 | CData("")
264 | CData("")
265 | StartElement(s)
266 | EndElement(s)
267 | CData("")
268 | CData("")
269 | StartElement(s)
270 | EndElement(s)
271 | CData("")
272 | CData("")
273 | StartElement(s)
274 | Comment("ws")
275 | EndElement(s)
276 | CData("")
277 | Whitespace("\n")
278 | EndElement(el)
279 | EndDocument
280 | 


--------------------------------------------------------------------------------
/tests/documents/sample_8_wscdch.txt:
--------------------------------------------------------------------------------
  1 | StartDocument(1.0, UTF-8)
  2 | StartElement(el)
  3 | Characters("\n")
  4 | Comment("ws")
  5 | Characters("\n")
  6 | Characters("\n\n")
  7 | Comment("ws")
  8 | Comment("ws")
  9 | Characters("\n")
 10 | Comment("ws")
 11 | Comment("ws")
 12 | Characters("\n")
 13 | Comment("ws")
 14 | Comment("ws")
 15 | Characters("\n")
 16 | Comment("ws")
 17 | StartElement(br)
 18 | EndElement(br)
 19 | Comment("ws")
 20 | Characters("\n")
 21 | Comment("ws")
 22 | StartElement(s)
 23 | EndElement(s)
 24 | Comment("ws")
 25 | Characters("\n")
 26 | Comment("ws")
 27 | StartElement(s)
 28 | EndElement(s)
 29 | Comment("ws")
 30 | Characters("\n")
 31 | Comment("ws")
 32 | StartElement(s)
 33 | EndElement(s)
 34 | Comment("ws")
 35 | Characters("\n\n\n")
 36 | Comment("ws")
 37 | Characters("\n")
 38 | Comment("ws")
 39 | Characters("\n\n")
 40 | Characters("\n")
 41 | Characters("\n")
 42 | Comment("ws")
 43 | Characters("\n")
 44 | StartElement(br)
 45 | EndElement(br)
 46 | Characters("\n")
 47 | StartElement(s)
 48 | EndElement(s)
 49 | Characters("\n")
 50 | StartElement(s)
 51 | EndElement(s)
 52 | Characters("\n")
 53 | StartElement(s)
 54 | Comment("ws")
 55 | EndElement(s)
 56 | Characters("\n\n")
 57 | Comment("ws")
 58 | Characters("\n")
 59 | Characters("\n\n")
 60 | Comment("ws")
 61 | Comment("ws")
 62 | Characters("\n")
 63 | Comment("ws")
 64 | Comment("ws")
 65 | Characters("\n")
 66 | Comment("ws")
 67 | Comment("ws")
 68 | Characters("\n")
 69 | Comment("ws")
 70 | StartElement(br)
 71 | EndElement(br)
 72 | Comment("ws")
 73 | Characters("\n")
 74 | Comment("ws")
 75 | StartElement(s)
 76 | EndElement(s)
 77 | Comment("ws")
 78 | Characters("\n")
 79 | Comment("ws")
 80 | StartElement(s)
 81 | EndElement(s)
 82 | Comment("ws")
 83 | Characters("\n")
 84 | Comment("ws")
 85 | StartElement(s)
 86 | EndElement(s)
 87 | Comment("ws")
 88 | Characters("\n\n\n")
 89 | Comment("ws")
 90 | Characters("\n")
 91 | Comment("ws")
 92 | Characters("\n\n")
 93 | Characters("\n")
 94 | Characters("\n")
 95 | Comment("ws")
 96 | Characters("\n")
 97 | StartElement(br)
 98 | EndElement(br)
 99 | Characters("\n")
100 | StartElement(s)
101 | EndElement(s)
102 | Characters("\n")
103 | StartElement(s)
104 | EndElement(s)
105 | Characters("\n")
106 | StartElement(s)
107 | Comment("ws")
108 | EndElement(s)
109 | Characters("\n\n\n")
110 | Comment("noWS")
111 | Characters("\n\n")
112 | Comment("ws")
113 | Characters("\n")
114 | Characters("\n\n")
115 | Comment("ws")
116 | Comment("ws")
117 | Characters("\n")
118 | Comment("ws")
119 | Comment("ws")
120 | Characters("\n")
121 | Comment("ws")
122 | Comment("ws")
123 | Characters("\n")
124 | Comment("ws")
125 | StartElement(br)
126 | EndElement(br)
127 | Comment("ws")
128 | Characters("\n")
129 | Comment("ws")
130 | StartElement(s)
131 | EndElement(s)
132 | Comment("ws")
133 | Characters("\n")
134 | Comment("ws")
135 | StartElement(s)
136 | EndElement(s)
137 | Comment("ws")
138 | Characters("\n")
139 | Comment("ws")
140 | StartElement(s)
141 | EndElement(s)
142 | Comment("ws")
143 | Characters("\n\n\n")
144 | Comment("ws")
145 | Characters("\n")
146 | Comment("ws")
147 | Characters("\n\n")
148 | Characters("\n")
149 | Characters("\n")
150 | Comment("ws")
151 | Characters("\n")
152 | StartElement(br)
153 | EndElement(br)
154 | Characters("\n")
155 | StartElement(s)
156 | EndElement(s)
157 | Characters("\n")
158 | StartElement(s)
159 | EndElement(s)
160 | Characters("\n")
161 | StartElement(s)
162 | Comment("ws")
163 | EndElement(s)
164 | Characters("\n\n")
165 | Comment("ws")
166 | Characters("\n\n")
167 | Comment("ws")
168 | Comment("ws")
169 | Comment("ws")
170 | Comment("ws")
171 | Comment("ws")
172 | Comment("ws")
173 | Comment("ws")
174 | StartElement(br)
175 | EndElement(br)
176 | Comment("ws")
177 | Comment("ws")
178 | StartElement(s)
179 | EndElement(s)
180 | Comment("ws")
181 | Comment("ws")
182 | StartElement(s)
183 | EndElement(s)
184 | Comment("ws")
185 | Comment("ws")
186 | StartElement(s)
187 | EndElement(s)
188 | Comment("ws")
189 | Comment("ws")
190 | Comment("ws")
191 | Comment("ws")
192 | StartElement(br)
193 | EndElement(br)
194 | StartElement(s)
195 | EndElement(s)
196 | StartElement(s)
197 | EndElement(s)
198 | StartElement(s)
199 | Comment("ws")
200 | EndElement(s)
201 | Characters("\n")
202 | EndElement(el)
203 | EndDocument
204 | 


--------------------------------------------------------------------------------
/tests/documents/sample_8_wsch.txt:
--------------------------------------------------------------------------------
  1 | StartDocument(1.0, UTF-8)
  2 | StartElement(el)
  3 | Characters("\n")
  4 | Comment("ws")
  5 | CData("")
  6 | Characters("\n")
  7 | CData("")
  8 | Characters("\n\n")
  9 | Comment("ws")
 10 | Comment("ws")
 11 | Characters("\n")
 12 | Comment("ws")
 13 | Comment("ws")
 14 | Characters("\n")
 15 | Comment("ws")
 16 | CData("")
 17 | Comment("ws")
 18 | Characters("\n")
 19 | Comment("ws")
 20 | StartElement(br)
 21 | EndElement(br)
 22 | Comment("ws")
 23 | Characters("\n")
 24 | Comment("ws")
 25 | StartElement(s)
 26 | EndElement(s)
 27 | Comment("ws")
 28 | Characters("\n")
 29 | Comment("ws")
 30 | StartElement(s)
 31 | EndElement(s)
 32 | Comment("ws")
 33 | Characters("\n")
 34 | Comment("ws")
 35 | StartElement(s)
 36 | CData("")
 37 | EndElement(s)
 38 | Comment("ws")
 39 | Characters("\n\n\n")
 40 | CData("")
 41 | Comment("ws")
 42 | Characters("\n")
 43 | Comment("ws")
 44 | Characters("\n\n")
 45 | CData("")
 46 | CData("")
 47 | Characters("\n")
 48 | CData("")
 49 | CData("")
 50 | Characters("\n")
 51 | CData("")
 52 | Comment("ws")
 53 | CData("")
 54 | Characters("\n")
 55 | CData("")
 56 | StartElement(br)
 57 | EndElement(br)
 58 | CData("")
 59 | Characters("\n")
 60 | CData("")
 61 | StartElement(s)
 62 | EndElement(s)
 63 | CData("")
 64 | Characters("\n")
 65 | CData("")
 66 | StartElement(s)
 67 | EndElement(s)
 68 | CData("")
 69 | Characters("\n")
 70 | CData("")
 71 | StartElement(s)
 72 | Comment("ws")
 73 | EndElement(s)
 74 | CData("")
 75 | Characters("\n\n")
 76 | Comment("ws")
 77 | CData("")
 78 | Characters("\n")
 79 | CData("")
 80 | Characters("\n\n")
 81 | Comment("ws")
 82 | Comment("ws")
 83 | Characters("\n")
 84 | Comment("ws")
 85 | Comment("ws")
 86 | Characters("\n")
 87 | Comment("ws")
 88 | CData("")
 89 | Comment("ws")
 90 | Characters("\n")
 91 | Comment("ws")
 92 | StartElement(br)
 93 | EndElement(br)
 94 | Comment("ws")
 95 | Characters("\n")
 96 | Comment("ws")
 97 | StartElement(s)
 98 | EndElement(s)
 99 | Comment("ws")
100 | Characters("\n")
101 | Comment("ws")
102 | StartElement(s)
103 | EndElement(s)
104 | Comment("ws")
105 | Characters("\n")
106 | Comment("ws")
107 | StartElement(s)
108 | CData("")
109 | EndElement(s)
110 | Comment("ws")
111 | Characters("\n\n\n")
112 | CData("")
113 | Comment("ws")
114 | Characters("\n")
115 | Comment("ws")
116 | Characters("\n\n")
117 | CData("")
118 | CData("")
119 | Characters("\n")
120 | CData("")
121 | CData("")
122 | Characters("\n")
123 | CData("")
124 | Comment("ws")
125 | CData("")
126 | Characters("\n")
127 | CData("")
128 | StartElement(br)
129 | EndElement(br)
130 | CData("")
131 | Characters("\n")
132 | CData("")
133 | StartElement(s)
134 | EndElement(s)
135 | CData("")
136 | Characters("\n")
137 | CData("")
138 | StartElement(s)
139 | EndElement(s)
140 | CData("")
141 | Characters("\n")
142 | CData("")
143 | StartElement(s)
144 | Comment("ws")
145 | EndElement(s)
146 | CData("")
147 | Characters("\n\n\n")
148 | Comment("noWS")
149 | Characters("\n\n")
150 | Comment("ws")
151 | CData("")
152 | Characters("\n")
153 | CData("")
154 | Characters("\n\n")
155 | Comment("ws")
156 | Comment("ws")
157 | Characters("\n")
158 | Comment("ws")
159 | Comment("ws")
160 | Characters("\n")
161 | Comment("ws")
162 | CData("")
163 | Comment("ws")
164 | Characters("\n")
165 | Comment("ws")
166 | StartElement(br)
167 | EndElement(br)
168 | Comment("ws")
169 | Characters("\n")
170 | Comment("ws")
171 | StartElement(s)
172 | EndElement(s)
173 | Comment("ws")
174 | Characters("\n")
175 | Comment("ws")
176 | StartElement(s)
177 | EndElement(s)
178 | Comment("ws")
179 | Characters("\n")
180 | Comment("ws")
181 | StartElement(s)
182 | CData("")
183 | EndElement(s)
184 | Comment("ws")
185 | Characters("\n\n\n")
186 | CData("")
187 | Comment("ws")
188 | Characters("\n")
189 | Comment("ws")
190 | Characters("\n\n")
191 | CData("")
192 | CData("")
193 | Characters("\n")
194 | CData("")
195 | CData("")
196 | Characters("\n")
197 | CData("")
198 | Comment("ws")
199 | CData("")
200 | Characters("\n")
201 | CData("")
202 | StartElement(br)
203 | EndElement(br)
204 | CData("")
205 | Characters("\n")
206 | CData("")
207 | StartElement(s)
208 | EndElement(s)
209 | CData("")
210 | Characters("\n")
211 | CData("")
212 | StartElement(s)
213 | EndElement(s)
214 | CData("")
215 | Characters("\n")
216 | CData("")
217 | StartElement(s)
218 | Comment("ws")
219 | EndElement(s)
220 | CData("")
221 | Characters("\n\n")
222 | Comment("ws")
223 | CData("")
224 | Characters("\n\n")
225 | CData("")
226 | Comment("ws")
227 | Comment("ws")
228 | Comment("ws")
229 | Comment("ws")
230 | Comment("ws")
231 | CData("")
232 | Comment("ws")
233 | Comment("ws")
234 | StartElement(br)
235 | EndElement(br)
236 | Comment("ws")
237 | Comment("ws")
238 | StartElement(s)
239 | EndElement(s)
240 | Comment("ws")
241 | Comment("ws")
242 | StartElement(s)
243 | EndElement(s)
244 | Comment("ws")
245 | Comment("ws")
246 | StartElement(s)
247 | CData("")
248 | EndElement(s)
249 | Comment("ws")
250 | CData("")
251 | Comment("ws")
252 | Comment("ws")
253 | CData("")
254 | CData("")
255 | CData("")
256 | CData("")
257 | CData("")
258 | Comment("ws")
259 | CData("")
260 | CData("")
261 | StartElement(br)
262 | EndElement(br)
263 | CData("")
264 | CData("")
265 | StartElement(s)
266 | EndElement(s)
267 | CData("")
268 | CData("")
269 | StartElement(s)
270 | EndElement(s)
271 | CData("")
272 | CData("")
273 | StartElement(s)
274 | Comment("ws")
275 | EndElement(s)
276 | CData("")
277 | Characters("\n")
278 | EndElement(el)
279 | EndDocument
280 | 


--------------------------------------------------------------------------------
/tests/errata2e.fail.txt:
--------------------------------------------------------------------------------
1 | rmt-e2e-18 E18.xml  External entity containing start of entity declaration is base URI for system identifier ; 5:7 Undefined entity: %intpe
2 | rmt-e2e-19 E19.xml  Parameter entities and character references are included-in-literal, but general entities are bypassed. ; 2:10 Unexpected entity: ent
3 | rmt-e2e-34 E34.xml  A non-deterministic content model is an error even if the element type is not used. 
4 | rmt-e2e-38 E38.xml  XML 1.0 document refers to 1.1 entity 
5 | rmt-e2e-50 E50.xml  All line-ends are normalized, even those not passed to the application. NB this can only be tested effectively in XML 1.1, since CR is in the S production; in 1.1 we can use NEL which isn't. ; 6:5 Unexpected token inside qualified name: 
6 | rmt-e2e-55 E55.xml  A reference to an unparsed entity in an entity value is an error rather than forbidden (unless the entity is referenced, of course) 
7 | rmt-e2e-57 E57.xml  A value other than preserve or default for xml:space is an error 
8 | 


--------------------------------------------------------------------------------
/tests/errata3e.fail.txt:
--------------------------------------------------------------------------------
1 | rmt-e3e-12 E12.xml  Default values for attributes may not contain references to external entities. 
2 | rmt-e3e-13 E13.xml  Even internal parameter entity references are enough to make undeclared entities into mere validity errors rather than well-formedness errors. ; 7:11 Unexpected entity: ent2
3 | 


--------------------------------------------------------------------------------
/tests/errata4e.fail.txt:
--------------------------------------------------------------------------------
1 | invalid-bo-7 inclbomboom_be.xml A byte order mark and a backwards one in general entity cause an illegal char. error (big-endian)
2 | invalid-bo-8 inclbomboom_le.xml A byte order mark and a backwards one in general entity cause an illegal char. error (little-endian)
3 | invalid-bo-9 incl8bomboom.xml A byte order mark and a backwards one in general entity cause an illegal char. error (utf-8)
4 | x-rmt-008b 008.xml  a document with version=1.7, legal in XML 1.0 from 5th edition ; 1:19 Invalid XML version: 1.7
5 | x-ibm-1-0.5-valid-P04-ibm04v01.xml ibm04v01.xml This test case covers legal NameStartChars character ranges plus discrete legal characters for production 04.; 35:24 Unexpected token inside qualified name: :
6 | x-ibm-1-0.5-valid-P05-ibm05v01.xml ibm05v01.xml This test case covers legal Element Names as per production 5.; 54:24 Qualified name is invalid: LegalName:
7 | x-ibm-1-0.5-valid-P05-ibm05v03.xml ibm05v03.xml This test case covers legal Attribute (Names) as per production 5.; 54:8 Qualified name is invalid: :attr
8 | 


--------------------------------------------------------------------------------
/tests/ibm_oasis_valid.fail.txt:
--------------------------------------------------------------------------------
1 | ibm-valid-P09-ibm09v03.xml ibm09v03.xml   Tests EnitityValue referencing a Parameter Entity ; 3:39 Unexpected entity: Name
2 | ibm-valid-P09-ibm09v05.xml ibm09v05.xml   Tests EnitityValue with combination of GE, PE and text, the GE used is  declared in the student.dtd ; 10:36 Unexpected entity: combine
3 | ibm-valid-P32-ibm32v02.xml ibm32v02.xml   Tests VC: Standalone Document Declaration with external entity reference  and standalone is no ; 3:24 Unexpected entity: animal_content
4 | 


--------------------------------------------------------------------------------
/tests/ibm_valid.fail.txt:
--------------------------------------------------------------------------------
1 | ibm-1-1-valid-P04-ibm04v01.xml ibm04v01.xml    This test case covers legal NameStartChars character ranges plus discrete legal   characters for production 04.  ; 36:24 Unexpected token inside qualified name: :
2 | ibm-1-1-valid-P05-ibm05v01.xml ibm05v01.xml    This test case covers legal Element Names as per production 5.  ; 55:24 Qualified name is invalid: LegalName:
3 | ibm-1-1-valid-P05-ibm05v03.xml ibm05v03.xml    This test case covers legal Attribute (Names) as per production 5.  ; 55:8 Qualified name is invalid: :attr
4 | 


--------------------------------------------------------------------------------
/tests/oasis.fail.txt:
--------------------------------------------------------------------------------
 1 | o-p04pass1 p04pass1.xml   names with all valid ASCII characters, and one from each    other class in NameChar  ; 5:8 Element A.-:̀· prefix is unbound
 2 | o-p05pass1 p05pass1.xml   various valid Name constructions  ; 2:8 Element A:._-0 prefix is unbound
 3 | o-p09fail1 p09fail1.xml   EntityValue excludes '%'  
 4 | o-p09fail2 p09fail2.xml   EntityValue excludes '&'  
 5 | o-p12fail1 p12fail1.xml   '"' excluded  
 6 | o-p12fail2 p12fail2.xml   '\' excluded  
 7 | o-p12fail3 p12fail3.xml   entity references excluded  
 8 | o-p12fail4 p12fail4.xml   '>' excluded  
 9 | o-p12fail5 p12fail5.xml   '<' excluded  
10 | o-p12fail6 p12fail6.xml   built-in entity refs excluded  
11 | o-p12fail7 p12fail7.xml   The public ID has a tab character, which is disallowed  
12 | o-p30fail1 p30fail1.xml   An XML declaration is not the same as a TextDecl  
13 | o-p31fail1 p31fail1.xml   external subset excludes doctypedecl  
14 | o-p45fail2 p45fail2.xml   S before contentspec is required.  
15 | o-p45fail3 p45fail3.xml   only one content spec  
16 | o-p45fail4 p45fail4.xml   no comments in declarations (contrast with SGML)  
17 | o-p46fail1 p46fail1.xml   no parens on declared content  
18 | o-p46fail2 p46fail2.xml   no inclusions (contrast with SGML)  
19 | o-p46fail3 p46fail3.xml   no exclusions (contrast with SGML)  
20 | o-p46fail4 p46fail4.xml   no space before occurrence  
21 | o-p46fail5 p46fail5.xml   single group  
22 | o-p46fail6 p46fail6.xml   can't be both declared and modeled  
23 | o-p47fail1 p47fail1.xml   Invalid operator '|' must match previous operator ','
24 | o-p47fail2 p47fail2.xml   Illegal character '-' in Element-content model  
25 | o-p47fail3 p47fail3.xml   Optional character must follow a name or list  
26 | o-p47fail4 p47fail4.xml   Illegal space before optional character
27 | o-p48fail1 p48fail1.xml   Illegal space before optional character 
28 | o-p48fail2 p48fail2.xml   Illegal space before optional character  
29 | o-p51fail1 p51fail1.xml   occurrence on #PCDATA group must be *  
30 | o-p51fail2 p51fail2.xml   occurrence on #PCDATA group must be *  
31 | o-p51fail3 p51fail3.xml   #PCDATA must come first  
32 | o-p51fail4 p51fail4.xml   occurrence on #PCDATA group must be *  
33 | o-p51fail5 p51fail5.xml   only '|' connectors  
34 | o-p51fail6 p51fail6.xml   Only '|' connectors and occurrence on #PCDATA group must be *  
35 | o-p51fail7 p51fail7.xml   no nested groups  
36 | o-p52fail1 p52fail1.xml   A name is required  
37 | o-p53fail1 p53fail1.xml   S is required before default  
38 | o-p53fail2 p53fail2.xml   S is required before type  
39 | o-p53fail3 p53fail3.xml   type is required  
40 | o-p53fail4 p53fail4.xml   default is required  
41 | o-p53fail5 p53fail5.xml   name is requried  
42 | o-p54fail1 p54fail1.xml   don't pass unknown attribute types  
43 | o-p55fail1 p55fail1.xml   must be upper case  
44 | o-p56fail1 p56fail1.xml   no IDS type  
45 | o-p56fail2 p56fail2.xml   no NUMBER type  
46 | o-p56fail3 p56fail3.xml   no NAME type  
47 | o-p56fail4 p56fail4.xml   no ENTITYS type - types must be upper case  
48 | o-p56fail5 p56fail5.xml   types must be upper case  
49 | o-p57fail1 p57fail1.xml   no keyword for NMTOKEN enumeration  
50 | o-p58fail1 p58fail1.xml   at least one value required  
51 | o-p58fail2 p58fail2.xml   separator must be '|'  
52 | o-p58fail3 p58fail3.xml   notations are NAMEs, not NMTOKENs -- note:  Leaving the invalid   notation undeclared would cause a validating parser to fail without   checking the name syntax, so the notation is declared with an   invalid name. A parser that reports error positions should report   an error at the AttlistDecl on line 6, before reaching the notation   declaration.  
53 | o-p58fail4 p58fail4.xml   NOTATION must be upper case  
54 | o-p58fail5 p58fail5.xml   S after keyword is required  
55 | o-p58fail6 p58fail6.xml   parentheses are require  
56 | o-p58fail7 p58fail7.xml   values are unquoted  
57 | o-p58fail8 p58fail8.xml   values are unquoted  
58 | o-p59fail1 p59fail1.xml   at least one required  
59 | o-p59fail2 p59fail2.xml   separator must be ","  
60 | o-p59fail3 p59fail3.xml   values are unquoted  
61 | o-p60fail1 p60fail1.xml   keywords must be upper case  
62 | o-p60fail2 p60fail2.xml   S is required after #FIXED  
63 | o-p60fail3 p60fail3.xml   only #FIXED has both keyword and value  
64 | o-p60fail4 p60fail4.xml   #FIXED required value  
65 | o-p60fail5 p60fail5.xml   only one default type  
66 | o-p61fail1 p61fail1.xml   no other types, including TEMP, which is valid in SGML  
67 | o-p62fail1 p62fail1.xml   INCLUDE must be upper case  
68 | o-p62fail2 p62fail2.xml   no spaces in terminating delimiter  
69 | o-p63fail1 p63fail1.xml   IGNORE must be upper case  
70 | o-p63fail2 p63fail2.xml   delimiters must be balanced  
71 | o-p64fail1 p64fail1.xml   section delimiters must balance  
72 | o-p64fail2 p64fail2.xml   section delimiters must balance  
73 | o-p72fail2 p72fail2.xml   S is required after '%'  
74 | o-p73fail2 p73fail2.xml   Only one replacement value  
75 | o-p73fail3 p73fail3.xml   No NDataDecl on replacement text  
76 | o-p74fail1 p74fail1.xml   no NDataDecls on parameter entities  
77 | o-p74fail3 p74fail3.xml   only one value  
78 | o-p75fail1 p75fail1.xml   S required after "PUBLIC"  
79 | o-p75fail2 p75fail2.xml   S required after "SYSTEM"  
80 | o-p75fail3 p75fail3.xml   S required between literals  
81 | o-p75fail4 p75fail4.xml   "SYSTEM" implies only one literal  
82 | o-p75fail5 p75fail5.xml   only one keyword  
83 | o-p75fail6 p75fail6.xml   "PUBLIC" requires two literals (contrast with SGML)  
84 | o-p76fail1 p76fail1.xml   S is required before "NDATA"  
85 | o-p76fail2 p76fail2.xml   "NDATA" is upper-case  
86 | o-p76fail3 p76fail3.xml   notation name is required  
87 | o-p76fail4 p76fail4.xml   notation names are Names  
88 | o-p11pass1 p11pass1.xml   system literals may not contain  URI fragments  
89 | 


--------------------------------------------------------------------------------
/tests/rmt-ns10.fail.txt:
--------------------------------------------------------------------------------
 1 | rmt-ns10-004 004.xml  Namespace name test: a relative URI (deprecated) 
 2 | rmt-ns10-005 005.xml  Namespace name test: a same-document relative URI (deprecated) 
 3 | rmt-ns10-006 006.xml  Namespace name test: an http IRI that is not a URI 
 4 | rmt-ns10-009 009.xml  Namespace equality test: plain repetition 
 5 | rmt-ns10-010 010.xml  Namespace equality test: use of character reference 
 6 | rmt-ns10-011 011.xml  Namespace equality test: use of entity reference 
 7 | rmt-ns10-012 012.xml  Namespace inequality test: equal after attribute value normalization 
 8 | rmt-ns10-030 030.xml  Reserved prefixes and namespaces: binding another prefix to the xml namespace 
 9 | rmt-ns10-033 033.xml  Reserved prefixes and namespaces: binding another prefix to the xmlns namespace 
10 | rmt-ns10-036 036.xml  Attribute uniqueness: repeated attribute with different prefixes 
11 | rmt-ns10-042 042.xml  Colon in PI name 
12 | rmt-ns10-043 043.xml  Colon in entity name 
13 | rmt-ns10-044 044.xml  Colon in entity name 
14 | ht-ns10-047 047.xml  Reserved name: _not_ an error ; 4:9 'xml' cannot be an element name prefix
15 | 


--------------------------------------------------------------------------------
/tests/rmt-ns11.fail.txt:
--------------------------------------------------------------------------------
1 | rmt-ns11-003 003.xml  1.1 style prefix unbinding ; 10:16 Cannot undefine prefix 'a'
2 | rmt-ns11-004 004.xml  1.1 style prefix unbinding and rebinding ; 11:16 Cannot undefine prefix 'a'
3 | 


--------------------------------------------------------------------------------
/tests/streaming.rs:
--------------------------------------------------------------------------------
 1 | #![forbid(unsafe_code)]
 2 | 
 3 | use std::io::{Cursor, Write};
 4 | 
 5 | use xml::EventReader;
 6 | use xml::reader::{ParserConfig, XmlEvent};
 7 | 
 8 | macro_rules! assert_match {
 9 |     ($actual:expr, $expected:pat) => {
10 |         match $actual {
11 |             $expected => {},
12 |             _ => panic!("assertion failed: `(left matches right)` \
13 |                         (left: `{:?}`, right: `{}`", $actual, stringify!($expected))
14 |         }
15 |     };
16 |     ($actual:expr, $expected:pat if $guard:expr) => {
17 |         match $actual {
18 |             $expected if $guard => {},
19 |             _ => panic!("assertion failed: `(left matches right)` \
20 |                         (left: `{:?}`, right: `{} if {}`",
21 |                         $actual, stringify!($expected), stringify!($guard))
22 |         }
23 |     };
24 | }
25 | 
// Writes `data` at the cursor's current position and then moves the cursor back to
// where it was before the write, so the freshly written bytes are the next ones read.
//
// Panics if the underlying write fails.
fn write_and_reset_position<W>(c: &mut Cursor<W>, data: &[u8])
where
    Cursor<W>: Write,
{
    // Remember where reading should resume; writing advances the position.
    let resume_at = c.position();
    c.write_all(data).unwrap();
    c.set_position(resume_at);
}
31 | 
#[test]
fn reading_streamed_content() {
    // Only the opening root tag is available up front; every further fragment is
    // written into the underlying cursor between reads.
    let source = Cursor::new(b"<root>".to_vec());
    let mut events = EventReader::new(source).into_iter();

    assert_match!(events.next(), Some(Ok(XmlEvent::StartDocument { .. })));
    assert_match!(events.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "root");

    // An element with text content.
    write_and_reset_position(events.source_mut(), b"<child-1>content</child-1>");
    assert_match!(events.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-1");
    assert_match!(events.next(), Some(Ok(XmlEvent::Characters(ref c))) if c == "content");
    assert_match!(events.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-1");

    // Two self-closing elements, each reported as a start/end pair.
    write_and_reset_position(events.source_mut(), b"<child-2/>");
    assert_match!(events.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-2");
    assert_match!(events.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-2");

    write_and_reset_position(events.source_mut(), b"<child-3/>");
    assert_match!(events.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-3");
    assert_match!(events.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-3");

    // Bare text between elements is left disabled: it doesn't seem to work because of
    // how tags parsing is done.
    // write_and_reset_position(events.source_mut(), b"some text");
    // assert_match!(events.next(), Some(Ok(XmlEvent::Characters(ref c))) if c == "some text");

    // Closing the root element ends the document, after which the iterator is exhausted.
    write_and_reset_position(events.source_mut(), b"</root>");
    assert_match!(events.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "root");
    assert_match!(events.next(), Some(Ok(XmlEvent::EndDocument)));
    assert_match!(events.next(), None);
}
63 | 
#[test]
fn reading_streamed_content2() {
    // Same incremental feeding as `reading_streamed_content`, but with
    // `ignore_end_of_stream` enabled on the parser configuration.
    let mut config = ParserConfig::new();
    config.ignore_end_of_stream = true;

    let source = Cursor::new(b"<root>".to_vec());
    let mut events = EventReader::new_with_config(source, config).into_iter();

    assert_match!(events.next(), Some(Ok(XmlEvent::StartDocument { .. })));
    assert_match!(events.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "root");

    write_and_reset_position(events.source_mut(), b"<child-1>content</child-1>");
    assert_match!(events.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-1");
    assert_match!(events.next(), Some(Ok(XmlEvent::Characters(ref c))) if c == "content");
    assert_match!(events.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-1");

    write_and_reset_position(events.source_mut(), b"<child-2>content</child-2>");
    assert_match!(events.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-2");
    assert_match!(events.next(), Some(Ok(XmlEvent::Characters(ref c))) if c == "content");
    assert_match!(events.next(), Some(Ok(XmlEvent::EndElement { ref name })) if name.local_name == "child-2");

    // Nothing more has been written yet: the reader is expected to yield an error here...
    assert_match!(events.next(), Some(Err(_)));

    // ...and to carry on parsing once more input has been supplied.
    write_and_reset_position(events.source_mut(), b"<child-3></child-3>");
    assert_match!(events.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-3");

    // Supply a tag that is cut off in the middle of its attribute list.
    write_and_reset_position(events.source_mut(), b"<child-4 type='get'");
    match events.next() {
        // Only an error is acceptable while the tag is still incomplete.
        Some(Err(_)) => {},
        None | Some(Ok(_)) => {
            panic!("At this point, parser must not detect something.");
        },
    }

    // Completing the tag lets the element be reported.
    write_and_reset_position(events.source_mut(), b" />");
    assert_match!(events.next(), Some(Ok(XmlEvent::StartElement { ref name, .. })) if name.local_name == "child-4");
}
99 | 


--------------------------------------------------------------------------------
/tests/sun-not-wf.fail.txt:
--------------------------------------------------------------------------------
 1 | attlist01 attlist01.xml   SGML's NUTOKEN is not allowed.
 2 | attlist02 attlist02.xml   SGML's NUTOKENS attribute type is not allowed.
 3 | attlist03 attlist03.xml   Comma doesn't separate enumerations, unlike in SGML.
 4 | attlist04 attlist04.xml   SGML's NUMBER attribute type is not allowed.
 5 | attlist05 attlist05.xml   SGML's NUMBERS attribute type is not allowed.
 6 | attlist06 attlist06.xml   SGML's NAME attribute type is not allowed.
 7 | attlist07 attlist07.xml   SGML's NAMES attribute type is not allowed.
 8 | attlist08 attlist08.xml   SGML's #CURRENT is not allowed.
 9 | attlist09 attlist09.xml   SGML's #CONREF is not allowed.
10 | cond01 cond01.xml   Only INCLUDE and IGNORE are conditional section keywords
11 | cond02 cond02.xml   Must have keyword in conditional sections
12 | content01 content01.xml   No whitespace before "?" in content model
13 | content02 content02.xml   No whitespace before "*" in content model
14 | content03 content03.xml   No whitespace before "+" in content model
15 | decl01 decl01.xml   External entities may not have standalone decls. 
16 | nwf-dtd00 dtd00.xml   Comma mandatory in content model
17 | nwf-dtd01 dtd01.xml   Can't mix comma and vertical bar in content models
18 | dtd04 dtd04.xml   PUBLIC literal must be quoted
19 | dtd05 dtd05.xml   SYSTEM identifier must be quoted
20 | dtd07 dtd07.xml   Text declarations (which optionally begin any external entity)  are required to have "encoding=...". 
21 | encoding07 encoding07.xml   Text declarations (which optionally begin any external entity)  are required to have "encoding=...". 
22 | pi pi.xml   No space between PI target name and data
23 | pubid01 pubid01.xml   Illegal entity ref in public ID
24 | pubid02 pubid02.xml   Illegal characters in public ID
25 | pubid03 pubid03.xml   Illegal characters in public ID
26 | pubid04 pubid04.xml   Illegal characters in public ID
27 | pubid05 pubid05.xml   SGML-ism: public ID without system ID
28 | sgml04 sgml04.xml   ATTLIST declarations apply to only one element, unlike SGML
29 | sgml05 sgml05.xml   ELEMENT declarations apply to only one element, unlike SGML
30 | sgml06 sgml06.xml   ATTLIST declarations are never global, unlike in SGML
31 | sgml07 sgml07.xml   SGML Tag minimization specifications are not allowed
32 | sgml08 sgml08.xml   SGML Tag minimization specifications are not allowed
33 | sgml09 sgml09.xml   SGML Content model exception specifications are not allowed
34 | sgml10 sgml10.xml   SGML Content model exception specifications are not allowed
35 | sgml11 sgml11.xml   CDATA is not a valid content model spec
36 | sgml12 sgml12.xml   RCDATA is not a valid content model spec
37 | sgml13 sgml13.xml   SGML Unordered content models not allowed
38 | 


--------------------------------------------------------------------------------
/tests/sun-valid.fail.txt:
--------------------------------------------------------------------------------
1 | not-sa03 not-sa03.xml   A non-standalone document is valid if declared as such.; 19:20 Unexpected entity: internal
2 | v-pe00 pe00.xml   Tests construction of internal entity replacement text, using  an example in the XML specification. ; 2:12 Unexpected entity: book
3 | 


--------------------------------------------------------------------------------
/tests/tests.xml:
--------------------------------------------------------------------------------
1 | <TESTCASES>
2 |     <TEST URI="cases/xmlnsquote.xml" TYPE="not-wf">issue 152</TEST>
3 |     <TEST URI="cases/quote.xml" TYPE="not-wf"/>
4 |     <TEST URI="cases/autosar.xml" TYPE="valid"/>
5 |     <TEST URI="cases/feed.xml" TYPE="valid"/>
6 | </TESTCASES>
7 | 


--------------------------------------------------------------------------------
/tests/xml11.fail.txt:
--------------------------------------------------------------------------------
1 | rmt-001 001.xml  External subset has later version number 
2 | rmt-002 002.xml  External PE has later version number 
3 | rmt-003 003.xml  External general entity has later version number 
4 | rmt-004 004.xml  External general entity has later version number (no decl means 1.0) 
5 | rmt-005 005.xml  Indirect external general entity has later version number 
6 | rmt-009 009.xml  External general entity has implausible version number 
7 | rmt-054 054.xml  Contains a character reference to a C0 control character (form-feed) in an entity value. This will be legal (in XML 1.1) when the entity declaration is parsed, but what about when it is used? According to the grammar in the CR spec, it should be illegal (because the replacement text must match "content"), but this is probably not intended. This will be fixed in the PR version. ; 12:9 Invalid character U+000C
8 | 


--------------------------------------------------------------------------------
/tests/xmlconf.rs:
--------------------------------------------------------------------------------
  1 | //! W3C XML conformance test suite <https://www.w3.org/XML/Test/>
  2 | 
  3 | use std::collections::{HashMap, HashSet};
  4 | use std::fs::File;
  5 | use std::io::BufReader;
  6 | use std::path::Path;
  7 | use std::process::Command;
  8 | use std::sync::Mutex;
  9 | use xml::reader::XmlEvent;
 10 | use xml::{EventWriter, ParserConfig};
 11 | 
 12 | /// Held while checking for and extracting the archive, so only one caller does that at a time.
 13 | static UNZIP: Mutex<()> = Mutex::new(());
 14 | 
 15 | /// Unpacks `tests/xmlts20130923.zip` with the external `unzip` command unless `tests/xmlconf` already exists.
 16 | fn ensure_unzipped() {
 17 |     let _lock = UNZIP.lock().expect("unzip already failed");
 18 |     // The test suite's license only allows redistributing the unmodified zip, hence the on-demand extraction.
 19 |     if Path::new("tests/xmlconf").exists() {
 20 |         return;
 21 |     }
 22 |     let status = Command::new("unzip").current_dir("tests").arg("xmlts20130923.zip").status().unwrap();
 23 |     assert!(status.success(), "must unzip");
 24 | }
 25 | 
 26 | /// Runs the catalogue at `suite_rel_path` (relative to `tests/`) once per parser configuration listed below.
 27 | #[track_caller]
 28 | fn run_suite(suite_rel_path: &str) {
 29 |     let everything_on = ParserConfig::new().trim_whitespace(true).whitespace_to_characters(true).cdata_to_characters(true).ignore_comments(true).coalesce_characters(true);
 30 |     let strict_root = ParserConfig::default().allow_multiple_root_elements(false).ignore_root_level_whitespace(false);
 31 |     let configs = [ParserConfig::default().allow_multiple_root_elements(true), ParserConfig::default().coalesce_characters(false), ParserConfig::default().ignore_comments(false), everything_on, strict_root];
 32 |     for parser_config in configs { run_suite_with_config(suite_rel_path, parser_config); }
 33 | }
 34 | 
 35 | /// Runs every `TEST` entry of the catalogue `tests/<suite_rel_path>` with `parser_config` and compares each
 36 | /// outcome with the ids listed in `tests/<catalogue stem>.fail.txt`. With `PRINT_SPEC=1` set, the observed
 37 | /// failures are written to that file instead (or the file is removed when there are none).
 38 | #[track_caller]
 39 | fn run_suite_with_config(suite_rel_path: &str, parser_config: ParserConfig) {
 40 |     ensure_unzipped();
 41 | 
 42 |     let tests_dir = Path::new("tests");
 43 |     let suite_path = tests_dir.join(suite_rel_path);
 44 |     let root = suite_path.parent().unwrap();
 45 |     let known_failures_file_path = tests_dir.join(suite_path.with_extension("fail.txt").file_name().unwrap());
 46 | 
 47 |     // A `Some` buffer means "regenerate the list"; `None` means "verify against the existing list".
 48 |     let mut new_known_failures_file = matches!(std::env::var("PRINT_SPEC").as_deref(), Ok("1")).then(String::new);
 49 |     // Only the first space-separated token of each line (the test id) is significant; a missing file is an empty list.
 50 |     let known_failures_text = std::fs::read_to_string(&known_failures_file_path).unwrap_or_default();
 51 |     let known_broken_test_ids: HashSet<String> = known_failures_text.lines()
 52 |         .map(|line| line.trim().split(' ').next().unwrap().to_owned()).collect();
 53 | 
 54 |     let catalogue = File::open(&suite_path).map_err(|e| format!("{}: {e}", suite_path.display())).unwrap();
 55 |     // The catalogue itself is always read with this fixed configuration, not with `parser_config`.
 56 |     let events = ParserConfig::default().allow_multiple_root_elements(true).create_reader(BufReader::new(catalogue));
 57 |     let mut parsed = 0;
 58 |     let mut desc = String::new();
 59 |     let mut attr = HashMap::<String, String>::new();
 60 |     for event in events {
 61 |         let event = event.map_err(|e| format!("{}: {e}", suite_path.display())).expect("testsuite validity");
 62 |         match event {
 63 |             XmlEvent::StartElement { name, attributes, .. } if name.local_name == "TEST" => {
 64 |                 desc.clear();
 65 |                 attr = attributes.into_iter().map(|a| (a.name.local_name, a.value)).collect();
 66 |             },
 67 |             XmlEvent::Characters(text) => {
 68 |                 // Flatten the description: newlines become spaces, then double spaces are squeezed twice.
 69 |                 let one_line = text.replace('\n', " ");
 70 |                 desc.push_str(&one_line.replace("  ", " ").replace("  ", " "));
 71 |             },
 72 |             XmlEvent::EndElement { name } if name.local_name == "TEST" => {
 73 |                 let path = root.join(&attr["URI"]);
 74 |                 let test_type = attr["TYPE"].as_str();
 75 |                 let id = match attr.get("ID") {
 76 |                     Some(explicit) => explicit.as_str(),
 77 |                     None => path.file_stem().unwrap().to_str().unwrap(),
 78 |                 };
 79 |                 // tests obsolete things changed in edition 5
 80 |                 if attr.get("EDITION").map_or(false, |edition| edition.as_str() == "1 2 3 4") {
 81 |                     continue;
 82 |                 }
 83 |                 let res = match test_type {
 84 |                     // an "invalid" document is still well-formed, so it must parse just like a "valid" one
 85 |                     "valid" | "invalid" => expect_well_formed(&path, &desc, parser_config.clone()),
 86 |                     "not-wf" | "error" => expect_ill_formed(&path, &desc),
 87 |                     other => unimplemented!("{other}?? type"),
 88 |                 };
 89 |                 match (new_known_failures_file.as_mut(), res) {
 90 |                     (Some(out), Err(e)) => out.push_str(&format!("{id} {}\n", e.to_string().replace('\n', " "))),
 91 |                     (Some(_), Ok(())) => {},
 92 |                     (None, res) => match (res, known_broken_test_ids.contains(id)) {
 93 |                         (Err(_), true) | (Ok(()), false) => {},
 94 |                         (Err(e), false) => panic!("{suite_rel_path} failed on {} ({id})\n{e}", path.display()),
 95 |                         (Ok(()), true) => panic!("expected {} ({id}) to fail, but it passes {test_type} of {suite_rel_path} now\n{desc}", path.display()),
 96 |                     },
 97 |                 }
 98 |                 parsed += 1;
 99 |             },
100 |             _ => {},
101 |         }
102 |     }
103 |     match new_known_failures_file {
104 |         None => {},
105 |         Some(out) if out.is_empty() => { let _ = std::fs::remove_file(known_failures_file_path); },
106 |         Some(out) => std::fs::write(known_failures_file_path, out).unwrap(),
107 |     }
108 |     assert!(parsed > 0);
109 | }
110 | 
111 | /// Parses `xml_path` with `parser_config`, forwarding every event for which `as_writer_event` returns
112 | /// `Some` to an in-memory `EventWriter`. Returns `Err` on a reader error, on a second `StartDocument`, or
113 | /// when no `EndElement` was seen; panics if the file cannot be opened or if a write failed.
114 | #[track_caller]
115 | fn expect_well_formed(xml_path: &Path, msg: &str, parser_config: ParserConfig) -> Result<(), Box<dyn std::error::Error>> {
116 |     let file = File::open(xml_path).expect("testcase");
117 |     let mut writer = EventWriter::new(Vec::new());
118 |     let (mut seen_any, mut document_started) = (false, false);
119 |     let mut writes_failed = None;
120 |     for event in parser_config.create_reader(BufReader::new(file)) {
121 |         let event = match event {
122 |             Ok(event) => event,
123 |             Err(e) => {
124 |                 let file_name = xml_path.file_name().and_then(std::ffi::OsStr::to_str).unwrap();
125 |                 return Err(format!("{} {msg}; {e}", file_name).into());
126 |             },
127 |         };
128 |         if matches!(event, XmlEvent::EndElement { .. }) {
129 |             seen_any = true;
130 |         } else if matches!(event, XmlEvent::StartDocument { .. }) {
131 |             if document_started { return Err("document started twice".into()); }
132 |             document_started = true;
133 |         }
134 |         // Only the most recent write error is kept; it is reported once the whole document has been read.
135 |         if let Some(Err(e)) = event.as_writer_event().map(|out| writer.write(out)) {
136 |             writes_failed = Some(e);
137 |         }
138 |     }
139 |     match (seen_any, writes_failed) {
140 |         (false, _) => Err("no elements found".into()),
141 |         (true, Some(e)) => panic!("{} write failed on {e}", xml_path.display()),
142 |         (true, None) => Ok(()),
143 |     }
144 | }
145 | 
146 | #[track_caller]
147 | fn expect_ill_formed(xml_path: &Path, msg: &str) -> Result<(), Box<dyn std::error::Error>> {
148 |     let f = BufReader::new(File::open(xml_path)?);
149 |     let r = ParserConfig::new().allow_multiple_root_elements(false).create_reader(f);
150 |     for e in r {
151 |         if e.is_err() {
152 |             return Ok(());
153 |         }
154 |     }
155 |     Err(format!("{} {msg}", xml_path.file_name().and_then(std::ffi::OsStr::to_str).unwrap()).into())
156 | }
157 | 
158 | /// Declares one `#[test]` function per `name => catalogue` pair; each generated test calls
159 | /// `run_suite` with the given catalogue path.
160 | macro_rules! conformance_suites {
161 |     ($($test_name:ident => $catalogue:literal,)+) => {
162 |         $(
163 |             #[test]
164 |             fn $test_name() {
165 |                 run_suite($catalogue);
166 |             }
167 |         )+
168 |     };
169 | }
170 | 
171 | conformance_suites! {
172 |     eduni_errata_2e => "xmlconf/eduni/errata-2e/errata2e.xml",
173 |     eduni_errata_3e => "xmlconf/eduni/errata-3e/errata3e.xml",
174 |     eduni_errata_4e => "xmlconf/eduni/errata-4e/errata4e.xml",
175 |     eduni_misc_ht => "xmlconf/eduni/misc/ht-bh.xml",
176 |     eduni_namespaces_10 => "xmlconf/eduni/namespaces/1.0/rmt-ns10.xml",
177 |     eduni_namespaces_11 => "xmlconf/eduni/namespaces/1.1/rmt-ns11.xml",
178 |     eduni_namespaces_errata => "xmlconf/eduni/namespaces/errata-1e/errata1e.xml",
179 |     eduni_xml_11 => "xmlconf/eduni/xml-1.1/xml11.xml",
180 |     ibm_oasis_valid => "xmlconf/ibm/ibm_oasis_valid.xml",
181 |     ibm_xml_11 => "xmlconf/ibm/xml-1.1/ibm_valid.xml",
182 |     oasis => "xmlconf/oasis/oasis.xml",
183 |     sun_valid => "xmlconf/sun/sun-valid.xml",
184 |     sun_ill_formed => "xmlconf/sun/sun-not-wf.xml",
185 |     japanese => "xmlconf/japanese/japanese.xml",
186 |     xmltest => "xmlconf/xmltest/xmltest.xml",
187 |     own_tests => "tests.xml",
188 | }
237 | 


--------------------------------------------------------------------------------
/tests/xmltest.fail.txt:
--------------------------------------------------------------------------------
 1 | not-wf-sa-003 003.xml   Processing Instruction target name is required.
 2 | not-wf-sa-054 054.xml   PUBLIC requires two literals.
 3 | not-wf-sa-056 056.xml   Invalid Document Type Definition format - misplaced comment. 
 4 | not-wf-sa-057 057.xml   This isn't SGML; comments can't exist in declarations. 
 5 | not-wf-sa-058 058.xml   Invalid character , in ATTLIST enumeration 
 6 | not-wf-sa-059 059.xml   String literal must be in quotes. 
 7 | not-wf-sa-060 060.xml   Invalid type NAME defined in ATTLIST.
 8 | not-wf-sa-061 061.xml   External entity declarations require whitespace between public  and system IDs.
 9 | not-wf-sa-064 064.xml   Space is required between attribute type and default values  in <!ATTLIST...> declarations. 
10 | not-wf-sa-065 065.xml   Space is required between attribute name and type  in <!ATTLIST...> declarations. 
11 | not-wf-sa-066 066.xml   Required whitespace is missing. 
12 | not-wf-sa-067 067.xml   Space is required between attribute type and default values  in <!ATTLIST...> declarations. 
13 | not-wf-sa-068 068.xml   Space is required between NOTATION keyword and list of  enumerated choices in <!ATTLIST...> declarations. 
14 | not-wf-sa-069 069.xml   Space is required before an NDATA entity annotation.
15 | not-wf-sa-074 074.xml   Internal general parsed entities are only well formed if  they match the "content" production. 
16 | not-wf-sa-075 075.xml   ENTITY can't reference itself directly or indirectly. 
17 | not-wf-sa-077 077.xml   Undefined ENTITY bar. 
18 | not-wf-sa-078 078.xml   Undefined ENTITY foo. 
19 | not-wf-sa-079 079.xml   ENTITY can't reference itself directly or indirectly. 
20 | not-wf-sa-080 080.xml   ENTITY can't reference itself directly or indirectly. 
21 | not-wf-sa-081 081.xml   This tests the No External Entity References WFC,  since the entity is referred to within an attribute. 
22 | not-wf-sa-082 082.xml   This tests the No External Entity References WFC,  since the entity is referred to within an attribute. 
23 | not-wf-sa-083 083.xml   Undefined NOTATION n. 
24 | not-wf-sa-084 084.xml   Tests the Parsed Entity WFC by referring to an  unparsed entity. (This precedes the error of not declaring  that entity's notation, which may be detected any time before  the DTD parsing is completed.) 
25 | not-wf-sa-085 085.xml   Public IDs may not contain "[". 
26 | not-wf-sa-086 086.xml   Public IDs may not contain "[". 
27 | not-wf-sa-087 087.xml   Public IDs may not contain "[". 
28 | not-wf-sa-089 089.xml   Parameter entities "are" always parsed; NDATA annotations  are not permitted.
29 | not-wf-sa-091 091.xml   Parameter entities "are" always parsed; NDATA annotations  are not permitted.
30 | not-wf-sa-104 104.xml   Internal general parsed entities are only well formed if  they match the "content" production. 
31 | not-wf-sa-115 115.xml   The replacement text of this entity is an illegal character  reference, which must be rejected when it is parsed in the  context of an attribute value.
32 | not-wf-sa-116 116.xml   Internal general parsed entities are only well formed if  they match the "content" production. This is a partial  character reference, not a full one. 
33 | not-wf-sa-117 117.xml   Internal general parsed entities are only well formed if  they match the "content" production. This is a partial  character reference, not a full one. 
34 | not-wf-sa-119 119.xml   Internal general parsed entities are only well formed if  they match the "content" production. This is a partial  character reference, not a full one. 
35 | not-wf-sa-122 122.xml   Invalid syntax mixed connectors are used. 
36 | not-wf-sa-123 123.xml   Invalid syntax mismatched parenthesis. 
37 | not-wf-sa-124 124.xml   Invalid format of Mixed-content declaration. 
38 | not-wf-sa-125 125.xml   Invalid syntax extra set of parenthesis not necessary. 
39 | not-wf-sa-126 126.xml   Invalid syntax Mixed-content must be defined as zero or more. 
40 | not-wf-sa-127 127.xml   Invalid syntax Mixed-content must be defined as zero or more. 
41 | not-wf-sa-128 128.xml   Invalid CDATA syntax. 
42 | not-wf-sa-129 129.xml   Invalid syntax for Element Type Declaration. 
43 | not-wf-sa-130 130.xml   Invalid syntax for Element Type Declaration. 
44 | not-wf-sa-131 131.xml   Invalid syntax for Element Type Declaration. 
45 | not-wf-sa-132 132.xml   Invalid syntax mixed connectors used. 
46 | not-wf-sa-133 133.xml   Illegal whitespace before optional character causes syntax error. 
47 | not-wf-sa-134 134.xml   Illegal whitespace before optional character causes syntax error. 
48 | not-wf-sa-135 135.xml   Invalid character used as connector. 
49 | not-wf-sa-136 136.xml   Tag omission is invalid in XML. 
50 | not-wf-sa-137 137.xml   Space is required before a content model. 
51 | not-wf-sa-138 138.xml   Invalid syntax for content particle. 
52 | not-wf-sa-139 139.xml   The element-content model should not be empty. 
53 | not-wf-sa-149 149.xml   XML Declaration may not be within a DTD.
54 | not-wf-sa-158 158.xml   SGML-ism: "#NOTATION gif" can't have attributes. 
55 | not-wf-sa-159 159.xml   Uses '&' unquoted in an entity declaration,  which is illegal syntax for an entity reference.
56 | not-wf-sa-160 160.xml   Violates the PEs in Internal Subset WFC  by using a PE reference within a declaration. 
57 | not-wf-sa-161 161.xml   Violates the PEs in Internal Subset WFC  by using a PE reference within a declaration. 
58 | not-wf-sa-162 162.xml   Violates the PEs in Internal Subset WFC  by using a PE reference within a declaration. 
59 | not-wf-sa-164 164.xml   Invalid placement of Parameter entity reference. 
60 | not-wf-sa-180 180.xml   The Entity Declared WFC requires entities to be declared  before they are used in an attribute list declaration. 
61 | not-wf-sa-181 181.xml   Internal parsed entities must match the content  production to be well formed. 
62 | not-wf-sa-182 182.xml   Internal parsed entities must match the content  production to be well formed. 
63 | not-wf-sa-183 183.xml   Mixed content declarations may not include content particles.
64 | not-wf-sa-184 184.xml   In mixed content models, element names must not be  parenthesized. 
65 | not-wf-not-sa-001 001.xml   Conditional sections must be properly terminated ("]>" used  instead of "]]>"). 
66 | not-wf-not-sa-002 002.xml   Processing instruction target names may not be "XML"  in any combination of cases. 
67 | not-wf-not-sa-003 003.xml   Conditional sections must be properly terminated ("]]>" omitted). 
68 | not-wf-not-sa-004 004.xml   Conditional sections must be properly terminated ("]]>" omitted). 
69 | not-wf-not-sa-005 005.xml   Tests the Entity Declared VC by referring to an  undefined parameter entity within an external entity.
70 | not-wf-not-sa-006 006.xml   Conditional sections need a '[' after the INCLUDE or IGNORE. 
71 | not-wf-not-sa-007 007.xml   A <!DOCTYPE ...> declaration may not begin any external  entity; it's only found once, in the document entity.
72 | not-wf-not-sa-008 008.xml   In DTDs, the '%' character must be part of a parameter  entity reference.
73 | not-wf-not-sa-009 009.xml   This test violates WFC:PE Between Declarations in Production 28a.  The last character of a markup declaration is not contained in the same  parameter-entity text replacement.
74 | not-wf-ext-sa-001 001.xml   Tests the No Recursion WFC by having an external general  entity be self-recursive.
75 | not-wf-ext-sa-002 002.xml   External entities have "text declarations", which do  not permit the "standalone=..." attribute that's allowed  in XML declarations.
76 | not-wf-ext-sa-003 003.xml   Only one text declaration is permitted; a second one  looks like an illegal processing instruction (target names  of "xml" in any case are not allowed). 
77 | valid-sa-012 012.xml   Uses a legal XML 1.0 name consisting of a single colon  character (disallowed by the latest XML Namespaces draft).; 5:7 Qualified name is invalid: :
78 | valid-not-sa-031 031.xml   Expands a general entity which contains a CDATA section with  what looks like a markup declaration (but is just text since  it's in a CDATA section).; 2:8 Unexpected entity: e
79 | 


--------------------------------------------------------------------------------
/tests/xmlts20130923.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kornelski/xml-rs/c4a9653d177d7cf98885764fc69a199768b6e009/tests/xmlts20130923.zip


--------------------------------------------------------------------------------