├── html5ever ├── fuzz │ ├── .gitignore │ ├── Cargo.toml │ └── fuzz_targets │ │ └── fuzz_document_parse.rs ├── data │ └── bench │ │ ├── tiny-fragment.html │ │ ├── small-fragment.html │ │ ├── strong.html │ │ ├── lipsum-zh.html │ │ └── medium-fragment.html ├── Cargo.toml ├── src │ ├── lib.rs │ ├── macros.rs │ ├── util │ │ └── str.rs │ ├── tree_builder │ │ ├── types.rs │ │ ├── tag_sets.rs │ │ └── data.rs │ ├── tokenizer │ │ ├── states.rs │ │ └── interface.rs │ └── driver.rs ├── LICENSE-MIT ├── examples │ ├── noop-tokenize.rs │ ├── capi │ │ └── tokenize.c │ ├── tokenize.rs │ ├── noop-tree-builder.rs │ └── print-tree-actions.rs └── benches │ └── html5ever.rs ├── xml5ever ├── fuzz │ ├── .gitignore │ ├── Cargo.toml │ └── fuzz_targets │ │ └── fuzz_document_parse.rs ├── examples │ ├── example.xml │ ├── simple_xml_tokenizer.rs │ ├── xml_tokenizer.rs │ └── README.md ├── src │ ├── macros.rs │ ├── tree_builder │ │ └── types.rs │ ├── lib.rs │ ├── tokenizer │ │ ├── qname.rs │ │ ├── interface.rs │ │ └── states.rs │ ├── driver.rs │ └── serialize │ │ └── mod.rs ├── Cargo.toml ├── data │ └── bench │ │ └── strong.xml ├── LICENSE-MIT ├── benches │ └── xml5ever.rs └── README.md ├── rustfmt.toml ├── .gitignore ├── .gitmodules ├── rcdom ├── data │ └── test │ │ └── ignore ├── README.md ├── tests │ ├── html-driver.rs │ ├── util │ │ ├── find_tests.rs │ │ └── runner.rs │ ├── foreach_html5lib_test │ │ └── mod.rs │ ├── xml-driver.rs │ ├── html-tree-sink.rs │ └── xml-tree-builder.rs ├── Cargo.toml ├── LICENSE-MIT ├── examples │ ├── hello_xml.rs │ ├── xml_tree_printer.rs │ ├── html2html.rs │ └── print-rcdom.rs └── custom-html5lib-tokenizer-tests │ └── regression.test ├── COPYRIGHT ├── markup5ever ├── Cargo.toml ├── LICENSE-MIT ├── lib.rs ├── util │ └── smallcharset.rs └── serialize.rs ├── .github ├── dependabot.yml └── workflows │ └── main.yml ├── web_atoms ├── Cargo.toml ├── LICENSE-MIT ├── lib.rs └── build.rs ├── AUTHORS ├── tendril ├── Cargo.toml ├── LICENSE-MIT ├── src │ ├── lib.rs │ ├── 
util.rs │ ├── buf32.rs │ └── utf8_decode.rs ├── benches │ ├── futf.rs │ └── tendril.rs ├── README.md └── examples │ └── fuzz.rs ├── LICENSE-MIT ├── Cargo.toml ├── RELEASING.MD └── README.md /html5ever/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | -------------------------------------------------------------------------------- /xml5ever/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | -------------------------------------------------------------------------------- /html5ever/data/bench/tiny-fragment.html: -------------------------------------------------------------------------------- 1 |
Hello, world!
2 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | match_block_trailing_comma = true 2 | reorder_imports = true 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /data/bench/uncommitted 2 | target 3 | .idea 4 | .vscode 5 | Cargo.lock 6 | *.racertmp 7 | -------------------------------------------------------------------------------- /xml5ever/examples/example.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |In July 1992, the X/Open committee XoJIG was looking for a better encoding. Dave Prosser of Unix System Laboratories
2 | submitted a proposal for one that had faster implementation
3 | characteristics and introduced the improvement that 7-bit ASCII
4 | characters would only represent themselves; all multibyte
5 | sequences would include only bytes where the high bit was set. This
6 | original proposal, FSS-UTF (File System Safe UCS Transformation Format),
7 | was similar in concept to UTF-8, but lacked the crucial property of self-synchronization.
8 |
--------------------------------------------------------------------------------
/xml5ever/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "xml5ever"
3 | authors = ["The xml5ever project developers"]
4 | description = "Push-based streaming parser for XML."
5 | documentation = "https://docs.rs/xml5ever"
6 | homepage = "https://github.com/servo/html5ever/blob/main/xml5ever/README.md"
7 | readme = "README.md"
8 | keywords = ["xml", "xml5", "parser", "parsing"]
9 | exclude = ["xml5lib-tests/*"]
10 | categories = ["parser-implementations", "web-programming"]
11 | version.workspace = true
12 | license.workspace = true
13 | repository.workspace = true
14 | edition.workspace = true
15 | rust-version.workspace = true
16 |
17 | [features]
18 | trace_tokenizer = []
19 | serde = ["markup5ever/serde"]
20 |
21 | [dependencies]
22 | markup5ever = { workspace = true }
23 | log = { workspace = true }
24 |
25 | [dev-dependencies]
26 | criterion = { workspace = true }
27 |
28 | [[bench]]
29 | name = "xml5ever"
30 | harness = false
31 |
--------------------------------------------------------------------------------
/tendril/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "tendril"
3 | version = "0.4.3"
4 | description = "Compact buffer/string type for zero-copy parsing"
5 | authors = [
6 | "Keegan McAllister