├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-MIT ├── README.md ├── benches ├── bench.rs └── very_large.yml ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ ├── load.rs │ ├── parse.rs │ └── scan.rs ├── src ├── bin │ ├── run-emitter-test-suite.rs │ └── run-parser-test-suite.rs ├── document.rs ├── emitter.rs ├── error.rs ├── event.rs ├── lib.rs ├── macros.rs ├── parser.rs ├── reader.rs ├── scanner.rs └── token.rs └── tests ├── bin └── mod.rs ├── data ├── .gitignore ├── Cargo.toml ├── build.rs └── lib.rs ├── ignorelist ├── libyaml-emitter ├── libyaml-parser └── libyaml-parser-error ├── test_emitter.rs ├── test_parser.rs └── test_parser_error.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: simonask 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | schedule: [cron: "40 1 * * *"] 8 | 9 | permissions: 10 | contents: read 11 | 12 | env: 13 | RUSTFLAGS: -Dwarnings 14 | 15 | jobs: 16 | pre_ci: 17 | uses: dtolnay/.github/.github/workflows/pre_ci.yml@master 18 | 19 | test: 20 | name: Rust ${{matrix.rust}} 21 | needs: pre_ci 22 | if: needs.pre_ci.outputs.continue 23 | runs-on: ubuntu-latest 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | rust: [nightly, beta, stable, 1.75.0] 28 | timeout-minutes: 45 29 | steps: 30 | - uses: actions/checkout@v4 31 | - uses: dtolnay/rust-toolchain@master 32 | with: 33 | toolchain: ${{matrix.rust}} 34 | - name: Enable type layout randomization 35 | run: echo RUSTFLAGS=${RUSTFLAGS}\ -Zrandomize-layout >> $GITHUB_ENV 36 | if: matrix.rust == 'nightly' 37 | - run: cargo test 38 | 39 | msrv: 40 | name: Rust 1.70.0 41 | needs: pre_ci 42 | if: needs.pre_ci.outputs.continue 43 | 
runs-on: ubuntu-latest 44 | timeout-minutes: 45 45 | steps: 46 | - uses: actions/checkout@v4 47 | - uses: dtolnay/rust-toolchain@1.70.0 48 | - run: cargo check --lib 49 | 50 | doc: 51 | name: Documentation 52 | needs: pre_ci 53 | if: needs.pre_ci.outputs.continue 54 | runs-on: ubuntu-latest 55 | timeout-minutes: 45 56 | env: 57 | RUSTDOCFLAGS: -Dwarnings 58 | steps: 59 | - uses: actions/checkout@v4 60 | - uses: dtolnay/rust-toolchain@nightly 61 | - uses: dtolnay/install@cargo-docs-rs 62 | - run: cargo docs-rs 63 | 64 | clippy: 65 | name: Clippy 66 | runs-on: ubuntu-latest 67 | if: github.event_name != 'pull_request' 68 | timeout-minutes: 45 69 | steps: 70 | - uses: actions/checkout@v4 71 | - uses: dtolnay/rust-toolchain@clippy 72 | - run: cargo clippy --tests 73 | 74 | miri: 75 | name: Miri 76 | needs: pre_ci 77 | if: needs.pre_ci.outputs.continue 78 | runs-on: ubuntu-latest 79 | timeout-minutes: 45 80 | steps: 81 | - uses: actions/checkout@v4 82 | - uses: dtolnay/rust-toolchain@miri 83 | - run: cargo miri setup 84 | - run: cargo miri test 85 | env: 86 | MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-strict-provenance 87 | 88 | fuzz: 89 | name: Fuzz 90 | needs: pre_ci 91 | if: needs.pre_ci.outputs.continue 92 | runs-on: ubuntu-latest 93 | timeout-minutes: 45 94 | steps: 95 | - uses: actions/checkout@v4 96 | - uses: dtolnay/rust-toolchain@nightly 97 | - uses: dtolnay/install@cargo-fuzz 98 | - run: cargo fuzz check 99 | 100 | outdated: 101 | name: Outdated 102 | runs-on: ubuntu-latest 103 | if: github.event_name != 'pull_request' 104 | timeout-minutes: 45 105 | steps: 106 | - uses: actions/checkout@v4 107 | - uses: dtolnay/install@cargo-outdated 108 | - run: cargo outdated --workspace --exit-code 1 109 | - run: cargo outdated --manifest-path fuzz/Cargo.toml --exit-code 1 110 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build.rs 2 | 
/rust-toolchain 3 | target 4 | Cargo.lock 5 | /.vscode/launch.json 6 | /.vscode/settings.json 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.1.1 - 2024-02-11 4 | ### Added 5 | - Implement `PartialEq` and `Debug` for `Event` and `Token`. 6 | ### Bugfixes 7 | - Fix a bug where marks would not be correctly set for tokens and events. 8 | 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "libyaml-safer" 3 | version = "0.1.1" 4 | authors = ["Simon Ask Ulsnes ](https://github.com/simonask/libyaml-safer) 5 | [crates.io](https://crates.io/crates/libyaml-safer) 6 | [docs.rs](https://docs.rs/libyaml-safer) 7 | [build status](https://github.com/simonask/libyaml-safer/actions?query=branch%3Amaster) 8 | 9 | This library is a fork of [unsafe-libyaml] translated to safe and idiomatic Rust. 10 | 11 | [unsafe-libyaml] is [libyaml] translated from C to unsafe Rust with the 12 | assistance of [c2rust]. 13 | 14 | [unsafe-libyaml]: https://github.com/dtolnay/unsafe-libyaml 15 | [libyaml]: https://github.com/yaml/libyaml/tree/2c891fc7a770e8ba2fec34fc6b545c672beb37e6 16 | [c2rust]: https://github.com/immunant/c2rust 17 | 18 | ```toml 19 | [dependencies] 20 | libyaml-safer = "0.1" 21 | ``` 22 | 23 | *Compiler support: requires rustc 1.70* 24 | 25 | ## Notes 26 | 27 | This library uses the same test suite as unsafe-libyaml, which is also the 28 | "official" test suite for libyaml. The library was ported line by line, function 29 | by function, from unsafe-libyaml, with the aim of precisely matching its 30 | behavior, including performance and allocation patterns. 
Any observable 31 | difference in behavior, outside of API differences due to Rust conventions, is 32 | considered a bug. 33 | 34 | One notable exception to the above is that this library uses the Rust standard 35 | library in place of custom routines where possible. For example, most UTF-8 and 36 | UTF-16 encoding and decoding is handled by the standard library, and 37 | input/output callbacks are replaced with the applicable `std::io::*` traits. Due 38 | to the use of `std::io`, this library cannot currently be `no_std`. 39 | 40 | Memory allocation patterns are generally preserved, except that standard library 41 | containers may overallocate buffers using different heuristics. 42 | 43 | In places where libyaml routines are replaced by the standard library, certain 44 | errors may be reported with reduced fidelity compared with libyaml (e.g., error 45 | messages may look slightly different), but the same inputs should generate the 46 | same general errors. 47 | 48 | ### Compatibility and interoperability 49 | 50 | While this library matches the behavior of libyaml, it is not intended as a 51 | drop-in replacement. The shape of the API is idiomatic Rust, and while it is 52 | possible to emulate the C API using this library, supporting this use case is 53 | not a priority. Use `unsafe-libyaml` if that is what you need. 54 | 55 | ### Performance 56 | 57 | Performance is largely on par with `unsafe-libyaml`. No significant effort has 58 | been put into optimizing this library, beyond just choosing the most 59 | straightforward ways to reasonably port concepts from the C-like code. 60 | 61 | See 62 | [`benches/bench.rs`](https://github.com/simonask/libyaml-safer/benches/bench.rs) 63 | for a very simple benchmark dealing with a very large (~700 KiB) YAML document. 64 | On my machine (Ryzen 9 3950X) the parser from this library is slightly slower 65 | and the emitter is slightly faster, but both within about ~1ms of their unsafe 66 | counterparts. 
Run `cargo bench` to test on your machine. 67 | 68 | If there is demand, there are clear paths forward to optimize the parser. For 69 | example, due to it being ported directly from unsafe C-like code doing pointer 70 | arithmetic, it performs a completely unreasonable number of bounds checks for 71 | each input byte. 72 | 73 | ## License 74 | 75 | MIT license, same as unsafe-libyaml and libyaml. 76 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | use std::mem::MaybeUninit; 2 | 3 | use criterion::{criterion_group, criterion_main, Criterion}; 4 | use libyaml_safer::{Document, Emitter, Parser}; 5 | use unsafe_libyaml::*; 6 | 7 | static VERY_LARGE_YAML: &[u8] = include_bytes!("very_large.yml"); 8 | 9 | pub fn parser(c: &mut Criterion) { 10 | c.bench_function("libyaml-safer parse large", |b| { 11 | // Note: Not using `iter_with_large_drop` because that would be unfair 12 | // to unsafe-libyaml, which needs a call to `yaml_document_delete`. 
13 | b.iter(|| { 14 | let mut input = VERY_LARGE_YAML; 15 | let mut parser = Parser::new(); 16 | parser.set_input(&mut input); 17 | Document::load(&mut parser) 18 | }) 19 | }); 20 | 21 | c.bench_function("unsafe-libyaml parse large", |b| { 22 | b.iter(|| unsafe { 23 | let mut parser = MaybeUninit::zeroed(); 24 | if !yaml_parser_initialize(parser.as_mut_ptr()).ok { 25 | panic!("yaml_parser_initialize failed"); 26 | } 27 | let mut parser = parser.assume_init(); 28 | yaml_parser_set_input_string( 29 | &mut parser, 30 | VERY_LARGE_YAML.as_ptr(), 31 | VERY_LARGE_YAML.len() as _, 32 | ); 33 | let mut document = MaybeUninit::zeroed(); 34 | if !yaml_parser_load(&mut parser, document.as_mut_ptr()).ok { 35 | panic!("yaml_parser_load faled"); 36 | }; 37 | yaml_document_delete(document.as_mut_ptr()); 38 | yaml_parser_delete(&mut parser); 39 | }) 40 | }); 41 | 42 | c.bench_function("libyaml-safer emit large", |b| { 43 | // output shouldn't be much larger than the input, but just to be safe... 44 | let mut buffer = Vec::with_capacity(VERY_LARGE_YAML.len()); 45 | 46 | let doc = { 47 | let mut parser = Parser::new(); 48 | let mut input = VERY_LARGE_YAML; 49 | parser.set_input(&mut input); 50 | Document::load(&mut parser).unwrap() 51 | }; 52 | 53 | b.iter_custom(|iters| { 54 | let mut measurement = std::time::Duration::ZERO; 55 | for _ in 0..iters { 56 | let doc = doc.clone(); 57 | let start_time = std::time::Instant::now(); 58 | let mut emitter = Emitter::new(); 59 | emitter.set_output(&mut buffer); 60 | doc.dump(&mut emitter).unwrap(); 61 | measurement += start_time.elapsed(); 62 | } 63 | measurement 64 | }); 65 | }); 66 | 67 | c.bench_function("unsafe-libyaml emit large", |b| { 68 | // output shouldn't be much larger than the input, but just to be safe... 69 | let mut buffer = vec![0; VERY_LARGE_YAML.len() * 2]; 70 | 71 | // `yaml_document_t` cannot be cloned, so we have to parse it every iteration unfortunately. 
72 | let read_doc = || unsafe { 73 | let mut parser = MaybeUninit::zeroed(); 74 | if !yaml_parser_initialize(parser.as_mut_ptr()).ok { 75 | panic!("yaml_parser_initialize failed"); 76 | } 77 | let mut parser = parser.assume_init(); 78 | yaml_parser_set_input_string( 79 | &mut parser, 80 | VERY_LARGE_YAML.as_ptr(), 81 | VERY_LARGE_YAML.len() as _, 82 | ); 83 | let mut document = MaybeUninit::zeroed(); 84 | if !yaml_parser_load(&mut parser, document.as_mut_ptr()).ok { 85 | panic!("yaml_parser_load faled"); 86 | }; 87 | yaml_parser_delete(&mut parser); 88 | document.assume_init() 89 | }; 90 | 91 | b.iter_custom(|iters| { 92 | let mut measurement = std::time::Duration::ZERO; 93 | for _ in 0..iters { 94 | unsafe { 95 | let mut doc = read_doc(); 96 | let start_time = std::time::Instant::now(); 97 | let mut emitter = MaybeUninit::zeroed(); 98 | if !yaml_emitter_initialize(emitter.as_mut_ptr()).ok { 99 | panic!("yaml_emitter_initialize failed"); 100 | } 101 | let mut emitter = emitter.assume_init(); 102 | let mut size_written = 0; 103 | yaml_emitter_set_output_string( 104 | &mut emitter, 105 | buffer.as_mut_ptr(), 106 | buffer.len() as _, 107 | &mut size_written, 108 | ); 109 | if !yaml_emitter_dump(&mut emitter, &mut doc).ok { 110 | panic!("yaml_emitter_dump failed"); 111 | } 112 | measurement += start_time.elapsed(); 113 | yaml_emitter_delete(&mut emitter); 114 | } 115 | } 116 | measurement 117 | }); 118 | }); 119 | } 120 | 121 | criterion_group!(benches, parser); 122 | criterion_main!(benches); 123 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | artifacts/ 2 | corpus/ 3 | coverage/ 4 | target/ 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "unsafe-libyaml-fuzz" 3 | version = 
authors = ["David Tolnay <dtolnay@gmail.com>"]
-------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use libfuzzer_sys::fuzz_target; 4 | use libyaml_safer::{Scanner, TokenData}; 5 | 6 | fuzz_target!(|data: &[u8]| fuzz_target(data)); 7 | 8 | fn fuzz_target(mut data: &[u8]) { 9 | let mut scanner = Scanner::new(); 10 | scanner.set_input(&mut data); 11 | 12 | while let Ok(token) = Scanner::scan(&mut scanner) { 13 | let is_end = matches!(token.data, TokenData::StreamEnd); 14 | if is_end { 15 | break; 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/bin/run-emitter-test-suite.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::pedantic)] 2 | #![allow( 3 | clippy::cast_lossless, 4 | clippy::cast_possible_truncation, 5 | clippy::cast_possible_wrap, 6 | clippy::cast_sign_loss, 7 | clippy::items_after_statements, 8 | clippy::let_underscore_untyped, 9 | clippy::missing_errors_doc, 10 | clippy::missing_safety_doc, 11 | clippy::ptr_as_ptr, 12 | clippy::single_match_else, 13 | clippy::too_many_lines, 14 | clippy::unreadable_literal, 15 | clippy::manual_strip 16 | )] 17 | 18 | use libyaml_safer::{Emitter, Encoding, Event, MappingStyle, ScalarStyle, SequenceStyle}; 19 | use std::env; 20 | use std::error::Error; 21 | use std::fs::File; 22 | use std::io::{self, BufRead, Read, Write}; 23 | use std::process::ExitCode; 24 | 25 | pub(crate) fn test_main( 26 | stdin: &mut dyn Read, 27 | stdout: &mut dyn Write, 28 | ) -> Result<(), Box> { 29 | let mut emitter = Emitter::new(); 30 | 31 | emitter.set_output(stdout); 32 | emitter.set_canonical(false); 33 | emitter.set_unicode(false); 34 | 35 | let mut buf = std::io::BufReader::new(stdin); 36 | let mut line_buffer = String::with_capacity(1024); 37 | let mut value_buffer = String::with_capacity(128); 38 | 39 | let result = loop { 40 | line_buffer.clear(); 41 | let n = buf.read_line(&mut line_buffer)?; 42 | if n == 0 { 43 | 
break Ok(()); 44 | } 45 | let line = line_buffer.strip_suffix('\n').unwrap_or(&line_buffer); 46 | 47 | let event = if line.starts_with("+STR") { 48 | Event::stream_start(Encoding::Utf8) 49 | } else if line.starts_with("-STR") { 50 | Event::stream_end() 51 | } else if line.starts_with("+DOC") { 52 | let implicit = !line[4..].starts_with(" ---"); 53 | Event::document_start(None, &[], implicit) 54 | } else if line.starts_with("-DOC") { 55 | let implicit = !line[4..].starts_with(" ..."); 56 | Event::document_end(implicit) 57 | } else if line.starts_with("+MAP") { 58 | Event::mapping_start( 59 | get_anchor('&', line), 60 | get_tag(line), 61 | false, 62 | MappingStyle::Block, 63 | ) 64 | } else if line.starts_with("-MAP") { 65 | Event::mapping_end() 66 | } else if line.starts_with("+SEQ") { 67 | Event::sequence_start( 68 | get_anchor('&', line), 69 | get_tag(line), 70 | false, 71 | SequenceStyle::Block, 72 | ) 73 | } else if line.starts_with("-SEQ") { 74 | Event::sequence_end() 75 | } else if line.starts_with("=VAL") { 76 | let mut style = ScalarStyle::Any; 77 | let value = get_value(line, &mut value_buffer, &mut style); 78 | let implicit = get_tag(line).is_none(); 79 | Event::scalar( 80 | get_anchor('&', line), 81 | get_tag(line), 82 | value, 83 | implicit, 84 | implicit, 85 | style, 86 | ) 87 | } else if line.starts_with("=ALI") { 88 | Event::alias(get_anchor('*', line).expect("no alias name")) 89 | } else { 90 | break Err(format!("Unknown event: '{line}'").into()); 91 | }; 92 | 93 | if let Err(err) = emitter.emit(event) { 94 | break Err(err.into()); 95 | } 96 | }; 97 | 98 | result 99 | } 100 | 101 | fn get_anchor(sigil: char, line: &str) -> Option<&str> { 102 | let (_, from_sigil) = line.split_once(sigil)?; 103 | if let Some((until_space, _tail)) = from_sigil.split_once(' ') { 104 | Some(until_space) 105 | } else if !from_sigil.is_empty() { 106 | Some(from_sigil) 107 | } else { 108 | None 109 | } 110 | } 111 | 112 | fn get_tag(line: &str) -> Option<&str> { 113 | let 
(_, from_angle_open) = line.split_once('<')?; 114 | let (until_angle_close, _) = from_angle_open.split_once('>')?; 115 | Some(until_angle_close) 116 | } 117 | 118 | fn get_value<'a>(line: &str, buffer: &'a mut String, style: &mut ScalarStyle) -> &'a str { 119 | let mut remainder = line; 120 | let value = loop { 121 | let Some((_before, tail)) = remainder.split_once(' ') else { 122 | panic!("invalid line: {line}"); 123 | }; 124 | 125 | *style = match tail.chars().next().expect("string should not be empty") { 126 | ':' => ScalarStyle::Plain, 127 | '\'' => ScalarStyle::SingleQuoted, 128 | '"' => ScalarStyle::DoubleQuoted, 129 | '|' => ScalarStyle::Literal, 130 | '>' => ScalarStyle::Folded, 131 | _ => { 132 | // This was an anchor, move to the next space. 133 | remainder = tail; 134 | continue; 135 | } 136 | }; 137 | break &tail[1..]; 138 | }; 139 | 140 | buffer.clear(); 141 | // Unescape the value 142 | let mut chars = value.chars(); 143 | while let Some(ch) = chars.next() { 144 | if ch == '\\' { 145 | buffer.push(match chars.next().expect("unterminated escape sequence") { 146 | '\\' => '\\', 147 | '0' => '\0', 148 | 'b' => '\x08', 149 | 'n' => '\n', 150 | 'r' => '\r', 151 | 't' => '\t', 152 | otherwise => panic!("invalid escape character: {otherwise:?}"), 153 | }); 154 | } else { 155 | buffer.push(ch); 156 | } 157 | } 158 | 159 | &*buffer 160 | } 161 | 162 | fn main() -> ExitCode { 163 | let args = env::args_os().skip(1); 164 | if args.len() == 0 { 165 | let _ = writeln!( 166 | io::stderr(), 167 | "Usage: run-emitter-test-suite ...", 168 | ); 169 | return ExitCode::FAILURE; 170 | } 171 | for arg in args { 172 | let mut stdin = File::open(arg).unwrap(); 173 | let mut stdout = io::stdout(); 174 | let result = test_main(&mut stdin, &mut stdout); 175 | if let Err(err) = result { 176 | let _ = writeln!(io::stderr(), "{err}"); 177 | return ExitCode::FAILURE; 178 | } 179 | } 180 | ExitCode::SUCCESS 181 | } 182 | 
-------------------------------------------------------------------------------- /src/bin/run-parser-test-suite.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::pedantic)] 2 | #![allow( 3 | clippy::cast_lossless, 4 | clippy::cast_possible_truncation, 5 | clippy::cast_possible_wrap, 6 | clippy::cast_sign_loss, 7 | clippy::items_after_statements, 8 | clippy::let_underscore_untyped, 9 | clippy::missing_errors_doc, 10 | clippy::missing_safety_doc, 11 | clippy::too_many_lines 12 | )] 13 | 14 | use libyaml_safer::{EventData, Parser, ScalarStyle}; 15 | use std::env; 16 | use std::error::Error; 17 | use std::fs::File; 18 | use std::io::{self, Read, Write}; 19 | use std::process::{self, ExitCode}; 20 | use std::slice; 21 | 22 | pub(crate) fn test_main( 23 | stdin: &mut dyn Read, 24 | stdout: &mut dyn Write, 25 | ) -> Result<(), Box> { 26 | let mut parser = Parser::new(); 27 | 28 | let mut stdin = std::io::BufReader::new(stdin); 29 | parser.set_input(&mut stdin); 30 | 31 | loop { 32 | let event = match parser.parse() { 33 | Err(err) => { 34 | let error = format!("Parse error: {err}"); 35 | return Err(error.into()); 36 | } 37 | Ok(event) => event, 38 | }; 39 | 40 | let mut is_end = false; 41 | 42 | match &event.data { 43 | EventData::StreamStart { .. } => { 44 | _ = writeln!(stdout, "+STR"); 45 | } 46 | EventData::StreamEnd => { 47 | is_end = true; 48 | _ = writeln!(stdout, "-STR"); 49 | } 50 | EventData::DocumentStart { implicit, .. } => { 51 | _ = write!(stdout, "+DOC"); 52 | if !implicit { 53 | _ = write!(stdout, " ---"); 54 | } 55 | _ = writeln!(stdout); 56 | } 57 | EventData::DocumentEnd { implicit } => { 58 | _ = write!(stdout, "-DOC"); 59 | if !implicit { 60 | _ = write!(stdout, " ..."); 61 | } 62 | _ = writeln!(stdout); 63 | } 64 | EventData::Alias { anchor } => { 65 | _ = writeln!(stdout, "=ALI *{anchor}"); 66 | } 67 | EventData::Scalar { 68 | anchor, 69 | tag, 70 | value, 71 | style, 72 | .. 
73 | } => { 74 | let _ = write!(stdout, "=VAL"); 75 | if let Some(anchor) = anchor { 76 | _ = write!(stdout, " &{anchor}"); 77 | } 78 | if let Some(tag) = tag { 79 | _ = write!(stdout, " <{tag}>"); 80 | } 81 | _ = stdout.write_all(match style { 82 | ScalarStyle::Plain => b" :", 83 | ScalarStyle::SingleQuoted => b" '", 84 | ScalarStyle::DoubleQuoted => b" \"", 85 | ScalarStyle::Literal => b" |", 86 | ScalarStyle::Folded => b" >", 87 | _ => process::abort(), 88 | }); 89 | print_escaped(stdout, value); 90 | _ = writeln!(stdout); 91 | } 92 | EventData::SequenceStart { anchor, tag, .. } => { 93 | let _ = write!(stdout, "+SEQ"); 94 | if let Some(anchor) = anchor { 95 | _ = write!(stdout, " &{anchor}"); 96 | } 97 | if let Some(tag) = tag { 98 | _ = write!(stdout, " <{tag}>"); 99 | } 100 | _ = writeln!(stdout); 101 | } 102 | EventData::SequenceEnd => { 103 | _ = writeln!(stdout, "-SEQ"); 104 | } 105 | EventData::MappingStart { anchor, tag, .. } => { 106 | let _ = write!(stdout, "+MAP"); 107 | if let Some(anchor) = anchor { 108 | _ = write!(stdout, " &{anchor}"); 109 | } 110 | if let Some(tag) = tag { 111 | _ = write!(stdout, " <{tag}>"); 112 | } 113 | _ = writeln!(stdout); 114 | } 115 | EventData::MappingEnd => { 116 | _ = writeln!(stdout, "-MAP"); 117 | } 118 | } 119 | 120 | if is_end { 121 | break; 122 | } 123 | } 124 | Ok(()) 125 | } 126 | 127 | fn print_escaped(stdout: &mut dyn Write, s: &str) { 128 | for ch in s.bytes() { 129 | let repr = match &ch { 130 | b'\\' => b"\\\\", 131 | b'\0' => b"\\0", 132 | b'\x08' => b"\\b", 133 | b'\n' => b"\\n", 134 | b'\r' => b"\\r", 135 | b'\t' => b"\\t", 136 | c => slice::from_ref(c), 137 | }; 138 | let _ = stdout.write_all(repr); 139 | } 140 | } 141 | 142 | fn main() -> ExitCode { 143 | let args = env::args_os().skip(1); 144 | if args.len() == 0 { 145 | let _ = writeln!(io::stderr(), "Usage: run-parser-test-suite ..."); 146 | return ExitCode::FAILURE; 147 | } 148 | for arg in args { 149 | let mut stdin = File::open(arg).unwrap(); 
150 | let mut stdout = io::stdout(); 151 | let result = test_main(&mut stdin, &mut stdout); 152 | if let Err(err) = result { 153 | let _ = writeln!(io::stderr(), "{err}"); 154 | return ExitCode::FAILURE; 155 | } 156 | } 157 | ExitCode::SUCCESS 158 | } 159 | -------------------------------------------------------------------------------- /src/document.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | AliasData, Anchors, Emitter, Error, Event, EventData, MappingStyle, Mark, Parser, Result, 3 | ScalarStyle, SequenceStyle, TagDirective, VersionDirective, DEFAULT_MAPPING_TAG, 4 | DEFAULT_SCALAR_TAG, DEFAULT_SEQUENCE_TAG, 5 | }; 6 | 7 | /// The document structure. 8 | #[derive(Clone, Debug)] 9 | #[non_exhaustive] 10 | pub struct Document { 11 | /// The document nodes. 12 | pub nodes: Vec, 13 | /// The version directive. 14 | pub version_directive: Option, 15 | /// The list of tag directives. 16 | pub tag_directives: Vec, 17 | /// Is the document start indicator implicit? 18 | pub start_implicit: bool, 19 | /// Is the document end indicator implicit? 20 | pub end_implicit: bool, 21 | /// The beginning of the document. 22 | pub start_mark: Mark, 23 | /// The end of the document. 24 | pub end_mark: Mark, 25 | } 26 | 27 | /// The node structure. 28 | #[derive(Clone, Default, Debug)] 29 | #[non_exhaustive] 30 | pub struct Node { 31 | /// The node type. 32 | pub data: NodeData, 33 | /// The node tag. 34 | pub tag: Option, 35 | /// The beginning of the node. 36 | pub start_mark: Mark, 37 | /// The end of the node. 38 | pub end_mark: Mark, 39 | } 40 | 41 | /// Node types. 42 | #[derive(Clone, Default, Debug)] 43 | pub enum NodeData { 44 | /// An empty node. 45 | #[default] 46 | NoNode, 47 | /// A scalar node. 48 | Scalar { 49 | /// The scalar value. 50 | value: String, 51 | /// The scalar style. 52 | style: ScalarStyle, 53 | }, 54 | /// A sequence node. 55 | Sequence { 56 | /// The stack of sequence items. 
items: Vec<NodeItem>, 58 | /// The sequence style. 59 | style: SequenceStyle, 60 | }, 61 | /// A mapping node. 62 | Mapping { 63 | /// The stack of mapping pairs (key, value). 64 | pairs: Vec<NodePair>,
126 | pub fn get_root_node(&mut self) -> Option<&mut Node> { 127 | self.nodes.get_mut(0) 128 | } 129 | 130 | /// Create a SCALAR node and attach it to the document. 131 | /// 132 | /// The `style` argument may be ignored by the emitter. 133 | /// 134 | /// Returns the node id or 0 on error. 135 | #[must_use] 136 | pub fn add_scalar(&mut self, tag: Option<&str>, value: &str, style: ScalarStyle) -> i32 { 137 | let mark = Mark { 138 | index: 0_u64, 139 | line: 0_u64, 140 | column: 0_u64, 141 | }; 142 | let tag = tag.unwrap_or(DEFAULT_SCALAR_TAG); 143 | let tag_copy = String::from(tag); 144 | let value_copy = String::from(value); 145 | let node = Node { 146 | data: NodeData::Scalar { 147 | value: value_copy, 148 | style, 149 | }, 150 | tag: Some(tag_copy), 151 | start_mark: mark, 152 | end_mark: mark, 153 | }; 154 | self.nodes.push(node); 155 | self.nodes.len() as i32 156 | } 157 | 158 | /// Create a SEQUENCE node and attach it to the document. 159 | /// 160 | /// The `style` argument may be ignored by the emitter. 161 | /// 162 | /// Returns the node id, which is a nonzero integer. 163 | #[must_use] 164 | pub fn add_sequence(&mut self, tag: Option<&str>, style: SequenceStyle) -> i32 { 165 | let mark = Mark { 166 | index: 0_u64, 167 | line: 0_u64, 168 | column: 0_u64, 169 | }; 170 | 171 | let items = Vec::with_capacity(16); 172 | let tag = tag.unwrap_or(DEFAULT_SEQUENCE_TAG); 173 | let tag_copy = String::from(tag); 174 | let node = Node { 175 | data: NodeData::Sequence { items, style }, 176 | tag: Some(tag_copy), 177 | start_mark: mark, 178 | end_mark: mark, 179 | }; 180 | self.nodes.push(node); 181 | self.nodes.len() as i32 182 | } 183 | 184 | /// Create a MAPPING node and attach it to the document. 185 | /// 186 | /// The `style` argument may be ignored by the emitter. 187 | /// 188 | /// Returns the node id, which is a nonzero integer. 
189 | #[must_use] 190 | pub fn add_mapping(&mut self, tag: Option<&str>, style: MappingStyle) -> i32 { 191 | let mark = Mark { 192 | index: 0_u64, 193 | line: 0_u64, 194 | column: 0_u64, 195 | }; 196 | let pairs = Vec::with_capacity(16); 197 | let tag = tag.unwrap_or(DEFAULT_MAPPING_TAG); 198 | let tag_copy = String::from(tag); 199 | 200 | let node = Node { 201 | data: NodeData::Mapping { pairs, style }, 202 | tag: Some(tag_copy), 203 | start_mark: mark, 204 | end_mark: mark, 205 | }; 206 | 207 | self.nodes.push(node); 208 | self.nodes.len() as i32 209 | } 210 | 211 | /// Add an item to a SEQUENCE node. 212 | pub fn append_sequence_item(&mut self, sequence: i32, item: i32) { 213 | assert!(sequence > 0 && sequence as usize - 1 < self.nodes.len()); 214 | assert!(matches!( 215 | &self.nodes[sequence as usize - 1].data, 216 | NodeData::Sequence { .. } 217 | )); 218 | assert!(item > 0 && item as usize - 1 < self.nodes.len()); 219 | if let NodeData::Sequence { ref mut items, .. } = 220 | &mut self.nodes[sequence as usize - 1].data 221 | { 222 | items.push(item); 223 | } 224 | } 225 | 226 | /// Add a pair of a key and a value to a MAPPING node. 227 | pub fn yaml_document_append_mapping_pair(&mut self, mapping: i32, key: i32, value: i32) { 228 | assert!(mapping > 0 && mapping as usize - 1 < self.nodes.len()); 229 | assert!(matches!( 230 | &self.nodes[mapping as usize - 1].data, 231 | NodeData::Mapping { .. } 232 | )); 233 | assert!(key > 0 && key as usize - 1 < self.nodes.len()); 234 | assert!(value > 0 && value as usize - 1 < self.nodes.len()); 235 | let pair = NodePair { key, value }; 236 | if let NodeData::Mapping { ref mut pairs, .. } = &mut self.nodes[mapping as usize - 1].data 237 | { 238 | pairs.push(pair); 239 | } 240 | } 241 | 242 | /// Parse the input stream and produce the next YAML document. 243 | /// 244 | /// Call this function subsequently to produce a sequence of documents 245 | /// constituting the input stream. 
246 | /// 247 | /// If the produced document has no root node, it means that the document 248 | /// end has been reached. 249 | /// 250 | /// An application must not alternate the calls of [`Document::load()`] with 251 | /// the calls of [`Parser::parse()`]. Doing this will break the parser. 252 | pub fn load(parser: &mut Parser) -> Result { 253 | let mut document = Document::new(None, &[], false, false); 254 | document.nodes.reserve(16); 255 | 256 | if !parser.scanner.stream_start_produced { 257 | match parser.parse() { 258 | Ok(Event { 259 | data: EventData::StreamStart { .. }, 260 | .. 261 | }) => (), 262 | Ok(_) => panic!("expected stream start"), 263 | Err(err) => { 264 | parser.delete_aliases(); 265 | return Err(err); 266 | } 267 | } 268 | } 269 | if parser.scanner.stream_end_produced { 270 | return Ok(document); 271 | } 272 | let err: Error; 273 | match parser.parse() { 274 | Ok(event) => { 275 | if let EventData::StreamEnd = &event.data { 276 | return Ok(document); 277 | } 278 | parser.aliases.reserve(16); 279 | match document.load_document(parser, event) { 280 | Ok(()) => { 281 | parser.delete_aliases(); 282 | return Ok(document); 283 | } 284 | Err(e) => err = e, 285 | } 286 | } 287 | Err(e) => err = e, 288 | } 289 | parser.delete_aliases(); 290 | Err(err) 291 | } 292 | 293 | fn load_document(&mut self, parser: &mut Parser, event: Event) -> Result<()> { 294 | let mut ctx = vec![]; 295 | if let EventData::DocumentStart { 296 | version_directive, 297 | tag_directives, 298 | implicit, 299 | } = event.data 300 | { 301 | self.version_directive = version_directive; 302 | self.tag_directives = tag_directives; 303 | self.start_implicit = implicit; 304 | self.start_mark = event.start_mark; 305 | ctx.reserve(16); 306 | if let Err(err) = self.load_nodes(parser, &mut ctx) { 307 | ctx.clear(); 308 | return Err(err); 309 | } 310 | ctx.clear(); 311 | Ok(()) 312 | } else { 313 | panic!("Expected YAML_DOCUMENT_START_EVENT") 314 | } 315 | } 316 | 317 | fn load_nodes(&mut 
self, parser: &mut Parser, ctx: &mut Vec) -> Result<()> { 318 | let end_implicit; 319 | let end_mark; 320 | 321 | loop { 322 | let event = parser.parse()?; 323 | match event.data { 324 | EventData::StreamStart { .. } => panic!("unexpected stream start event"), 325 | EventData::StreamEnd => panic!("unexpected stream end event"), 326 | EventData::DocumentStart { .. } => panic!("unexpected document start event"), 327 | EventData::DocumentEnd { implicit } => { 328 | end_implicit = implicit; 329 | end_mark = event.end_mark; 330 | break; 331 | } 332 | EventData::Alias { .. } => { 333 | self.load_alias(parser, event, ctx)?; 334 | } 335 | EventData::Scalar { .. } => { 336 | self.load_scalar(parser, event, ctx)?; 337 | } 338 | EventData::SequenceStart { .. } => { 339 | self.load_sequence(parser, event, ctx)?; 340 | } 341 | EventData::SequenceEnd => { 342 | self.load_sequence_end(event, ctx)?; 343 | } 344 | EventData::MappingStart { .. } => { 345 | self.load_mapping(parser, event, ctx)?; 346 | } 347 | EventData::MappingEnd => { 348 | self.load_mapping_end(event, ctx)?; 349 | } 350 | } 351 | } 352 | self.end_implicit = end_implicit; 353 | self.end_mark = end_mark; 354 | Ok(()) 355 | } 356 | 357 | fn register_anchor( 358 | &mut self, 359 | parser: &mut Parser, 360 | index: i32, 361 | anchor: Option, 362 | ) -> Result<()> { 363 | let Some(anchor) = anchor else { 364 | return Ok(()); 365 | }; 366 | let data = AliasData { 367 | anchor, 368 | index, 369 | mark: self.nodes[index as usize - 1].start_mark, 370 | }; 371 | for alias_data in &parser.aliases { 372 | if alias_data.anchor == data.anchor { 373 | return Err(Error::composer( 374 | "found duplicate anchor; first occurrence", 375 | alias_data.mark, 376 | "second occurrence", 377 | data.mark, 378 | )); 379 | } 380 | } 381 | parser.aliases.push(data); 382 | Ok(()) 383 | } 384 | 385 | fn load_node_add(&mut self, ctx: &[i32], index: i32) -> Result<()> { 386 | let Some(parent_index) = ctx.last() else { 387 | return Ok(()); 388 | }; 
389 | let parent_index = *parent_index; 390 | let parent = &mut self.nodes[parent_index as usize - 1]; 391 | match parent.data { 392 | NodeData::Sequence { ref mut items, .. } => { 393 | items.push(index); 394 | } 395 | NodeData::Mapping { ref mut pairs, .. } => match pairs.last_mut() { 396 | // If the last pair does not have a value, set `index` as the value. 397 | Some(pair @ NodePair { value: 0, .. }) => { 398 | pair.value = index; 399 | } 400 | // Otherwise push a new pair where `index` is the key. 401 | _ => pairs.push(NodePair { 402 | key: index, 403 | value: 0, 404 | }), 405 | }, 406 | _ => { 407 | panic!("document parent node is not a sequence or a mapping") 408 | } 409 | } 410 | Ok(()) 411 | } 412 | 413 | fn load_alias(&mut self, parser: &mut Parser, event: Event, ctx: &[i32]) -> Result<()> { 414 | let EventData::Alias { anchor } = &event.data else { 415 | unreachable!() 416 | }; 417 | 418 | for alias_data in &parser.aliases { 419 | if alias_data.anchor == *anchor { 420 | return self.load_node_add(ctx, alias_data.index); 421 | } 422 | } 423 | 424 | Err(Error::composer( 425 | "", 426 | Mark::default(), 427 | "found undefined alias", 428 | event.start_mark, 429 | )) 430 | } 431 | 432 | fn load_scalar(&mut self, parser: &mut Parser, event: Event, ctx: &[i32]) -> Result<()> { 433 | let EventData::Scalar { 434 | mut tag, 435 | value, 436 | style, 437 | anchor, 438 | .. 
439 | } = event.data 440 | else { 441 | unreachable!() 442 | }; 443 | 444 | if tag.is_none() || tag.as_deref() == Some("!") { 445 | tag = Some(String::from(DEFAULT_SCALAR_TAG)); 446 | } 447 | let node = Node { 448 | data: NodeData::Scalar { value, style }, 449 | tag, 450 | start_mark: event.start_mark, 451 | end_mark: event.end_mark, 452 | }; 453 | self.nodes.push(node); 454 | let index: i32 = self.nodes.len() as i32; 455 | self.register_anchor(parser, index, anchor)?; 456 | self.load_node_add(ctx, index) 457 | } 458 | 459 | fn load_sequence( 460 | &mut self, 461 | parser: &mut Parser, 462 | event: Event, 463 | ctx: &mut Vec, 464 | ) -> Result<()> { 465 | let EventData::SequenceStart { 466 | anchor, 467 | mut tag, 468 | style, 469 | .. 470 | } = event.data 471 | else { 472 | unreachable!() 473 | }; 474 | 475 | let mut items = Vec::with_capacity(16); 476 | 477 | if tag.is_none() || tag.as_deref() == Some("!") { 478 | tag = Some(String::from(DEFAULT_SEQUENCE_TAG)); 479 | } 480 | 481 | let node = Node { 482 | data: NodeData::Sequence { 483 | items: core::mem::take(&mut items), 484 | style, 485 | }, 486 | tag, 487 | start_mark: event.start_mark, 488 | end_mark: event.end_mark, 489 | }; 490 | 491 | self.nodes.push(node); 492 | let index: i32 = self.nodes.len() as i32; 493 | self.register_anchor(parser, index, anchor)?; 494 | self.load_node_add(ctx, index)?; 495 | ctx.push(index); 496 | Ok(()) 497 | } 498 | 499 | fn load_sequence_end(&mut self, event: Event, ctx: &mut Vec) -> Result<()> { 500 | let Some(index) = ctx.last().copied() else { 501 | panic!("sequence_end without a current sequence") 502 | }; 503 | assert!(matches!( 504 | self.nodes[index as usize - 1].data, 505 | NodeData::Sequence { .. 
} 506 | )); 507 | self.nodes[index as usize - 1].end_mark = event.end_mark; 508 | ctx.pop(); 509 | Ok(()) 510 | } 511 | 512 | fn load_mapping( 513 | &mut self, 514 | parser: &mut Parser, 515 | event: Event, 516 | ctx: &mut Vec, 517 | ) -> Result<()> { 518 | let EventData::MappingStart { 519 | anchor, 520 | mut tag, 521 | style, 522 | .. 523 | } = event.data 524 | else { 525 | unreachable!() 526 | }; 527 | 528 | let mut pairs = Vec::with_capacity(16); 529 | 530 | if tag.is_none() || tag.as_deref() == Some("!") { 531 | tag = Some(String::from(DEFAULT_MAPPING_TAG)); 532 | } 533 | let node = Node { 534 | data: NodeData::Mapping { 535 | pairs: core::mem::take(&mut pairs), 536 | style, 537 | }, 538 | tag, 539 | start_mark: event.start_mark, 540 | end_mark: event.end_mark, 541 | }; 542 | self.nodes.push(node); 543 | let index: i32 = self.nodes.len() as i32; 544 | self.register_anchor(parser, index, anchor)?; 545 | self.load_node_add(ctx, index)?; 546 | ctx.push(index); 547 | Ok(()) 548 | } 549 | 550 | fn load_mapping_end(&mut self, event: Event, ctx: &mut Vec) -> Result<()> { 551 | let Some(index) = ctx.last().copied() else { 552 | panic!("mapping_end without a current mapping") 553 | }; 554 | assert!(matches!( 555 | self.nodes[index as usize - 1].data, 556 | NodeData::Mapping { .. } 557 | )); 558 | self.nodes[index as usize - 1].end_mark = event.end_mark; 559 | ctx.pop(); 560 | Ok(()) 561 | } 562 | 563 | /// Emit a YAML document. 564 | /// 565 | /// The document object may be generated using the [`Document::load()`] 566 | /// function or the [`Document::new()`] function. 567 | pub fn dump(mut self, emitter: &mut Emitter) -> Result<()> { 568 | if !emitter.opened { 569 | if let Err(err) = emitter.open() { 570 | emitter.reset_anchors(); 571 | return Err(err); 572 | } 573 | } 574 | if self.nodes.is_empty() { 575 | // TODO: Do we really want to close the emitter just because the 576 | // document contains no nodes? 
Isn't it OK to emit multiple documents in 577 | // the same stream? 578 | emitter.close()?; 579 | } else { 580 | assert!(emitter.opened); 581 | emitter.anchors = vec![Anchors::default(); self.nodes.len()]; 582 | let event = Event::new(EventData::DocumentStart { 583 | version_directive: self.version_directive, 584 | tag_directives: core::mem::take(&mut self.tag_directives), 585 | implicit: self.start_implicit, 586 | }); 587 | emitter.emit(event)?; 588 | self.anchor_node(emitter, 1); 589 | self.dump_node(emitter, 1)?; 590 | let event = Event::document_end(self.end_implicit); 591 | emitter.emit(event)?; 592 | } 593 | 594 | emitter.reset_anchors(); 595 | Ok(()) 596 | } 597 | 598 | fn anchor_node(&self, emitter: &mut Emitter, index: i32) { 599 | let node = &self.nodes[index as usize - 1]; 600 | emitter.anchors[index as usize - 1].references += 1; 601 | if emitter.anchors[index as usize - 1].references == 1 { 602 | match &node.data { 603 | NodeData::Sequence { items, .. } => { 604 | for item in items { 605 | emitter.anchor_node_sub(*item); 606 | } 607 | } 608 | NodeData::Mapping { pairs, .. 
} => { 609 | for pair in pairs { 610 | emitter.anchor_node_sub(pair.key); 611 | emitter.anchor_node_sub(pair.value); 612 | } 613 | } 614 | _ => {} 615 | } 616 | } else if emitter.anchors[index as usize - 1].references == 2 { 617 | emitter.last_anchor_id += 1; 618 | emitter.anchors[index as usize - 1].anchor = emitter.last_anchor_id; 619 | } 620 | } 621 | 622 | fn dump_node(&mut self, emitter: &mut Emitter, index: i32) -> Result<()> { 623 | assert!(index > 0); 624 | let node = &mut self.nodes[index as usize - 1]; 625 | let anchor_id: i32 = emitter.anchors[index as usize - 1].anchor; 626 | let mut anchor: Option = None; 627 | if anchor_id != 0 { 628 | anchor = Some(Emitter::generate_anchor(anchor_id)); 629 | } 630 | if emitter.anchors[index as usize - 1].serialized { 631 | return Self::dump_alias(emitter, anchor.unwrap()); 632 | } 633 | emitter.anchors[index as usize - 1].serialized = true; 634 | 635 | let node = core::mem::take(node); 636 | match node.data { 637 | NodeData::Scalar { .. } => Self::dump_scalar(emitter, node, anchor), 638 | NodeData::Sequence { .. } => self.dump_sequence(emitter, node, anchor), 639 | NodeData::Mapping { .. } => self.dump_mapping(emitter, node, anchor), 640 | _ => unreachable!("document node is neither a scalar, sequence, or a mapping"), 641 | } 642 | } 643 | 644 | fn dump_alias(emitter: &mut Emitter, anchor: String) -> Result<()> { 645 | let event = Event::new(EventData::Alias { anchor }); 646 | emitter.emit(event) 647 | } 648 | 649 | fn dump_scalar(emitter: &mut Emitter, node: Node, anchor: Option) -> Result<()> { 650 | let plain_implicit = node.tag.as_deref() == Some(DEFAULT_SCALAR_TAG); 651 | let quoted_implicit = node.tag.as_deref() == Some(DEFAULT_SCALAR_TAG); // TODO: Why compare twice?! 
(even the C code does this) 652 | 653 | let NodeData::Scalar { value, style } = node.data else { 654 | unreachable!() 655 | }; 656 | let event = Event::new(EventData::Scalar { 657 | anchor, 658 | tag: node.tag, 659 | value, 660 | plain_implicit, 661 | quoted_implicit, 662 | style, 663 | }); 664 | emitter.emit(event) 665 | } 666 | 667 | fn dump_sequence( 668 | &mut self, 669 | emitter: &mut Emitter, 670 | node: Node, 671 | anchor: Option, 672 | ) -> Result<()> { 673 | let implicit = node.tag.as_deref() == Some(DEFAULT_SEQUENCE_TAG); 674 | 675 | let NodeData::Sequence { items, style } = node.data else { 676 | unreachable!() 677 | }; 678 | let event = Event::new(EventData::SequenceStart { 679 | anchor, 680 | tag: node.tag, 681 | implicit, 682 | style, 683 | }); 684 | 685 | emitter.emit(event)?; 686 | for item in items { 687 | self.dump_node(emitter, item)?; 688 | } 689 | let event = Event::sequence_end(); 690 | emitter.emit(event) 691 | } 692 | 693 | fn dump_mapping( 694 | &mut self, 695 | emitter: &mut Emitter, 696 | node: Node, 697 | anchor: Option, 698 | ) -> Result<()> { 699 | let implicit = node.tag.as_deref() == Some(DEFAULT_MAPPING_TAG); 700 | 701 | let NodeData::Mapping { pairs, style } = node.data else { 702 | unreachable!() 703 | }; 704 | let event = Event::new(EventData::MappingStart { 705 | anchor, 706 | tag: node.tag, 707 | implicit, 708 | style, 709 | }); 710 | 711 | emitter.emit(event)?; 712 | for pair in pairs { 713 | self.dump_node(emitter, pair.key)?; 714 | self.dump_node(emitter, pair.value)?; 715 | } 716 | let event = Event::mapping_end(); 717 | emitter.emit(event) 718 | } 719 | } 720 | -------------------------------------------------------------------------------- /src/emitter.rs: -------------------------------------------------------------------------------- 1 | use std::collections::VecDeque; 2 | 3 | use crate::macros::{ 4 | is_alpha, is_ascii, is_blank, is_blankz, is_bom, is_break, is_breakz, is_printable, is_space, 5 | }; 6 | use crate::{ 7 
| Break, Encoding, Error, Event, EventData, MappingStyle, Result, ScalarStyle, SequenceStyle, 8 | TagDirective, VersionDirective, OUTPUT_BUFFER_SIZE, 9 | }; 10 | 11 | /// The emitter structure. 12 | /// 13 | /// All members are internal. Manage the structure using the `yaml_emitter_` 14 | /// family of functions. 15 | #[non_exhaustive] 16 | pub struct Emitter<'w> { 17 | /// Write handler. 18 | pub(crate) write_handler: Option<&'w mut dyn std::io::Write>, 19 | /// The working buffer. 20 | /// 21 | /// This always contains valid UTF-8. 22 | pub(crate) buffer: String, 23 | /// The raw buffer. 24 | /// 25 | /// This contains the output in the encoded format, so for example it may be 26 | /// UTF-16 encoded. 27 | pub(crate) raw_buffer: Vec, 28 | /// The stream encoding. 29 | pub(crate) encoding: Encoding, 30 | /// If the output is in the canonical style? 31 | pub(crate) canonical: bool, 32 | /// The number of indentation spaces. 33 | pub(crate) best_indent: i32, 34 | /// The preferred width of the output lines. 35 | pub(crate) best_width: i32, 36 | /// Allow unescaped non-ASCII characters? 37 | pub(crate) unicode: bool, 38 | /// The preferred line break. 39 | pub(crate) line_break: Break, 40 | /// The stack of states. 41 | pub(crate) states: Vec, 42 | /// The current emitter state. 43 | pub(crate) state: EmitterState, 44 | /// The event queue. 45 | pub(crate) events: VecDeque, 46 | /// The stack of indentation levels. 47 | pub(crate) indents: Vec, 48 | /// The list of tag directives. 49 | pub(crate) tag_directives: Vec, 50 | /// The current indentation level. 51 | pub(crate) indent: i32, 52 | /// The current flow level. 53 | pub(crate) flow_level: i32, 54 | /// Is it the document root context? 55 | pub(crate) root_context: bool, 56 | /// Is it a sequence context? 57 | pub(crate) sequence_context: bool, 58 | /// Is it a mapping context? 59 | pub(crate) mapping_context: bool, 60 | /// Is it a simple mapping key context? 
61 | pub(crate) simple_key_context: bool, 62 | /// The current line. 63 | pub(crate) line: i32, 64 | /// The current column. 65 | pub(crate) column: i32, 66 | /// If the last character was a whitespace? 67 | pub(crate) whitespace: bool, 68 | /// If the last character was an indentation character (' ', '-', '?', ':')? 69 | pub(crate) indention: bool, 70 | /// If an explicit document end is required? 71 | pub(crate) open_ended: i32, 72 | /// If the stream was already opened? 73 | pub(crate) opened: bool, 74 | /// If the stream was already closed? 75 | pub(crate) closed: bool, 76 | /// The information associated with the document nodes. 77 | // Note: Same length as `document.nodes`. 78 | pub(crate) anchors: Vec, 79 | /// The last assigned anchor id. 80 | pub(crate) last_anchor_id: i32, 81 | } 82 | 83 | impl<'a> Default for Emitter<'a> { 84 | fn default() -> Self { 85 | Self::new() 86 | } 87 | } 88 | 89 | /// The emitter states. 90 | #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 91 | #[non_exhaustive] 92 | pub enum EmitterState { 93 | /// Expect STREAM-START. 94 | #[default] 95 | StreamStart = 0, 96 | /// Expect the first DOCUMENT-START or STREAM-END. 97 | FirstDocumentStart = 1, 98 | /// Expect DOCUMENT-START or STREAM-END. 99 | DocumentStart = 2, 100 | /// Expect the content of a document. 101 | DocumentContent = 3, 102 | /// Expect DOCUMENT-END. 103 | DocumentEnd = 4, 104 | /// Expect the first item of a flow sequence. 105 | FlowSequenceFirstItem = 5, 106 | /// Expect an item of a flow sequence. 107 | FlowSequenceItem = 6, 108 | /// Expect the first key of a flow mapping. 109 | FlowMappingFirstKey = 7, 110 | /// Expect a key of a flow mapping. 111 | FlowMappingKey = 8, 112 | /// Expect a value for a simple key of a flow mapping. 113 | FlowMappingSimpleValue = 9, 114 | /// Expect a value of a flow mapping. 115 | FlowMappingValue = 10, 116 | /// Expect the first item of a block sequence. 
117 | BlockSequenceFirstItem = 11, 118 | /// Expect an item of a block sequence. 119 | BlockSequenceItem = 12, 120 | /// Expect the first key of a block mapping. 121 | BlockMappingFirstKey = 13, 122 | /// Expect the key of a block mapping. 123 | BlockMappingKey = 14, 124 | /// Expect a value for a simple key of a block mapping. 125 | BlockMappingSimpleValue = 15, 126 | /// Expect a value of a block mapping. 127 | BlockMappingValue = 16, 128 | /// Expect nothing. 129 | End = 17, 130 | } 131 | 132 | #[derive(Copy, Clone, Default)] 133 | pub(crate) struct Anchors { 134 | /// The number of references. 135 | pub references: i32, 136 | /// The anchor id. 137 | pub anchor: i32, 138 | /// If the node has been emitted? 139 | pub serialized: bool, 140 | } 141 | 142 | #[derive(Default)] 143 | struct Analysis<'a> { 144 | pub anchor: Option>, 145 | pub tag: Option>, 146 | pub scalar: Option>, 147 | } 148 | 149 | struct AnchorAnalysis<'a> { 150 | pub anchor: &'a str, 151 | pub alias: bool, 152 | } 153 | 154 | struct TagAnalysis<'a> { 155 | pub handle: &'a str, 156 | pub suffix: &'a str, 157 | } 158 | 159 | struct ScalarAnalysis<'a> { 160 | /// The scalar value. 161 | pub value: &'a str, 162 | /// Does the scalar contain line breaks? 163 | pub multiline: bool, 164 | /// Can the scalar be expessed in the flow plain style? 165 | pub flow_plain_allowed: bool, 166 | /// Can the scalar be expressed in the block plain style? 167 | pub block_plain_allowed: bool, 168 | /// Can the scalar be expressed in the single quoted style? 169 | pub single_quoted_allowed: bool, 170 | /// Can the scalar be expressed in the literal or folded styles? 171 | pub block_allowed: bool, 172 | /// The output style. 173 | pub style: ScalarStyle, 174 | } 175 | 176 | impl<'w> Emitter<'w> { 177 | /// Create an self. 
    pub fn new() -> Emitter<'w> {
        Emitter {
            write_handler: None,
            buffer: String::with_capacity(OUTPUT_BUFFER_SIZE),
            raw_buffer: Vec::with_capacity(OUTPUT_BUFFER_SIZE),
            encoding: Encoding::Any,
            canonical: false,
            // Zero means "unset"; these are normalized to valid values when
            // the STREAM-START event is processed.
            best_indent: 0,
            best_width: 0,
            unicode: false,
            line_break: Break::default(),
            states: Vec::with_capacity(16),
            state: EmitterState::default(),
            events: VecDeque::with_capacity(16),
            indents: Vec::with_capacity(16),
            tag_directives: Vec::with_capacity(16),
            indent: 0,
            flow_level: 0,
            root_context: false,
            sequence_context: false,
            mapping_context: false,
            simple_key_context: false,
            line: 0,
            column: 0,
            whitespace: false,
            indention: false,
            open_ended: 0,
            opened: false,
            closed: false,
            anchors: Vec::new(),
            last_anchor_id: 0,
        }
    }

    /// Reset the emitter state.
    pub fn reset(&mut self) {
        *self = Self::new();
    }

    /// Start a YAML stream.
    ///
    /// This function should be used before
    /// [`Document::dump()`](crate::Document::dump) is called.
    pub fn open(&mut self) -> Result<()> {
        assert!(!self.opened);
        let event = Event::stream_start(Encoding::Any);
        self.emit(event)?;
        self.opened = true;
        Ok(())
    }

    /// Finish a YAML stream.
    ///
    /// This function should be used after
    /// [`Document::dump()`](crate::Document::dump) is called.
    pub fn close(&mut self) -> Result<()> {
        assert!(self.opened);
        // Closing twice is a no-op, not an error.
        if self.closed {
            return Ok(());
        }
        let event = Event::stream_end();
        self.emit(event)?;
        self.closed = true;
        Ok(())
    }

    /// Set a string output.
    ///
    /// The emitter will write the output characters to the `output` buffer.
247 | pub fn set_output_string(&mut self, output: &'w mut Vec) { 248 | assert!(self.write_handler.is_none()); 249 | if self.encoding == Encoding::Any { 250 | self.set_encoding(Encoding::Utf8); 251 | } else if self.encoding != Encoding::Utf8 { 252 | panic!("cannot output UTF-16 to String") 253 | } 254 | output.clear(); 255 | self.write_handler = Some(output); 256 | } 257 | 258 | /// Set a generic output handler. 259 | pub fn set_output(&mut self, handler: &'w mut dyn std::io::Write) { 260 | assert!(self.write_handler.is_none()); 261 | self.write_handler = Some(handler); 262 | } 263 | 264 | /// Set the output encoding. 265 | pub fn set_encoding(&mut self, encoding: Encoding) { 266 | assert_eq!(self.encoding, Encoding::Any); 267 | self.encoding = encoding; 268 | } 269 | 270 | /// Set if the output should be in the "canonical" format as in the YAML 271 | /// specification. 272 | pub fn set_canonical(&mut self, canonical: bool) { 273 | self.canonical = canonical; 274 | } 275 | 276 | /// Set the indentation increment. 277 | pub fn set_indent(&mut self, indent: i32) { 278 | self.best_indent = if 1 < indent && indent < 10 { indent } else { 2 }; 279 | } 280 | 281 | /// Set the preferred line width. -1 means unlimited. 282 | pub fn set_width(&mut self, width: i32) { 283 | self.best_width = if width >= 0 { width } else { -1 }; 284 | } 285 | 286 | /// Set if unescaped non-ASCII characters are allowed. 287 | pub fn set_unicode(&mut self, unicode: bool) { 288 | self.unicode = unicode; 289 | } 290 | 291 | /// Set the preferred line break. 292 | pub fn set_break(&mut self, line_break: Break) { 293 | self.line_break = line_break; 294 | } 295 | 296 | /// Emit an event. 297 | /// 298 | /// The event object may be generated using the 299 | /// [`Parser::parse()`](crate::Parser::parse) function. The emitter takes 300 | /// the responsibility for the event object and destroys its content after 301 | /// it is emitted. The event object is destroyed even if the function fails. 
302 | pub fn emit(&mut self, event: Event) -> Result<()> { 303 | self.events.push_back(event); 304 | while let Some(event) = self.needs_mode_events() { 305 | let tag_directives = core::mem::take(&mut self.tag_directives); 306 | 307 | let mut analysis = self.analyze_event(&event, &tag_directives)?; 308 | self.state_machine(&event, &mut analysis)?; 309 | 310 | // The DOCUMENT-START event populates the tag directives, and this 311 | // happens only once, so don't swap out the tags in that case. 312 | if self.tag_directives.is_empty() { 313 | self.tag_directives = tag_directives; 314 | } 315 | } 316 | Ok(()) 317 | } 318 | 319 | /// Equivalent of the libyaml `FLUSH` macro. 320 | fn flush_if_needed(&mut self) -> Result<()> { 321 | if self.buffer.len() < OUTPUT_BUFFER_SIZE - 5 { 322 | Ok(()) 323 | } else { 324 | self.flush() 325 | } 326 | } 327 | 328 | /// Equivalent of the libyaml `PUT` macro. 329 | fn put(&mut self, value: char) -> Result<()> { 330 | self.flush_if_needed()?; 331 | self.buffer.push(value); 332 | self.column += 1; 333 | Ok(()) 334 | } 335 | 336 | /// Equivalent of the libyaml `PUT_BREAK` macro. 337 | fn put_break(&mut self) -> Result<()> { 338 | self.flush_if_needed()?; 339 | if self.line_break == Break::Cr { 340 | self.buffer.push('\r'); 341 | } else if self.line_break == Break::Ln { 342 | self.buffer.push('\n'); 343 | } else if self.line_break == Break::CrLn { 344 | self.buffer.push_str("\r\n"); 345 | }; 346 | self.column = 0; 347 | self.line += 1; 348 | Ok(()) 349 | } 350 | 351 | /// Write UTF-8 charanters from `string` to `emitter` and increment 352 | /// `emitter.column` the appropriate number of times. It is assumed that the 353 | /// string does not contain line breaks! 354 | fn write_str(&mut self, string: &str) -> Result<()> { 355 | if self.buffer.len() + string.len() > OUTPUT_BUFFER_SIZE { 356 | self.flush()?; 357 | } 358 | 359 | // Note: Reserves less than what is necessary if there are UTF-8 360 | // characters present. 
361 | self.buffer.reserve(string.len()); 362 | 363 | self.column += string.chars().count() as i32; 364 | 365 | // Note: This may cause the buffer to become slightly larger than 366 | // `OUTPUT_BUFFER_SIZE`, but not by much. 367 | self.buffer.push_str(string); 368 | 369 | Ok(()) 370 | } 371 | 372 | /// Equivalent of the libyaml `WRITE` macro. 373 | fn write_char(&mut self, ch: char) -> Result<()> { 374 | self.flush_if_needed()?; 375 | self.buffer.push(ch); 376 | self.column += 1; 377 | Ok(()) 378 | } 379 | 380 | /// Equivalent of the libyaml `WRITE_BREAK` macro. 381 | fn write_break(&mut self, ch: char) -> Result<()> { 382 | self.flush_if_needed()?; 383 | if ch == '\n' { 384 | self.put_break()?; 385 | } else { 386 | self.write_char(ch)?; 387 | self.column = 0; 388 | self.line += 1; 389 | } 390 | Ok(()) 391 | } 392 | 393 | fn needs_mode_events(&mut self) -> Option { 394 | let first = self.events.front()?; 395 | 396 | let accummulate = match &first.data { 397 | EventData::DocumentStart { .. } => 1, 398 | EventData::SequenceStart { .. } => 2, 399 | EventData::MappingStart { .. } => 3, 400 | _ => return self.events.pop_front(), 401 | }; 402 | 403 | if self.events.len() > accummulate { 404 | return self.events.pop_front(); 405 | } 406 | 407 | let mut level = 0; 408 | for event in &self.events { 409 | match event.data { 410 | EventData::StreamStart { .. } 411 | | EventData::DocumentStart { .. } 412 | | EventData::SequenceStart { .. } 413 | | EventData::MappingStart { .. } => { 414 | level += 1; 415 | } 416 | 417 | EventData::StreamEnd 418 | | EventData::DocumentEnd { .. 
} 419 | | EventData::SequenceEnd 420 | | EventData::MappingEnd => { 421 | level -= 1; 422 | } 423 | _ => {} 424 | } 425 | 426 | if level == 0 { 427 | return self.events.pop_front(); 428 | } 429 | } 430 | 431 | None 432 | } 433 | 434 | fn append_tag_directive(&mut self, value: TagDirective, allow_duplicates: bool) -> Result<()> { 435 | for tag_directive in &self.tag_directives { 436 | if value.handle == tag_directive.handle { 437 | if allow_duplicates { 438 | return Ok(()); 439 | } 440 | return Err(Error::emitter("duplicate %TAG directive")); 441 | } 442 | } 443 | self.tag_directives.push(value); 444 | Ok(()) 445 | } 446 | 447 | fn increase_indent(&mut self, flow: bool, indentless: bool) { 448 | self.indents.push(self.indent); 449 | if self.indent < 0 { 450 | self.indent = if flow { self.best_indent } else { 0 }; 451 | } else if !indentless { 452 | self.indent += self.best_indent; 453 | } 454 | } 455 | 456 | fn state_machine<'a>(&mut self, event: &'a Event, analysis: &mut Analysis<'a>) -> Result<()> { 457 | match self.state { 458 | EmitterState::StreamStart => self.emit_stream_start(event), 459 | EmitterState::FirstDocumentStart => self.emit_document_start(event, true), 460 | EmitterState::DocumentStart => self.emit_document_start(event, false), 461 | EmitterState::DocumentContent => self.emit_document_content(event, analysis), 462 | EmitterState::DocumentEnd => self.emit_document_end(event), 463 | EmitterState::FlowSequenceFirstItem => { 464 | self.emit_flow_sequence_item(event, true, analysis) 465 | } 466 | EmitterState::FlowSequenceItem => self.emit_flow_sequence_item(event, false, analysis), 467 | EmitterState::FlowMappingFirstKey => self.emit_flow_mapping_key(event, true, analysis), 468 | EmitterState::FlowMappingKey => self.emit_flow_mapping_key(event, false, analysis), 469 | EmitterState::FlowMappingSimpleValue => { 470 | self.emit_flow_mapping_value(event, true, analysis) 471 | } 472 | EmitterState::FlowMappingValue => self.emit_flow_mapping_value(event, 
false, analysis), 473 | EmitterState::BlockSequenceFirstItem => { 474 | self.emit_block_sequence_item(event, true, analysis) 475 | } 476 | EmitterState::BlockSequenceItem => { 477 | self.emit_block_sequence_item(event, false, analysis) 478 | } 479 | EmitterState::BlockMappingFirstKey => { 480 | self.emit_block_mapping_key(event, true, analysis) 481 | } 482 | EmitterState::BlockMappingKey => self.emit_block_mapping_key(event, false, analysis), 483 | EmitterState::BlockMappingSimpleValue => { 484 | self.emit_block_mapping_value(event, true, analysis) 485 | } 486 | EmitterState::BlockMappingValue => { 487 | self.emit_block_mapping_value(event, false, analysis) 488 | } 489 | EmitterState::End => Err(Error::emitter("expected nothing after STREAM-END")), 490 | } 491 | } 492 | 493 | fn emit_stream_start(&mut self, event: &Event) -> Result<()> { 494 | self.open_ended = 0; 495 | if let EventData::StreamStart { ref encoding } = event.data { 496 | if self.encoding == Encoding::Any { 497 | self.encoding = *encoding; 498 | } 499 | if self.encoding == Encoding::Any { 500 | self.encoding = Encoding::Utf8; 501 | } 502 | if self.best_indent < 2 || self.best_indent > 9 { 503 | self.best_indent = 2; 504 | } 505 | if self.best_width >= 0 && self.best_width <= self.best_indent * 2 { 506 | self.best_width = 80; 507 | } 508 | if self.best_width < 0 { 509 | self.best_width = i32::MAX; 510 | } 511 | if self.line_break == Break::Any { 512 | self.line_break = Break::Ln; 513 | } 514 | self.indent = -1; 515 | self.line = 0; 516 | self.column = 0; 517 | self.whitespace = true; 518 | self.indention = true; 519 | if self.encoding != Encoding::Utf8 { 520 | self.write_bom()?; 521 | } 522 | self.state = EmitterState::FirstDocumentStart; 523 | return Ok(()); 524 | } 525 | Err(Error::emitter("expected STREAM-START")) 526 | } 527 | 528 | fn emit_document_start(&mut self, event: &Event, first: bool) -> Result<()> { 529 | if let EventData::DocumentStart { 530 | version_directive, 531 | tag_directives, 
532 | implicit, 533 | } = &event.data 534 | { 535 | let default_tag_directives: [TagDirective; 2] = [ 536 | // TODO: Avoid these heap allocations. 537 | TagDirective { 538 | handle: String::from("!"), 539 | prefix: String::from("!"), 540 | }, 541 | TagDirective { 542 | handle: String::from("!!"), 543 | prefix: String::from("tag:yaml.org,2002:"), 544 | }, 545 | ]; 546 | let mut implicit = *implicit; 547 | if let Some(version_directive) = version_directive { 548 | Self::analyze_version_directive(*version_directive)?; 549 | } 550 | for tag_directive in tag_directives { 551 | Self::analyze_tag_directive(tag_directive)?; 552 | self.append_tag_directive(tag_directive.clone(), false)?; 553 | } 554 | for tag_directive in default_tag_directives { 555 | self.append_tag_directive(tag_directive, true)?; 556 | } 557 | if !first || self.canonical { 558 | implicit = false; 559 | } 560 | if (version_directive.is_some() || !tag_directives.is_empty()) && self.open_ended != 0 { 561 | self.write_indicator("...", true, false, false)?; 562 | self.write_indent()?; 563 | } 564 | self.open_ended = 0; 565 | if let Some(version_directive) = version_directive { 566 | implicit = false; 567 | self.write_indicator("%YAML", true, false, false)?; 568 | if version_directive.minor == 1 { 569 | self.write_indicator("1.1", true, false, false)?; 570 | } else { 571 | self.write_indicator("1.2", true, false, false)?; 572 | } 573 | self.write_indent()?; 574 | } 575 | if !tag_directives.is_empty() { 576 | implicit = false; 577 | for tag_directive in tag_directives { 578 | self.write_indicator("%TAG", true, false, false)?; 579 | self.write_tag_handle(&tag_directive.handle)?; 580 | self.write_tag_content(&tag_directive.prefix, true)?; 581 | self.write_indent()?; 582 | } 583 | } 584 | if Self::check_empty_document() { 585 | implicit = false; 586 | } 587 | if !implicit { 588 | self.write_indent()?; 589 | self.write_indicator("---", true, false, false)?; 590 | if self.canonical { 591 | self.write_indent()?; 592 
                } // end `if self.canonical`
            } // end `if !implicit` — an explicit "---" was written above
            // Document header done; the root node is emitted next.
            self.state = EmitterState::DocumentContent;
            self.open_ended = 0;
            return Ok(());
        } else if let EventData::StreamEnd = &event.data {
            // `open_ended == 2` is set by `write_block_scalar_hints` for a
            // kept ("+") block scalar; close it with "..." before ending.
            if self.open_ended == 2 {
                self.write_indicator("...", true, false, false)?;
                self.open_ended = 0;
                self.write_indent()?;
            }
            self.flush()?;
            self.state = EmitterState::End;
            return Ok(());
        }

        Err(Error::emitter("expected DOCUMENT-START or STREAM-END"))
    }

    /// Emit the document's root node and queue DOCUMENT-END handling.
    fn emit_document_content(&mut self, event: &Event, analysis: &mut Analysis) -> Result<()> {
        self.states.push(EmitterState::DocumentEnd);
        self.emit_node(event, true, false, false, false, analysis)
    }

    /// Finish the current document: write an explicit "..." when the end is
    /// not implicit, flush, and reset per-document state (tag directives).
    fn emit_document_end(&mut self, event: &Event) -> Result<()> {
        if let EventData::DocumentEnd { implicit } = &event.data {
            let implicit = *implicit;
            self.write_indent()?;
            if !implicit {
                self.write_indicator("...", true, false, false)?;
                self.open_ended = 0;
                self.write_indent()?;
            } else if self.open_ended == 0 {
                // Record that the document ended without "..."; the next
                // document start uses this to decide whether "..." is needed
                // before emitting directives.
                self.open_ended = 1;
            }
            self.flush()?;
            self.state = EmitterState::DocumentStart;
            self.tag_directives.clear();
            return Ok(());
        }

        Err(Error::emitter("expected DOCUMENT-END"))
    }

    /// Emit one item of a flow sequence ("[a, b, c]"). `first` is true for
    /// the item right after SEQUENCE-START, which also opens the "[".
    fn emit_flow_sequence_item(
        &mut self,
        event: &Event,
        first: bool,
        analysis: &mut Analysis,
    ) -> Result<()> {
        if first {
            self.write_indicator("[", true, true, false)?;
            self.increase_indent(true, false);
            self.flow_level += 1;
        }
        if let EventData::SequenceEnd = &event.data {
            self.flow_level -= 1;
            self.indent = self.indents.pop().unwrap();
            // Canonical style writes a trailing "," before "]".
            if self.canonical && !first {
                self.write_indicator(",", false, false, false)?;
                self.write_indent()?;
            }
            self.write_indicator("]", false, false, false)?;
            self.state = self.states.pop().unwrap();
            return Ok(());
        }
        if !first {
            self.write_indicator(",", false, false, false)?;
        }
        if self.canonical || self.column > self.best_width {
            self.write_indent()?;
        }
        self.states.push(EmitterState::FlowSequenceItem);
        self.emit_node(event, false, true, false, false, analysis)
    }

    /// Emit one key of a flow mapping ("{k: v}"). Simple keys are written
    /// inline; otherwise the explicit "?" indicator form is used.
    fn emit_flow_mapping_key(
        &mut self,
        event: &Event,
        first: bool,
        analysis: &mut Analysis,
    ) -> Result<()> {
        if first {
            self.write_indicator("{", true, true, false)?;
            self.increase_indent(true, false);
            self.flow_level += 1;
        }
        if let EventData::MappingEnd = &event.data {
            assert!(!self.indents.is_empty(), "self.indents should not be empty");
            self.flow_level -= 1;
            self.indent = self.indents.pop().unwrap();
            if self.canonical && !first {
                self.write_indicator(",", false, false, false)?;
                self.write_indent()?;
            }
            self.write_indicator("}", false, false, false)?;
            self.state = self.states.pop().unwrap();
            return Ok(());
        }
        if !first {
            self.write_indicator(",", false, false, false)?;
        }
        if self.canonical || self.column > self.best_width {
            self.write_indent()?;
        }
        if !self.canonical && self.check_simple_key(event, analysis) {
            self.states.push(EmitterState::FlowMappingSimpleValue);
            self.emit_node(event, false, false, true, true, analysis)
        } else {
            self.write_indicator("?", true, false, false)?;
            self.states.push(EmitterState::FlowMappingValue);
            self.emit_node(event, false, false, true, false, analysis)
        }
    }

    /// Emit the ":" separator and the value of a flow-mapping pair.
    /// `simple` selects the inline form (key was emitted without "?").
    fn emit_flow_mapping_value(
        &mut self,
        event: &Event,
        simple: bool,
        analysis: &mut Analysis,
    ) -> Result<()> {
        if simple {
            self.write_indicator(":", false, false, false)?;
        } else {
            if self.canonical || self.column > self.best_width {
                self.write_indent()?;
            }
            self.write_indicator(":", true, false, false)?;
        }
        self.states.push(EmitterState::FlowMappingKey);
        self.emit_node(event, false, false, true, false, analysis)
    }

    /// Emit one item of a block sequence ("- item" lines).
    fn emit_block_sequence_item(
        &mut self,
        event: &Event,
        first: bool,
        analysis: &mut Analysis,
    ) -> Result<()> {
        if first {
            // NOTE(review): the second argument appears to request an
            // indentless nested sequence when we are a mapping value still on
            // the key's line — confirm against `increase_indent`.
            self.increase_indent(false, self.mapping_context && !self.indention);
        }
        if let EventData::SequenceEnd = &event.data {
            self.indent = self.indents.pop().unwrap();
            self.state = self.states.pop().unwrap();
            return Ok(());
        }
        self.write_indent()?;
        self.write_indicator("-", true, false, true)?;
        self.states.push(EmitterState::BlockSequenceItem);
        self.emit_node(event, false, true, false, false, analysis)
    }

    /// Emit one key of a block mapping; falls back to the explicit "?" form
    /// when the key does not qualify as a simple key.
    fn emit_block_mapping_key(
        &mut self,
        event: &Event,
        first: bool,
        analysis: &mut Analysis,
    ) -> Result<()> {
        if first {
            self.increase_indent(false, false);
        }
        if let EventData::MappingEnd = &event.data {
            self.indent = self.indents.pop().unwrap();
            self.state = self.states.pop().unwrap();
            return Ok(());
        }
        self.write_indent()?;
        if self.check_simple_key(event, analysis) {
            self.states.push(EmitterState::BlockMappingSimpleValue);
            self.emit_node(event, false, false, true, true, analysis)
        } else {
            self.write_indicator("?", true, false, true)?;
            self.states.push(EmitterState::BlockMappingValue);
            self.emit_node(event, false, false, true, false, analysis)
        }
    }

    /// Emit the ":" separator and the value of a block-mapping pair.
    fn emit_block_mapping_value(
        &mut self,
        event: &Event,
        simple: bool,
        analysis: &mut Analysis,
    ) -> Result<()> {
        if simple {
            self.write_indicator(":", false, false, false)?;
        } else {
            self.write_indent()?;
            self.write_indicator(":", true, false, true)?;
        }

        self.states.push(EmitterState::BlockMappingKey);
        self.emit_node(event, false, false, true, false, analysis)
    }

    /// Dispatch a node event to the matching emitter, recording the
    /// syntactic context (root/sequence/mapping/simple-key) in `self` first.
    fn emit_node(
        &mut self,
        event: &Event,
        root: bool,
        sequence: bool,
        mapping: bool,
        simple_key: bool,
        analysis: &mut Analysis,
    ) -> Result<()> {
        self.root_context = root;
        self.sequence_context = sequence;
        self.mapping_context = mapping;
        self.simple_key_context = simple_key;

        match event.data {
            EventData::Alias { .. } => self.emit_alias(event, &analysis.anchor),
            EventData::Scalar { .. } => self.emit_scalar(event, analysis),
            EventData::SequenceStart { .. } => self.emit_sequence_start(event, analysis),
            EventData::MappingStart { .. } => self.emit_mapping_start(event, analysis),
            _ => Err(Error::emitter(
                "expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS",
            )),
        }
    }

    /// Emit an alias node ("*anchor").
    // NOTE(review): `analysis: &Option` looks truncated by extraction
    // (generic argument lost); presumably `&Option<AnchorAnalysis<'_>>` —
    // confirm against the upstream source.
    fn emit_alias(&mut self, _event: &Event, analysis: &Option) -> Result<()> {
        self.process_anchor(analysis)?;
        if self.simple_key_context {
            self.put(' ')?;
        }
        self.state = self.states.pop().unwrap();
        Ok(())
    }

    /// Emit a scalar node: select a style, write anchor and tag, then the
    /// value itself.
    fn emit_scalar(&mut self, event: &Event, analysis: &mut Analysis) -> Result<()> {
        let Analysis {
            anchor,
            tag,
            scalar: Some(scalar),
        } = analysis
        else {
            unreachable!("no scalar analysis");
        };

        self.select_scalar_style(event, scalar, tag)?;
        self.process_anchor(anchor)?;
        self.process_tag(tag)?;
        self.increase_indent(true, false);
        self.process_scalar(scalar)?;
        self.indent = self.indents.pop().unwrap();
        self.state = self.states.pop().unwrap();
        Ok(())
    }

    /// Emit SEQUENCE-START: anchor/tag, then choose flow vs. block styling.
    fn emit_sequence_start(&mut self, event: &Event, analysis: &Analysis) -> Result<()> {
        let Analysis { anchor, tag, ..
        } = analysis;
        self.process_anchor(anchor)?;
        self.process_tag(tag)?;

        let EventData::SequenceStart { style, .. } = &event.data else {
            unreachable!()
        };

        // Flow style wins when already inside a flow collection, in
        // canonical mode, when explicitly requested, or for an empty sequence.
        if self.flow_level != 0
            || self.canonical
            || *style == SequenceStyle::Flow
            || self.check_empty_sequence(event)
        {
            self.state = EmitterState::FlowSequenceFirstItem;
        } else {
            self.state = EmitterState::BlockSequenceFirstItem;
        };
        Ok(())
    }

    /// Emit MAPPING-START: anchor/tag, then choose flow vs. block styling.
    fn emit_mapping_start(&mut self, event: &Event, analysis: &Analysis) -> Result<()> {
        let Analysis { anchor, tag, .. } = analysis;
        self.process_anchor(anchor)?;
        self.process_tag(tag)?;

        let EventData::MappingStart { style, .. } = &event.data else {
            unreachable!()
        };

        if self.flow_level != 0
            || self.canonical
            || *style == MappingStyle::Flow
            || self.check_empty_mapping(event)
        {
            self.state = EmitterState::FlowMappingFirstKey;
        } else {
            self.state = EmitterState::BlockMappingFirstKey;
        }
        Ok(())
    }

    /// Always false: empty-document detection is not implemented.
    fn check_empty_document() -> bool {
        false
    }

    /// True when `event` starts a sequence that the very next queued event
    /// immediately ends (i.e. it will render as "[]").
    fn check_empty_sequence(&self, event: &Event) -> bool {
        if self.events.is_empty() {
            return false;
        }
        let start = matches!(event.data, EventData::SequenceStart { .. });
        let end = matches!(self.events[0].data, EventData::SequenceEnd);
        start && end
    }

    /// True when `event` starts a mapping that the very next queued event
    /// immediately ends (i.e. it will render as "{}").
    fn check_empty_mapping(&self, event: &Event) -> bool {
        if self.events.is_empty() {
            return false;
        }
        let start = matches!(event.data, EventData::MappingStart { .. });
        let end = matches!(self.events[0].data, EventData::MappingEnd);
        start && end
    }

    /// Decide whether the node in `event` may be emitted as a "simple"
    /// mapping key (inline, no "?" indicator). Only short, single-line
    /// nodes qualify.
    fn check_simple_key(&self, event: &Event, analysis: &Analysis) -> bool {
        let Analysis {
            tag,
            anchor,
            scalar,
        } = analysis;

        // Estimated rendered length: anchor plus tag handle/suffix.
        let mut length = anchor.as_ref().map_or(0, |a| a.anchor.len())
            + tag.as_ref().map_or(0, |t| t.handle.len() + t.suffix.len());

        match event.data {
            EventData::Alias { .. } => {
                // An alias key is just "*anchor": only the anchor counts.
                length = analysis.anchor.as_ref().map_or(0, |a| a.anchor.len());
            }
            EventData::Scalar { .. } => {
                let Some(scalar) = scalar else {
                    panic!("no analysis for scalar")
                };

                // Multiline scalars can never be simple keys.
                if scalar.multiline {
                    return false;
                }
                length += scalar.value.len();
            }
            EventData::SequenceStart { .. } => {
                // Only an empty sequence fits on a key.
                if !self.check_empty_sequence(event) {
                    return false;
                }
            }
            EventData::MappingStart { .. } => {
                // Only an empty mapping fits on a key.
                if !self.check_empty_mapping(event) {
                    return false;
                }
            }
            _ => return false,
        }

        // Keys longer than 128 bytes are not emitted in simple form.
        if length > 128 {
            return false;
        }

        true
    }

    /// Choose the concrete scalar style (plain / quoted / block) for a
    /// scalar event, honoring the requested style, canonical mode, and the
    /// limits recorded by `analyze_scalar`. May synthesize a "!" tag when a
    /// non-plain style would otherwise change tag resolution.
    // NOTE(review): `tag_analysis: &mut Option` looks truncated by
    // extraction; presumably `&mut Option<TagAnalysis<'_>>` — confirm.
    fn select_scalar_style(
        &mut self,
        event: &Event,
        scalar_analysis: &mut ScalarAnalysis,
        tag_analysis: &mut Option,
    ) -> Result<()> {
        let EventData::Scalar {
            plain_implicit,
            quoted_implicit,
            style,
            ..
        } = &event.data
        else {
            unreachable!()
        };

        let mut style: ScalarStyle = *style;
        let no_tag = tag_analysis.is_none();
        if no_tag && !*plain_implicit && !*quoted_implicit {
            return Err(Error::emitter(
                "neither tag nor implicit flags are specified",
            ));
        }
        if style == ScalarStyle::Any {
            style = ScalarStyle::Plain;
        }
        // Canonical output always double-quotes scalars.
        if self.canonical {
            style = ScalarStyle::DoubleQuoted;
        }
        // A multiline scalar used as a simple key must be double-quoted.
        if self.simple_key_context && scalar_analysis.multiline {
            style = ScalarStyle::DoubleQuoted;
        }
        if style == ScalarStyle::Plain {
            // Downgrade plain to single-quoted when the value is not
            // representable as plain in the current flow/block context, when
            // it is empty in a position that needs a visible token, or when
            // plain form would resolve to a different tag.
            if self.flow_level != 0 && !scalar_analysis.flow_plain_allowed
                || self.flow_level == 0 && !scalar_analysis.block_plain_allowed
            {
                style = ScalarStyle::SingleQuoted;
            }
            if scalar_analysis.value.is_empty() && (self.flow_level != 0 || self.simple_key_context)
            {
                style = ScalarStyle::SingleQuoted;
            }
            if no_tag && !*plain_implicit {
                style = ScalarStyle::SingleQuoted;
            }
        }
        if style == ScalarStyle::SingleQuoted && !scalar_analysis.single_quoted_allowed {
            style = ScalarStyle::DoubleQuoted;
        }
        // Block styles (literal/folded) are unusable in flow context and
        // for simple keys.
        if (style == ScalarStyle::Literal || style == ScalarStyle::Folded)
            && (!scalar_analysis.block_allowed || self.flow_level != 0 || self.simple_key_context)
        {
            style = ScalarStyle::DoubleQuoted;
        }
        // Quoting a scalar whose tag was only implicit in plain form needs
        // an explicit "!" tag to preserve its resolution.
        if no_tag && !*quoted_implicit && style != ScalarStyle::Plain {
            *tag_analysis = Some(TagAnalysis {
                handle: "!",
                suffix: "",
            });
        }
        scalar_analysis.style = style;
        Ok(())
    }

    /// Write the "&anchor" / "*alias" indicator if an anchor is present.
    // NOTE(review): `analysis: &Option` looks truncated by extraction;
    // presumably `&Option<AnchorAnalysis<'_>>` — confirm.
    fn process_anchor(&mut self, analysis: &Option) -> Result<()> {
        let Some(analysis) = analysis.as_ref() else {
            return Ok(());
        };
        self.write_indicator(if analysis.alias { "*" } else { "&" }, true, false, false)?;
        self.write_anchor(analysis.anchor)
    }

    /// Write the node's tag: verbatim "!<...>" form when there is no handle,
    /// otherwise handle followed by the escaped suffix.
    // NOTE(review): `analysis: &Option` looks truncated by extraction;
    // presumably `&Option<TagAnalysis<'_>>` — confirm.
    fn process_tag(&mut self, analysis: &Option) -> Result<()> {
        let Some(analysis) = analysis.as_ref() else {
            return Ok(());
        };

        if analysis.handle.is_empty() && analysis.suffix.is_empty() {
            return Ok(());
        }
        if analysis.handle.is_empty() {
            self.write_indicator("!<", true, false, false)?;
            self.write_tag_content(analysis.suffix, false)?;
            self.write_indicator(">", false, false, false)?;
        } else {
            self.write_tag_handle(analysis.handle)?;
            if !analysis.suffix.is_empty() {
                self.write_tag_content(analysis.suffix, false)?;
            }
        }
        Ok(())
    }

    /// Write the scalar value using the style chosen by
    /// `select_scalar_style`.
    fn process_scalar(&mut self, analysis: &ScalarAnalysis) -> Result<()> {
        match analysis.style {
            ScalarStyle::Plain => self.write_plain_scalar(analysis.value, !self.simple_key_context),
            ScalarStyle::SingleQuoted => {
                self.write_single_quoted_scalar(analysis.value, !self.simple_key_context)
            }
            ScalarStyle::DoubleQuoted => {
                self.write_double_quoted_scalar(analysis.value, !self.simple_key_context)
            }
            ScalarStyle::Literal => self.write_literal_scalar(analysis.value),
            ScalarStyle::Folded => self.write_folded_scalar(analysis.value),
            ScalarStyle::Any => unreachable!("No scalar style chosen"),
        }
    }

    /// Only %YAML 1.1 and 1.2 are accepted.
    fn analyze_version_directive(version_directive: VersionDirective) -> Result<()> {
        if version_directive.major != 1
            || version_directive.minor != 1 && version_directive.minor != 2
        {
            return Err(Error::emitter("incompatible %YAML directive"));
        }
        Ok(())
    }

    /// Validate a %TAG directive: the handle must be "!", "!!", or
    /// "!word!" (alphanumeric interior), and the prefix must be non-empty.
    fn analyze_tag_directive(tag_directive: &TagDirective) -> Result<()> {
        if tag_directive.handle.is_empty() {
            return Err(Error::emitter("tag handle must not be empty"));
        }
        if !tag_directive.handle.starts_with('!') {
            return Err(Error::emitter("tag handle must start with '!'"));
        }
        if !tag_directive.handle.ends_with('!') {
            return Err(Error::emitter("tag handle must end with '!'"));
        }
        // For "!name!" handles, everything between the two '!' must be
        // alphanumeric as defined by `is_alpha`.
        if tag_directive.handle.len() > 2 {
            let tag_content = &tag_directive.handle[1..tag_directive.handle.len() - 1];
            for ch in tag_content.chars() {
                if !is_alpha(ch) {
                    return Err(Error::emitter(
                        "tag handle must contain alphanumerical characters only",
                    ));
                }
            }
        }

        if tag_directive.prefix.is_empty() {
            return Err(Error::emitter("tag prefix must not be empty"));
        }

        Ok(())
    }

    /// Validate an anchor or alias name: non-empty and alphanumeric only.
    // NOTE(review): return type `Result>` looks truncated by extraction;
    // presumably `Result<AnchorAnalysis<'_>>` — confirm.
    fn analyze_anchor(anchor: &str, alias: bool) -> Result> {
        if anchor.is_empty() {
            return Err(Error::emitter(if alias {
                "alias value must not be empty"
            } else {
                "anchor value must not be empty"
            }));
        }

        for ch in anchor.chars() {
            if !is_alpha(ch) {
                return Err(Error::emitter(if alias {
                    "alias value must contain alphanumerical characters only"
                } else {
                    "anchor value must contain alphanumerical characters only"
                }));
            }
        }

        Ok(AnchorAnalysis { anchor, alias })
    }

    /// Split a tag into (handle, suffix) using the first %TAG directive
    /// whose prefix matches; otherwise the whole tag becomes the suffix
    /// with an empty handle (emitted verbatim as "!<...>").
    // NOTE(review): return type `Result>` looks truncated by extraction;
    // presumably `Result<TagAnalysis<'a>>` — confirm.
    fn analyze_tag<'a>(
        tag: &'a str,
        tag_directives: &'a [TagDirective],
    ) -> Result> {
        if tag.is_empty() {
            return Err(Error::emitter("tag value must not be empty"));
        }

        let mut handle = "";
        let mut suffix = tag;

        for tag_directive in tag_directives {
            let prefix_len = tag_directive.prefix.len();
            // The prefix must be a strict prefix so the suffix is non-empty.
            if prefix_len < tag.len() && tag_directive.prefix == tag[0..prefix_len] {
                handle = &tag_directive.handle;
                suffix = &tag[prefix_len..];
                break;
            }
        }

        Ok(TagAnalysis { handle, suffix })
    }

    /// Scan a scalar value once and record every property that constrains
    /// which output styles can represent it: leading/trailing spaces and
    /// breaks, space-break adjacency, indicator characters, and characters
    /// that force double quoting.
    // NOTE(review): return type `Result>` looks truncated by extraction;
    // presumably `Result<ScalarAnalysis<'a>>` — confirm.
    fn analyze_scalar<'a>(&mut self, value: &'a str) -> Result> {
        let mut block_indicators = false;
        let mut flow_indicators = false;
        let mut line_breaks = false;
        let mut special_characters = false;
        let mut leading_space = false;
        let mut leading_break = false;
        let mut trailing_space = false;
        let mut trailing_break = false;
        let mut break_space = false;
        let mut space_break = false;
        let mut preceded_by_whitespace;
        let mut previous_space = false;
        let mut previous_break = false;

        if value.is_empty() {
            // Empty scalar: block-plain and single-quoted are the only
            // permitted representations.
            return Ok(ScalarAnalysis {
                value: "",
                multiline: false,
                flow_plain_allowed: false,
                block_plain_allowed: true,
                single_quoted_allowed: true,
                block_allowed: false,
                style: ScalarStyle::Any,
            });
        }

        // A value opening like a document marker cannot be plain.
        if value.starts_with("---") || value.starts_with("...") {
            block_indicators = true;
            flow_indicators = true;
        }
        preceded_by_whitespace = true;

        let mut chars = value.chars();
        let mut first = true;

        while let Some(ch) = chars.next() {
            // Peek at the next character without consuming it.
            let next = chars.clone().next();
            let followed_by_whitespace = is_blankz(next);
            if first {
                // Characters that act as indicators at the start of a scalar.
                match ch {
                    '#' | ',' | '[' | ']' | '{' | '}' | '&' | '*' | '!' | '|' | '>' | '\''
                    | '"' | '%' | '@' | '`' => {
                        flow_indicators = true;
                        block_indicators = true;
                    }
                    '?' | ':' => {
                        flow_indicators = true;
                        if followed_by_whitespace {
                            block_indicators = true;
                        }
                    }
                    '-' if followed_by_whitespace => {
                        flow_indicators = true;
                        block_indicators = true;
                    }
                    _ => {}
                }
            } else {
                // Characters that act as indicators mid-scalar.
                match ch {
                    ',' | '?'
| '[' | ']' | '{' | '}' => { 1199 | flow_indicators = true; 1200 | } 1201 | ':' => { 1202 | flow_indicators = true; 1203 | if followed_by_whitespace { 1204 | block_indicators = true; 1205 | } 1206 | } 1207 | '#' if preceded_by_whitespace => { 1208 | flow_indicators = true; 1209 | block_indicators = true; 1210 | } 1211 | _ => {} 1212 | } 1213 | } 1214 | 1215 | if !is_printable(ch) || !is_ascii(ch) && !self.unicode { 1216 | special_characters = true; 1217 | } 1218 | if is_break(ch) { 1219 | line_breaks = true; 1220 | } 1221 | 1222 | if is_space(ch) { 1223 | if first { 1224 | leading_space = true; 1225 | } 1226 | if next.is_none() { 1227 | trailing_space = true; 1228 | } 1229 | if previous_break { 1230 | break_space = true; 1231 | } 1232 | previous_space = true; 1233 | previous_break = false; 1234 | } else if is_break(ch) { 1235 | if first { 1236 | leading_break = true; 1237 | } 1238 | if next.is_none() { 1239 | trailing_break = true; 1240 | } 1241 | if previous_space { 1242 | space_break = true; 1243 | } 1244 | previous_space = false; 1245 | previous_break = true; 1246 | } else { 1247 | previous_space = false; 1248 | previous_break = false; 1249 | } 1250 | 1251 | preceded_by_whitespace = is_blankz(ch); 1252 | first = false; 1253 | } 1254 | 1255 | let mut analysis = ScalarAnalysis { 1256 | value, 1257 | multiline: line_breaks, 1258 | flow_plain_allowed: true, 1259 | block_plain_allowed: true, 1260 | single_quoted_allowed: true, 1261 | block_allowed: true, 1262 | style: ScalarStyle::Any, 1263 | }; 1264 | 1265 | analysis.multiline = line_breaks; 1266 | analysis.flow_plain_allowed = true; 1267 | analysis.block_plain_allowed = true; 1268 | analysis.single_quoted_allowed = true; 1269 | analysis.block_allowed = true; 1270 | if leading_space || leading_break || trailing_space || trailing_break { 1271 | analysis.flow_plain_allowed = false; 1272 | analysis.block_plain_allowed = false; 1273 | } 1274 | if trailing_space { 1275 | analysis.block_allowed = false; 1276 | } 1277 | if 
break_space { 1278 | analysis.flow_plain_allowed = false; 1279 | analysis.block_plain_allowed = false; 1280 | analysis.single_quoted_allowed = false; 1281 | } 1282 | if space_break || special_characters { 1283 | analysis.flow_plain_allowed = false; 1284 | analysis.block_plain_allowed = false; 1285 | analysis.single_quoted_allowed = false; 1286 | analysis.block_allowed = false; 1287 | } 1288 | if line_breaks { 1289 | analysis.flow_plain_allowed = false; 1290 | analysis.block_plain_allowed = false; 1291 | } 1292 | if flow_indicators { 1293 | analysis.flow_plain_allowed = false; 1294 | } 1295 | if block_indicators { 1296 | analysis.block_plain_allowed = false; 1297 | } 1298 | Ok(analysis) 1299 | } 1300 | 1301 | fn analyze_event<'a>( 1302 | &mut self, 1303 | event: &'a Event, 1304 | tag_directives: &'a [TagDirective], 1305 | ) -> Result> { 1306 | let mut analysis = Analysis::default(); 1307 | 1308 | match &event.data { 1309 | EventData::Alias { anchor } => { 1310 | analysis.anchor = Some(Self::analyze_anchor(anchor, true)?); 1311 | } 1312 | EventData::Scalar { 1313 | anchor, 1314 | tag, 1315 | value, 1316 | plain_implicit, 1317 | quoted_implicit, 1318 | .. 1319 | } => { 1320 | let (plain_implicit, quoted_implicit) = (*plain_implicit, *quoted_implicit); 1321 | if let Some(anchor) = anchor { 1322 | analysis.anchor = Some(Self::analyze_anchor(anchor, false)?); 1323 | } 1324 | if tag.is_some() && (self.canonical || !plain_implicit && !quoted_implicit) { 1325 | analysis.tag = 1326 | Some(Self::analyze_tag(tag.as_deref().unwrap(), tag_directives)?); 1327 | } 1328 | analysis.scalar = Some(self.analyze_scalar(value)?); 1329 | } 1330 | EventData::SequenceStart { 1331 | anchor, 1332 | tag, 1333 | implicit, 1334 | .. 
            } => {
                if let Some(anchor) = anchor {
                    analysis.anchor = Some(Self::analyze_anchor(anchor, false)?);
                }
                if tag.is_some() && (self.canonical || !*implicit) {
                    analysis.tag =
                        Some(Self::analyze_tag(tag.as_deref().unwrap(), tag_directives)?);
                }
            }
            EventData::MappingStart {
                anchor,
                tag,
                implicit,
                ..
            } => {
                if let Some(anchor) = anchor {
                    analysis.anchor = Some(Self::analyze_anchor(anchor, false)?);
                }
                if tag.is_some() && (self.canonical || !*implicit) {
                    analysis.tag =
                        Some(Self::analyze_tag(tag.as_deref().unwrap(), tag_directives)?);
                }
            }
            _ => {}
        }

        Ok(analysis)
    }

    /// Write a byte-order mark to the output buffer.
    fn write_bom(&mut self) -> Result<()> {
        self.flush_if_needed()?;
        self.buffer.push('\u{feff}');
        Ok(())
    }

    /// Break the line unless already at a fresh indentation point, then pad
    /// with spaces up to the current indent. Marks the cursor position as
    /// whitespace/indentation for the bookkeeping flags.
    fn write_indent(&mut self) -> Result<()> {
        let indent = if self.indent >= 0 { self.indent } else { 0 };
        if !self.indention || self.column > indent || self.column == indent && !self.whitespace {
            self.put_break()?;
        }
        while self.column < indent {
            self.put(' ')?;
        }
        self.whitespace = true;
        self.indention = true;
        Ok(())
    }

    /// Write a syntax indicator (e.g. "-", ":", "["), inserting a separating
    /// space when needed, and update the whitespace/indention flags that the
    /// following token relies on.
    fn write_indicator(
        &mut self,
        indicator: &str,
        need_whitespace: bool,
        is_whitespace: bool,
        is_indention: bool,
    ) -> Result<()> {
        if need_whitespace && !self.whitespace {
            self.put(' ')?;
        }
        self.write_str(indicator)?;
        self.whitespace = is_whitespace;
        self.indention = self.indention && is_indention;
        Ok(())
    }

    /// Write an anchor/alias name (the "&"/"*" sigil is written separately).
    fn write_anchor(&mut self, value: &str) -> Result<()> {
        self.write_str(value)?;
        self.whitespace = false;
        self.indention = false;
        Ok(())
    }

    /// Write a tag handle, separated from the previous token by a space.
    fn write_tag_handle(&mut self, value: &str) -> Result<()> {
        if !self.whitespace {
            self.put(' ')?;
        }
        self.write_str(value)?;
        self.whitespace = false;
        self.indention = false;
        Ok(())
    }

    /// Write tag text, percent-escaping every character that is neither
    /// alphanumeric nor in the allowed URI punctuation set.
    fn write_tag_content(&mut self, value: &str, need_whitespace: bool) -> Result<()> {
        if need_whitespace && !self.whitespace {
            self.put(' ')?;
        }

        for ch in value.chars() {
            if is_alpha(ch) {
                self.write_char(ch)?;
                continue;
            }

            match ch {
                ';' | '/' | '?' | ':' | '@' | '&' | '=' | '+' | '$' | ',' | '_' | '.' | '~'
                | '*' | '\'' | '(' | ')' | '[' | ']' => {
                    self.write_char(ch)?;
                    continue;
                }
                _ => {}
            }

            // URI escape: each UTF-8 byte of the character as "%XX"
            // (uppercase hex).
            let mut encode_buffer = [0u8; 4];
            let encoded_char = ch.encode_utf8(&mut encode_buffer);
            for value in encoded_char.bytes() {
                let upper = char::from_digit(value as u32 >> 4, 16)
                    .expect("invalid digit")
                    .to_ascii_uppercase();
                let lower = char::from_digit(value as u32 & 0x0F, 16)
                    .expect("invalid digit")
                    .to_ascii_uppercase();
                self.put('%')?;
                self.put(upper)?;
                self.put(lower)?;
            }
        }

        self.whitespace = false;
        self.indention = false;
        Ok(())
    }

    /// Write a plain (unquoted) scalar, folding long lines at single spaces
    /// when `allow_breaks` is set.
    fn write_plain_scalar(&mut self, value: &str, allow_breaks: bool) -> Result<()> {
        let mut spaces = false;
        let mut breaks = false;
        if !self.whitespace && (!value.is_empty() || self.flow_level != 0) {
            self.put(' ')?;
        }

        let mut chars = value.chars();

        while let Some(ch) = chars.next() {
            let next = chars.clone().next();
            if is_space(ch) {
                // Fold the line at a lone space once past the preferred width.
                if allow_breaks && !spaces && self.column > self.best_width && !is_space(next) {
                    self.write_indent()?;
                } else {
                    self.write_char(ch)?;
                }
                spaces = true;
            } else if is_break(ch) {
                // The first '\n' of a run gets an extra break (a single
                // break would read back as a space under folding).
                if !breaks && ch == '\n' {
                    self.put_break()?;
                }
                self.write_break(ch)?;
                self.indention = true;
                breaks = true;
            } else {
                if breaks {
                    self.write_indent()?;
                }
                self.write_char(ch)?;
                self.indention = false;
                spaces = false;
                breaks = false;
            }
        }
        self.whitespace = false;
        self.indention = false;
        Ok(())
    }

    /// Write a single-quoted scalar; interior "'" characters are doubled.
    fn write_single_quoted_scalar(&mut self, value: &str, allow_breaks: bool) -> Result<()> {
        let mut spaces = false;
        let mut breaks = false;
        self.write_indicator("'", true, false, false)?;
        let mut chars = value.chars();
        let mut is_first = true;
        while let Some(ch) = chars.next() {
            let next = chars.clone().next();
            let is_last = next.is_none();

            if is_space(ch) {
                // Fold at a lone interior space once past the preferred
                // width; never at the first or last character.
                if allow_breaks
                    && !spaces
                    && self.column > self.best_width
                    && !is_first
                    && !is_last
                    && !is_space(next)
                {
                    self.write_indent()?;
                } else {
                    self.write_char(ch)?;
                }
                spaces = true;
            } else if is_break(ch) {
                // Double the first '\n' of a run so folding can't swallow it.
                if !breaks && ch == '\n' {
                    self.put_break()?;
                }
                self.write_break(ch)?;
                self.indention = true;
                breaks = true;
            } else {
                if breaks {
                    self.write_indent()?;
                }
                // Escape "'" by doubling it.
                if ch == '\'' {
                    self.put('\'')?;
                }
                self.write_char(ch)?;
                self.indention = false;
                spaces = false;
                breaks = false;
            }

            is_first = false;
        }
        if breaks {
            self.write_indent()?;
        }
        self.write_indicator("'", false, false, false)?;
        self.whitespace = false;
        self.indention = false;
        Ok(())
    }

    /// Write a double-quoted scalar, escaping non-printable characters,
    /// non-ASCII (when unicode output is off), BOMs, breaks, '"' and '\\'.
    fn write_double_quoted_scalar(&mut self, value: &str, allow_breaks: bool) -> Result<()> {
        let mut spaces = false;
        self.write_indicator("\"", true, false, false)?;
        let mut chars = value.chars();
        let mut first = true;
        while let Some(ch) = chars.next() {
            if !is_printable(ch)
                || !self.unicode && !is_ascii(ch)
                || is_bom(ch)
                || is_break(ch)
                || ch == '"'
                || ch == '\\'
            {
                self.put('\\')?;
                match ch {
                    // TODO: Double check these character mappings.
                    '\0' => {
                        self.put('0')?;
                    }
                    '\x07' => {
                        self.put('a')?;
                    }
                    '\x08' => {
                        self.put('b')?;
                    }
                    '\x09' => {
                        self.put('t')?;
                    }
                    '\x0A' => {
                        self.put('n')?;
                    }
                    '\x0B' => {
                        self.put('v')?;
                    }
                    '\x0C' => {
                        self.put('f')?;
                    }
                    '\x0D' => {
                        self.put('r')?;
                    }
                    '\x1B' => {
                        self.put('e')?;
                    }
                    '\x22' => {
                        self.put('"')?;
                    }
                    '\x5C' => {
                        self.put('\\')?;
                    }
                    '\u{0085}' => {
                        self.put('N')?;
                    }
                    '\u{00A0}' => {
                        self.put('_')?;
                    }
                    '\u{2028}' => {
                        self.put('L')?;
                    }
                    '\u{2029}' => {
                        self.put('P')?;
                    }
                    _ => {
                        // No short escape: use \xXX, \uXXXX or \UXXXXXXXX
                        // depending on the code point's magnitude.
                        let (prefix, width) = if ch <= '\u{00ff}' {
                            ('x', 2)
                        } else if ch <= '\u{ffff}' {
                            ('u', 4)
                        } else {
                            ('U', 8)
                        };
                        self.put(prefix)?;
                        // Emit `width` hex digits, most significant first.
                        let mut k = (width - 1) * 4;
                        let value_0 = ch as u32;
                        while k >= 0 {
                            let digit = (value_0 >> k) & 0x0F;
                            let Some(digit_char) = char::from_digit(digit, 16) else {
                                unreachable!("digit out of range")
                            };
                            // The libyaml emitter encodes unicode sequences as uppercase hex.
1629 | let digit_char = digit_char.to_ascii_uppercase(); 1630 | self.put(digit_char)?; 1631 | k -= 4; 1632 | } 1633 | } 1634 | } 1635 | spaces = false; 1636 | } else if is_space(ch) { 1637 | if allow_breaks 1638 | && !spaces 1639 | && self.column > self.best_width 1640 | && !first 1641 | && chars.clone().next().is_some() 1642 | { 1643 | self.write_indent()?; 1644 | if is_space(chars.clone().next()) { 1645 | self.put('\\')?; 1646 | } 1647 | } else { 1648 | self.write_char(ch)?; 1649 | } 1650 | spaces = true; 1651 | } else { 1652 | self.write_char(ch)?; 1653 | spaces = false; 1654 | } 1655 | 1656 | first = false; 1657 | } 1658 | self.write_indicator("\"", false, false, false)?; 1659 | self.whitespace = false; 1660 | self.indention = false; 1661 | Ok(()) 1662 | } 1663 | 1664 | fn write_block_scalar_hints(&mut self, string: &str) -> Result<()> { 1665 | let mut chomp_hint: Option<&str> = None; 1666 | 1667 | let first = string.chars().next(); 1668 | if is_space(first) || is_break(first) { 1669 | let Some(indent_hint) = char::from_digit(self.best_indent as u32, 10) else { 1670 | unreachable!("self.best_indent out of range") 1671 | }; 1672 | let mut indent_hint_buffer = [0u8; 1]; 1673 | let indent_hint = indent_hint.encode_utf8(&mut indent_hint_buffer); 1674 | self.write_indicator(indent_hint, false, false, false)?; 1675 | } 1676 | self.open_ended = 0; 1677 | 1678 | if string.is_empty() { 1679 | chomp_hint = Some("-"); 1680 | } else { 1681 | let mut chars_rev = string.chars().rev(); 1682 | let ch = chars_rev.next(); 1683 | let next = chars_rev.next(); 1684 | 1685 | if !is_break(ch) { 1686 | chomp_hint = Some("-"); 1687 | } else if is_breakz(next) { 1688 | chomp_hint = Some("+"); 1689 | self.open_ended = 2; 1690 | } 1691 | } 1692 | 1693 | if let Some(chomp_hint) = chomp_hint { 1694 | self.write_indicator(chomp_hint, false, false, false)?; 1695 | } 1696 | Ok(()) 1697 | } 1698 | 1699 | fn write_literal_scalar(&mut self, value: &str) -> Result<()> { 1700 | let mut breaks = 
true; 1701 | self.write_indicator("|", true, false, false)?; 1702 | self.write_block_scalar_hints(value)?; 1703 | self.put_break()?; 1704 | self.indention = true; 1705 | self.whitespace = true; 1706 | let chars = value.chars(); 1707 | for ch in chars { 1708 | if is_break(ch) { 1709 | self.write_break(ch)?; 1710 | self.indention = true; 1711 | breaks = true; 1712 | } else { 1713 | if breaks { 1714 | self.write_indent()?; 1715 | } 1716 | self.write_char(ch)?; 1717 | self.indention = false; 1718 | breaks = false; 1719 | } 1720 | } 1721 | Ok(()) 1722 | } 1723 | 1724 | fn write_folded_scalar(&mut self, value: &str) -> Result<()> { 1725 | let mut breaks = true; 1726 | let mut leading_spaces = true; 1727 | self.write_indicator(">", true, false, false)?; 1728 | self.write_block_scalar_hints(value)?; 1729 | self.put_break()?; 1730 | self.indention = true; 1731 | self.whitespace = true; 1732 | 1733 | let mut chars = value.chars(); 1734 | 1735 | while let Some(ch) = chars.next() { 1736 | if is_break(ch) { 1737 | if !breaks && !leading_spaces && ch == '\n' { 1738 | let mut skip_breaks = chars.clone(); 1739 | while is_break(skip_breaks.next()) {} 1740 | if !is_blankz(skip_breaks.next()) { 1741 | self.put_break()?; 1742 | } 1743 | } 1744 | self.write_break(ch)?; 1745 | self.indention = true; 1746 | breaks = true; 1747 | } else { 1748 | if breaks { 1749 | self.write_indent()?; 1750 | leading_spaces = is_blank(ch); 1751 | } 1752 | if !breaks 1753 | && is_space(ch) 1754 | && !is_space(chars.clone().next()) 1755 | && self.column > self.best_width 1756 | { 1757 | self.write_indent()?; 1758 | } else { 1759 | self.write_char(ch)?; 1760 | } 1761 | self.indention = false; 1762 | breaks = false; 1763 | } 1764 | } 1765 | Ok(()) 1766 | } 1767 | 1768 | /// Flush the accumulated characters to the output. 
1769 | pub fn flush(&mut self) -> Result<()> { 1770 | assert!((self.write_handler).is_some()); 1771 | assert_ne!(self.encoding, Encoding::Any); 1772 | 1773 | if self.buffer.is_empty() { 1774 | return Ok(()); 1775 | } 1776 | 1777 | if self.encoding == Encoding::Utf8 { 1778 | let to_emit = self.buffer.as_bytes(); 1779 | self.write_handler 1780 | .as_mut() 1781 | .expect("non-null writer") 1782 | .write_all(to_emit)?; 1783 | self.buffer.clear(); 1784 | return Ok(()); 1785 | } 1786 | 1787 | let big_endian = match self.encoding { 1788 | Encoding::Any | Encoding::Utf8 => { 1789 | unreachable!("unhandled encoding") 1790 | } 1791 | Encoding::Utf16Le => false, 1792 | Encoding::Utf16Be => true, 1793 | }; 1794 | 1795 | for ch in self.buffer.encode_utf16() { 1796 | let bytes = if big_endian { 1797 | ch.to_be_bytes() 1798 | } else { 1799 | ch.to_le_bytes() 1800 | }; 1801 | self.raw_buffer.extend(bytes); 1802 | } 1803 | 1804 | let to_emit = self.raw_buffer.as_slice(); 1805 | 1806 | self.write_handler 1807 | .as_mut() 1808 | .expect("non-null function pointer") 1809 | .write_all(to_emit)?; 1810 | self.buffer.clear(); 1811 | self.raw_buffer.clear(); 1812 | Ok(()) 1813 | } 1814 | 1815 | pub(crate) fn reset_anchors(&mut self) { 1816 | self.anchors.clear(); 1817 | self.last_anchor_id = 0; 1818 | } 1819 | 1820 | pub(crate) fn anchor_node_sub(&mut self, index: i32) { 1821 | self.anchors[index as usize - 1].references += 1; 1822 | if self.anchors[index as usize - 1].references == 2 { 1823 | self.last_anchor_id += 1; 1824 | self.anchors[index as usize - 1].anchor = self.last_anchor_id; 1825 | } 1826 | } 1827 | 1828 | pub(crate) fn generate_anchor(anchor_id: i32) -> String { 1829 | alloc::format!("id{anchor_id:03}") 1830 | } 1831 | } 1832 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | pub type Result = core::result::Result; 2 | 3 | /// The pointer 
position. 4 | #[derive(Copy, Clone, Default, Debug, PartialEq, Eq)] 5 | #[non_exhaustive] 6 | pub struct Mark { 7 | /// The position index. 8 | pub index: u64, 9 | /// The position line. 10 | pub line: u64, 11 | /// The position column. 12 | pub column: u64, 13 | } 14 | 15 | impl std::fmt::Display for Mark { 16 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 17 | write!(f, "line {} column {}", self.line, self.column) 18 | } 19 | } 20 | 21 | #[derive(Debug)] 22 | #[allow(clippy::struct_field_names)] 23 | struct Problem { 24 | pub problem: &'static str, 25 | pub problem_mark: Mark, 26 | pub context: &'static str, 27 | pub context_mark: Mark, 28 | } 29 | 30 | #[derive(Debug)] 31 | enum ErrorImpl { 32 | Reader { 33 | problem: &'static str, 34 | offset: usize, 35 | value: i32, 36 | }, 37 | Scanner(Problem), 38 | Parser(Problem), 39 | Composer(Problem), 40 | Emitter(&'static str), 41 | Io(std::io::Error), 42 | } 43 | 44 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 45 | pub enum ErrorKind { 46 | Reader, 47 | Scanner, 48 | Parser, 49 | Composer, 50 | Emitter, 51 | Io, 52 | } 53 | 54 | #[derive(Debug)] 55 | pub struct Error(Box); 56 | 57 | impl From for Error { 58 | fn from(value: std::io::Error) -> Self { 59 | Self(Box::new(ErrorImpl::Io(value))) 60 | } 61 | } 62 | 63 | impl Error { 64 | pub(crate) fn reader(problem: &'static str, offset: usize, value: i32) -> Self { 65 | Self(Box::new(ErrorImpl::Reader { 66 | problem, 67 | offset, 68 | value, 69 | })) 70 | } 71 | 72 | pub(crate) fn scanner( 73 | context: &'static str, 74 | context_mark: Mark, 75 | problem: &'static str, 76 | problem_mark: Mark, 77 | ) -> Self { 78 | Self(Box::new(ErrorImpl::Scanner(Problem { 79 | problem, 80 | problem_mark, 81 | context, 82 | context_mark, 83 | }))) 84 | } 85 | 86 | pub(crate) fn parser( 87 | context: &'static str, 88 | context_mark: Mark, 89 | problem: &'static str, 90 | problem_mark: Mark, 91 | ) -> Self { 92 | Self(Box::new(ErrorImpl::Parser(Problem { 93 | 
problem, 94 | problem_mark, 95 | context, 96 | context_mark, 97 | }))) 98 | } 99 | 100 | pub(crate) fn composer( 101 | context: &'static str, 102 | context_mark: Mark, 103 | problem: &'static str, 104 | problem_mark: Mark, 105 | ) -> Self { 106 | Self(Box::new(ErrorImpl::Composer(Problem { 107 | problem, 108 | problem_mark, 109 | context, 110 | context_mark, 111 | }))) 112 | } 113 | 114 | pub(crate) fn emitter(problem: &'static str) -> Self { 115 | Self(Box::new(ErrorImpl::Emitter(problem))) 116 | } 117 | 118 | pub fn kind(&self) -> ErrorKind { 119 | match &*self.0 { 120 | ErrorImpl::Reader { .. } => ErrorKind::Reader, 121 | ErrorImpl::Scanner(_) => ErrorKind::Scanner, 122 | ErrorImpl::Parser(_) => ErrorKind::Parser, 123 | ErrorImpl::Composer(_) => ErrorKind::Composer, 124 | ErrorImpl::Emitter(_) => ErrorKind::Emitter, 125 | ErrorImpl::Io(_) => ErrorKind::Io, 126 | } 127 | } 128 | 129 | pub fn problem_mark(&self) -> Option { 130 | match &*self.0 { 131 | ErrorImpl::Reader { .. } | ErrorImpl::Emitter(_) | ErrorImpl::Io(_) => None, 132 | ErrorImpl::Scanner(ref p) | ErrorImpl::Parser(ref p) | ErrorImpl::Composer(ref p) => { 133 | Some(p.problem_mark) 134 | } 135 | } 136 | } 137 | 138 | pub fn context_mark(&self) -> Option { 139 | match &*self.0 { 140 | ErrorImpl::Reader { .. } | ErrorImpl::Emitter(..) | ErrorImpl::Io(_) => None, 141 | ErrorImpl::Scanner(ref p) | ErrorImpl::Parser(ref p) | ErrorImpl::Composer(ref p) => { 142 | if p.context.is_empty() { 143 | None 144 | } else { 145 | Some(p.context_mark) 146 | } 147 | } 148 | } 149 | } 150 | 151 | pub fn problem(&self) -> &'static str { 152 | match &*self.0 { 153 | ErrorImpl::Reader { problem, .. } | ErrorImpl::Emitter(problem) => problem, 154 | ErrorImpl::Scanner(ref p) | ErrorImpl::Parser(ref p) | ErrorImpl::Composer(ref p) => { 155 | p.problem 156 | } 157 | ErrorImpl::Io(_) => "I/O error", 158 | } 159 | } 160 | 161 | pub fn context(&self) -> Option<&'static str> { 162 | match &*self.0 { 163 | ErrorImpl::Reader { .. 
} | ErrorImpl::Emitter(..) | ErrorImpl::Io(_) => None, 164 | ErrorImpl::Scanner(ref p) | ErrorImpl::Parser(ref p) | ErrorImpl::Composer(ref p) => { 165 | if p.context.is_empty() { 166 | None 167 | } else { 168 | Some(p.context) 169 | } 170 | } 171 | } 172 | } 173 | } 174 | 175 | impl std::error::Error for Error { 176 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 177 | if let ErrorImpl::Io(ref err) = &*self.0 { 178 | Some(err) 179 | } else { 180 | None 181 | } 182 | } 183 | } 184 | 185 | impl TryFrom for std::io::Error { 186 | type Error = Error; 187 | 188 | fn try_from(value: Error) -> Result { 189 | if value.kind() == ErrorKind::Io { 190 | if let ErrorImpl::Io(err) = *value.0 { 191 | Ok(err) 192 | } else { 193 | unreachable!() 194 | } 195 | } else { 196 | Err(value) 197 | } 198 | } 199 | } 200 | 201 | impl core::fmt::Display for ErrorKind { 202 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 203 | f.write_str(match self { 204 | ErrorKind::Reader => "Reader", 205 | ErrorKind::Scanner => "Scanner", 206 | ErrorKind::Parser => "Parser", 207 | ErrorKind::Composer => "Composer", 208 | ErrorKind::Emitter => "Emitter", 209 | ErrorKind::Io => "I/O", 210 | }) 211 | } 212 | } 213 | 214 | impl core::fmt::Display for Problem { 215 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 216 | let Self { 217 | problem, 218 | problem_mark, 219 | context, 220 | context_mark, 221 | } = self; 222 | 223 | if self.context.is_empty() { 224 | write!(f, "{problem_mark}: {problem}") 225 | } else { 226 | write!(f, "{problem_mark}: {problem} {context} ({context_mark})") 227 | } 228 | } 229 | } 230 | 231 | impl core::fmt::Display for Error { 232 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 233 | write!(f, "{} error: ", self.kind())?; 234 | match *self.0 { 235 | ErrorImpl::Reader { 236 | problem, 237 | offset, 238 | value, 239 | } => write!(f, "{problem} (offset {offset}, value {value})"), 240 | 
ErrorImpl::Scanner(ref p) | ErrorImpl::Parser(ref p) | ErrorImpl::Composer(ref p) => { 241 | write!(f, "{p}") 242 | } 243 | ErrorImpl::Emitter(problem) => write!(f, "{problem}"), 244 | ErrorImpl::Io(ref err) => write!(f, "{err}"), 245 | } 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/event.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | Encoding, MappingStyle, Mark, ScalarStyle, SequenceStyle, TagDirective, VersionDirective, 3 | }; 4 | 5 | /// The event structure. 6 | #[derive(Debug, PartialEq)] 7 | #[non_exhaustive] 8 | pub struct Event { 9 | /// The event data. 10 | pub data: EventData, 11 | /// The beginning of the event. 12 | pub start_mark: Mark, 13 | /// The end of the event. 14 | pub end_mark: Mark, 15 | } 16 | 17 | #[derive(Debug, PartialEq)] 18 | pub enum EventData { 19 | /// The stream parameters (for YAML_STREAM_START_EVENT). 20 | StreamStart { 21 | /// The document encoding. 22 | encoding: Encoding, 23 | }, 24 | StreamEnd, 25 | /// The document parameters (for YAML_DOCUMENT_START_EVENT). 26 | DocumentStart { 27 | /// The version directive. 28 | version_directive: Option, 29 | /// The tag directives list. 30 | tag_directives: Vec, 31 | /// Is the document indicator implicit? 32 | implicit: bool, 33 | }, 34 | /// The document end parameters (for YAML_DOCUMENT_END_EVENT). 35 | DocumentEnd { 36 | implicit: bool, 37 | }, 38 | /// The alias parameters (for YAML_ALIAS_EVENT). 39 | Alias { 40 | /// The anchor. 41 | anchor: String, 42 | }, 43 | /// The scalar parameters (for YAML_SCALAR_EVENT). 44 | Scalar { 45 | /// The anchor. 46 | anchor: Option, 47 | /// The tag. 48 | tag: Option, 49 | /// The scalar value. 50 | value: String, 51 | /// Is the tag optional for the plain style? 52 | plain_implicit: bool, 53 | /// Is the tag optional for any non-plain style? 54 | quoted_implicit: bool, 55 | /// The scalar style. 
56 | style: ScalarStyle, 57 | }, 58 | /// The sequence parameters (for YAML_SEQUENCE_START_EVENT). 59 | SequenceStart { 60 | /// The anchor. 61 | anchor: Option, 62 | /// The tag. 63 | tag: Option, 64 | /// Is the tag optional? 65 | implicit: bool, 66 | /// The sequence style. 67 | style: SequenceStyle, 68 | }, 69 | SequenceEnd, 70 | /// The mapping parameters (for YAML_MAPPING_START_EVENT). 71 | MappingStart { 72 | /// The anchor. 73 | anchor: Option, 74 | /// The tag. 75 | tag: Option, 76 | /// Is the tag optional? 77 | implicit: bool, 78 | /// The mapping style. 79 | style: MappingStyle, 80 | }, 81 | MappingEnd, 82 | } 83 | 84 | impl Event { 85 | /// Make an event from its data, setting both marks to zero. 86 | pub(crate) fn new(data: EventData) -> Self { 87 | Self { 88 | data, 89 | start_mark: Mark::default(), 90 | end_mark: Mark::default(), 91 | } 92 | } 93 | 94 | /// Create the STREAM-START event. 95 | pub fn stream_start(encoding: Encoding) -> Self { 96 | Self::new(EventData::StreamStart { encoding }) 97 | } 98 | 99 | /// Create the STREAM-END event. 100 | pub fn stream_end() -> Self { 101 | Self::new(EventData::StreamEnd) 102 | } 103 | 104 | /// Create the DOCUMENT-START event. 105 | /// 106 | /// The `implicit` argument is considered as a stylistic parameter and may be 107 | /// ignored by the emitter. 108 | pub fn document_start( 109 | version_directive: Option, 110 | tag_directives_in: &[TagDirective], 111 | implicit: bool, 112 | ) -> Self { 113 | let tag_directives = tag_directives_in.to_vec(); 114 | 115 | Self::new(EventData::DocumentStart { 116 | version_directive, 117 | tag_directives, 118 | implicit, 119 | }) 120 | } 121 | 122 | /// Create the DOCUMENT-END event. 123 | /// 124 | /// The `implicit` argument is considered as a stylistic parameter and may be 125 | /// ignored by the emitter. 126 | pub fn document_end(implicit: bool) -> Self { 127 | Self::new(EventData::DocumentEnd { implicit }) 128 | } 129 | 130 | /// Create an ALIAS event. 
131 | pub fn alias(anchor: &str) -> Self { 132 | Self::new(EventData::Alias { 133 | anchor: String::from(anchor), 134 | }) 135 | } 136 | 137 | /// Create a SCALAR event. 138 | /// 139 | /// The `style` argument may be ignored by the emitter. 140 | /// 141 | /// Either the `tag` attribute or one of the `plain_implicit` and 142 | /// `quoted_implicit` flags must be set. 143 | /// 144 | pub fn scalar( 145 | anchor: Option<&str>, 146 | tag: Option<&str>, 147 | value: &str, 148 | plain_implicit: bool, 149 | quoted_implicit: bool, 150 | style: ScalarStyle, 151 | ) -> Self { 152 | let mut anchor_copy: Option = None; 153 | let mut tag_copy: Option = None; 154 | 155 | if let Some(anchor) = anchor { 156 | anchor_copy = Some(String::from(anchor)); 157 | } 158 | if let Some(tag) = tag { 159 | tag_copy = Some(String::from(tag)); 160 | } 161 | 162 | Self::new(EventData::Scalar { 163 | anchor: anchor_copy, 164 | tag: tag_copy, 165 | value: String::from(value), 166 | plain_implicit, 167 | quoted_implicit, 168 | style, 169 | }) 170 | } 171 | 172 | /// Create a SEQUENCE-START event. 173 | /// 174 | /// The `style` argument may be ignored by the emitter. 175 | /// 176 | /// Either the `tag` attribute or the `implicit` flag must be set. 177 | pub fn sequence_start( 178 | anchor: Option<&str>, 179 | tag: Option<&str>, 180 | implicit: bool, 181 | style: SequenceStyle, 182 | ) -> Self { 183 | let mut anchor_copy: Option = None; 184 | let mut tag_copy: Option = None; 185 | 186 | if let Some(anchor) = anchor { 187 | anchor_copy = Some(String::from(anchor)); 188 | } 189 | if let Some(tag) = tag { 190 | tag_copy = Some(String::from(tag)); 191 | } 192 | 193 | Self::new(EventData::SequenceStart { 194 | anchor: anchor_copy, 195 | tag: tag_copy, 196 | implicit, 197 | style, 198 | }) 199 | } 200 | 201 | /// Create a SEQUENCE-END event. 202 | pub fn sequence_end() -> Self { 203 | Self::new(EventData::SequenceEnd) 204 | } 205 | 206 | /// Create a MAPPING-START event. 
207 | /// 208 | /// The `style` argument may be ignored by the emitter. 209 | /// 210 | /// Either the `tag` attribute or the `implicit` flag must be set. 211 | pub fn mapping_start( 212 | anchor: Option<&str>, 213 | tag: Option<&str>, 214 | implicit: bool, 215 | style: MappingStyle, 216 | ) -> Self { 217 | let mut anchor_copy: Option = None; 218 | let mut tag_copy: Option = None; 219 | 220 | if let Some(anchor) = anchor { 221 | anchor_copy = Some(String::from(anchor)); 222 | } 223 | 224 | if let Some(tag) = tag { 225 | tag_copy = Some(String::from(tag)); 226 | } 227 | 228 | Self::new(EventData::MappingStart { 229 | anchor: anchor_copy, 230 | tag: tag_copy, 231 | implicit, 232 | style, 233 | }) 234 | } 235 | 236 | /// Create a MAPPING-END event. 237 | pub fn mapping_end() -> Self { 238 | Self::new(EventData::MappingEnd) 239 | } 240 | } 241 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![doc = include_str!("../README.md")] 2 | #![doc(html_root_url = "https://docs.rs/libyaml-safer/0.1.0")] 3 | #![warn(clippy::pedantic)] 4 | #![allow( 5 | clippy::cast_lossless, 6 | clippy::cast_possible_truncation, 7 | clippy::cast_possible_wrap, 8 | clippy::cast_sign_loss, 9 | clippy::fn_params_excessive_bools, 10 | clippy::manual_range_contains, 11 | clippy::missing_panics_doc, 12 | clippy::missing_errors_doc, 13 | clippy::module_name_repetitions, 14 | clippy::must_use_candidate, 15 | clippy::needless_pass_by_value, 16 | clippy::struct_excessive_bools, 17 | clippy::too_many_arguments, 18 | clippy::too_many_lines, 19 | clippy::unnecessary_wraps, 20 | clippy::match_wildcard_for_single_variants 21 | )] 22 | #![deny(unsafe_code)] 23 | 24 | extern crate alloc; 25 | 26 | #[macro_use] 27 | mod macros; 28 | 29 | mod document; 30 | mod emitter; 31 | mod error; 32 | mod event; 33 | mod parser; 34 | mod reader; 35 | mod scanner; 36 | mod token; 37 | 38 | pub 
use crate::document::*; 39 | pub use crate::emitter::*; 40 | pub use crate::error::*; 41 | pub use crate::event::*; 42 | pub use crate::parser::*; 43 | pub use crate::scanner::*; 44 | pub use crate::token::*; 45 | 46 | pub(crate) const INPUT_RAW_BUFFER_SIZE: usize = 16384; 47 | pub(crate) const INPUT_BUFFER_SIZE: usize = INPUT_RAW_BUFFER_SIZE; 48 | pub(crate) const OUTPUT_BUFFER_SIZE: usize = 16384; 49 | 50 | /// The tag `!!null` with the only possible value: `null`. 51 | pub const NULL_TAG: &str = "tag:yaml.org,2002:null"; 52 | /// The tag `!!bool` with the values: `true` and `false`. 53 | pub const BOOL_TAG: &str = "tag:yaml.org,2002:bool"; 54 | /// The tag `!!str` for string values. 55 | pub const STR_TAG: &str = "tag:yaml.org,2002:str"; 56 | /// The tag `!!int` for integer values. 57 | pub const INT_TAG: &str = "tag:yaml.org,2002:int"; 58 | /// The tag `!!float` for float values. 59 | pub const FLOAT_TAG: &str = "tag:yaml.org,2002:float"; 60 | /// The tag `!!timestamp` for date and time values. 61 | pub const TIMESTAMP_TAG: &str = "tag:yaml.org,2002:timestamp"; 62 | 63 | /// The tag `!!seq` is used to denote sequences. 64 | pub const SEQ_TAG: &str = "tag:yaml.org,2002:seq"; 65 | /// The tag `!!map` is used to denote mapping. 66 | pub const MAP_TAG: &str = "tag:yaml.org,2002:map"; 67 | 68 | /// The default scalar tag is `!!str`. 69 | pub const DEFAULT_SCALAR_TAG: &str = STR_TAG; 70 | /// The default sequence tag is `!!seq`. 71 | pub const DEFAULT_SEQUENCE_TAG: &str = SEQ_TAG; 72 | /// The default mapping tag is `!!map`. 73 | pub const DEFAULT_MAPPING_TAG: &str = MAP_TAG; 74 | 75 | /// The version directive data. 76 | #[derive(Clone, Copy, Debug, PartialEq)] 77 | #[non_exhaustive] 78 | pub struct VersionDirective { 79 | /// The major version number. 80 | pub major: i32, 81 | /// The minor version number. 82 | pub minor: i32, 83 | } 84 | 85 | /// The tag directive data. 
86 | #[derive(Debug, Clone, PartialEq)] 87 | #[non_exhaustive] 88 | pub struct TagDirective { 89 | /// The tag handle. 90 | pub handle: String, 91 | /// The tag prefix. 92 | pub prefix: String, 93 | } 94 | 95 | /// The stream encoding. 96 | #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 97 | #[non_exhaustive] 98 | pub enum Encoding { 99 | /// Let the parser choose the encoding. 100 | #[default] 101 | Any = 0, 102 | /// The default UTF-8 encoding. 103 | Utf8 = 1, 104 | /// The UTF-16-LE encoding with BOM. 105 | Utf16Le = 2, 106 | /// The UTF-16-BE encoding with BOM. 107 | Utf16Be = 3, 108 | } 109 | 110 | /// Line break type. 111 | #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 112 | #[non_exhaustive] 113 | pub enum Break { 114 | /// Let the parser choose the break type. 115 | #[default] 116 | Any = 0, 117 | /// Use CR for line breaks (Mac style). 118 | Cr = 1, 119 | /// Use LN for line breaks (Unix style). 120 | Ln = 2, 121 | /// Use CR LN for line breaks (DOS style). 122 | CrLn = 3, 123 | } 124 | 125 | /// Scalar styles. 126 | #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 127 | #[non_exhaustive] 128 | pub enum ScalarStyle { 129 | /// Let the emitter choose the style. 130 | #[default] 131 | Any = 0, 132 | /// The plain scalar style. 133 | Plain = 1, 134 | /// The single-quoted scalar style. 135 | SingleQuoted = 2, 136 | /// The double-quoted scalar style. 137 | DoubleQuoted = 3, 138 | /// The literal scalar style. 139 | Literal = 4, 140 | /// The folded scalar style. 141 | Folded = 5, 142 | } 143 | 144 | /// Sequence styles. 145 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 146 | #[non_exhaustive] 147 | pub enum SequenceStyle { 148 | /// Let the emitter choose the style. 149 | Any = 0, 150 | /// The block sequence style. 151 | Block = 1, 152 | /// The flow sequence style. 153 | Flow = 2, 154 | } 155 | 156 | /// Mapping styles. 
157 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 158 | #[non_exhaustive] 159 | pub enum MappingStyle { 160 | /// Let the emitter choose the style. 161 | Any = 0, 162 | /// The block mapping style. 163 | Block = 1, 164 | /// The flow mapping style. 165 | Flow = 2, 166 | } 167 | 168 | #[cfg(test)] 169 | mod tests { 170 | use super::*; 171 | 172 | #[test] 173 | fn sanity() { 174 | const SANITY_INPUT: &str = r#"unicode: "Sosa did fine.\u263A" 175 | control: "\b1998\t1999\t2000\n" 176 | hex esc: "\x0d\x0a is \r\n" 177 | 178 | single: '"Howdy!" he cried.' 179 | quoted: ' # Not a ''comment''.' 180 | tie-fighter: '|\-*-/|' 181 | "#; 182 | const SANITY_OUTPUT: &str = r#"unicode: "Sosa did fine.\u263A" 183 | control: "\b1998\t1999\t2000\n" 184 | hex esc: "\r\n is \r\n" 185 | single: '"Howdy!" he cried.' 186 | quoted: ' # Not a ''comment''.' 187 | tie-fighter: '|\-*-/|' 188 | "#; 189 | let mut parser = Parser::new(); 190 | let mut read_in = SANITY_INPUT.as_bytes(); 191 | parser.set_input_string(&mut read_in); 192 | let doc = Document::load(&mut parser).unwrap(); 193 | 194 | let mut emitter = Emitter::new(); 195 | let mut output = Vec::new(); 196 | emitter.set_output(&mut output); 197 | doc.dump(&mut emitter).unwrap(); 198 | let output_str = core::str::from_utf8(&output).expect("invalid UTF-8"); 199 | assert_eq!(output_str, SANITY_OUTPUT); 200 | } 201 | 202 | #[test] 203 | fn scanner_marks() { 204 | const INPUT: &str = "b: 205 | c: true"; 206 | let mut scanner = Scanner::new(); 207 | let mut read_in = INPUT.as_bytes(); 208 | scanner.set_input(&mut read_in); 209 | let events = scanner.collect::, _>>().unwrap(); 210 | let expected = &[ 211 | Token { 212 | data: TokenData::StreamStart { 213 | encoding: Encoding::Utf8, 214 | }, 215 | start_mark: Mark { 216 | index: 0, 217 | line: 0, 218 | column: 0, 219 | }, 220 | end_mark: Mark { 221 | index: 0, 222 | line: 0, 223 | column: 0, 224 | }, 225 | }, 226 | Token { 227 | data: TokenData::BlockMappingStart, 228 | 
start_mark: Mark { 229 | index: 0, 230 | line: 0, 231 | column: 0, 232 | }, 233 | end_mark: Mark { 234 | index: 0, 235 | line: 0, 236 | column: 0, 237 | }, 238 | }, 239 | Token { 240 | data: TokenData::Key, 241 | start_mark: Mark { 242 | index: 0, 243 | line: 0, 244 | column: 0, 245 | }, 246 | end_mark: Mark { 247 | index: 0, 248 | line: 0, 249 | column: 0, 250 | }, 251 | }, 252 | Token { 253 | data: TokenData::Scalar { 254 | value: String::from("b"), 255 | style: ScalarStyle::Plain, 256 | }, 257 | start_mark: Mark { 258 | index: 0, 259 | line: 0, 260 | column: 0, 261 | }, 262 | end_mark: Mark { 263 | index: 1, 264 | line: 0, 265 | column: 1, 266 | }, 267 | }, 268 | Token { 269 | data: TokenData::Value, 270 | start_mark: Mark { 271 | index: 1, 272 | line: 0, 273 | column: 1, 274 | }, 275 | end_mark: Mark { 276 | index: 2, 277 | line: 0, 278 | column: 2, 279 | }, 280 | }, 281 | Token { 282 | data: TokenData::Key, 283 | start_mark: Mark { 284 | index: 3, 285 | line: 1, 286 | column: 0, 287 | }, 288 | end_mark: Mark { 289 | index: 3, 290 | line: 1, 291 | column: 0, 292 | }, 293 | }, 294 | Token { 295 | data: TokenData::Scalar { 296 | value: String::from("c"), 297 | style: ScalarStyle::Plain, 298 | }, 299 | start_mark: Mark { 300 | index: 3, 301 | line: 1, 302 | column: 0, 303 | }, 304 | end_mark: Mark { 305 | index: 4, 306 | line: 1, 307 | column: 1, 308 | }, 309 | }, 310 | Token { 311 | data: TokenData::Value, 312 | start_mark: Mark { 313 | index: 4, 314 | line: 1, 315 | column: 1, 316 | }, 317 | end_mark: Mark { 318 | index: 5, 319 | line: 1, 320 | column: 2, 321 | }, 322 | }, 323 | Token { 324 | data: TokenData::Scalar { 325 | value: String::from("true"), 326 | style: ScalarStyle::Plain, 327 | }, 328 | start_mark: Mark { 329 | index: 6, 330 | line: 1, 331 | column: 3, 332 | }, 333 | end_mark: Mark { 334 | index: 10, 335 | line: 1, 336 | column: 7, 337 | }, 338 | }, 339 | Token { 340 | data: TokenData::BlockEnd, 341 | start_mark: Mark { 342 | index: 10, 343 | line: 
2,
                    column: 0,
                },
                end_mark: Mark {
                    index: 10,
                    line: 2,
                    column: 0,
                },
            },
            Token {
                data: TokenData::StreamEnd,
                start_mark: Mark {
                    index: 10,
                    line: 2,
                    column: 0,
                },
                end_mark: Mark {
                    index: 10,
                    line: 2,
                    column: 0,
                },
            },
        ];
        assert_eq!(
            events,
            expected,
            "diff:\n{}",
            zip_longest(
                format!("{events:#?}").lines(),
                format!("{expected:#?}").lines()
            )
            .map(|(a, b)| {
                let a = a.unwrap_or_default();
                let b = b.unwrap_or_default();
                format!("{a:<40} {b}")
            })
            .collect::<Vec<_>>()
            .join("\n")
        );
    }

    /// Zip two iterators, padding the shorter one with `None` so that the
    /// result runs to the length of the longer input.
    fn zip_longest<A: Iterator, B: Iterator>(
        a: A,
        b: B,
    ) -> impl Iterator<Item = (Option<A::Item>, Option<B::Item>)> {
        let mut a = a.map(Some).collect::<Vec<_>>();
        let mut b = b.map(Some).collect::<Vec<_>>();
        let len = a.len().max(b.len());
        a.resize_with(len, || None);
        b.resize_with(len, || None);
        a.into_iter()
            .zip(b)
            .take_while(|(a, b)| a.is_some() || b.is_some())
    }
}
--------------------------------------------------------------------------------
/src/macros.rs:
--------------------------------------------------------------------------------
// True if the octet at `offset` in `buffer` equals `octet`.
macro_rules! CHECK_AT {
    ($buffer:expr, $octet:expr, $offset:expr) => {
        $buffer.get($offset).copied() == Some($octet)
    };
}

// True if the first octet in `buffer` equals `octet`.
macro_rules! CHECK {
    ($buffer:expr, $octet:expr) => {
        $buffer.get(0).copied() == Some($octet)
    };
}

macro_rules! IS_ALPHA {
    ($buffer:expr) => {
        crate::macros::is_alpha($buffer.get(0).copied())
    };
}

/// True for the characters permitted in anchor and tag-handle names:
/// `[0-9A-Za-z_-]`. `None` (end of input) is not alphabetic.
pub(crate) fn is_alpha(ch: impl Into<Option<char>>) -> bool {
    let Some(ch) = ch.into() else {
        return false;
    };
    ch >= '0' && ch <= '9'
        || ch >= 'A' && ch <= 'Z'
        || ch >= 'a' && ch <= 'z'
        || ch == '_'
        || ch == '-'
}

macro_rules!
IS_DIGIT { 31 | ($buffer:expr) => { 32 | $buffer 33 | .get(0) 34 | .copied() 35 | .map(|ch| ch.is_digit(10)) 36 | .unwrap_or(false) 37 | }; 38 | } 39 | 40 | macro_rules! AS_DIGIT { 41 | ($buffer:expr) => { 42 | $buffer 43 | .get(0) 44 | .copied() 45 | .expect("out of bounds buffer access") 46 | .to_digit(10) 47 | .expect("not in digit range") 48 | }; 49 | } 50 | 51 | macro_rules! IS_HEX_AT { 52 | ($buffer:expr, $offset:expr) => { 53 | if let Some(ch) = $buffer.get($offset).copied() { 54 | ch.is_digit(16) 55 | } else { 56 | false 57 | } 58 | }; 59 | } 60 | 61 | macro_rules! AS_HEX_AT { 62 | ($buffer:expr, $offset:expr) => { 63 | $buffer 64 | .get($offset) 65 | .copied() 66 | .expect("out of range buffer access") 67 | .to_digit(16) 68 | .expect("not in digit range (hex)") 69 | }; 70 | } 71 | 72 | pub(crate) fn is_ascii(ch: char) -> bool { 73 | ch.is_ascii() 74 | } 75 | 76 | pub(crate) fn is_printable(ch: char) -> bool { 77 | match ch { 78 | '\u{feff}' | '\u{fffe}' | '\u{ffff}' => false, 79 | // ASCII 80 | '\x0a' 81 | | '\x20'..='\x7e' 82 | | '\u{00a0}'..='\u{00bf}' 83 | | '\u{00c0}'..='\u{cfff}' 84 | | '\u{d000}'..='\u{d7ff}' 85 | | '\u{e000}'..='\u{efff}' 86 | | '\u{f000}'..='\u{fffd}' 87 | | '\u{10000}'..='\u{10ffff}' => true, 88 | _ => false, 89 | } 90 | } 91 | 92 | macro_rules! IS_Z_AT { 93 | ($buffer:expr, $offset:expr) => { 94 | $buffer.get($offset).is_none() 95 | }; 96 | } 97 | 98 | macro_rules! IS_Z { 99 | ($string:expr) => { 100 | IS_Z_AT!($string, 0) 101 | }; 102 | } 103 | 104 | macro_rules! IS_BOM { 105 | ($buffer:expr) => { 106 | CHECK!($buffer, '\u{feff}') 107 | }; 108 | } 109 | 110 | pub(crate) fn is_bom(ch: char) -> bool { 111 | ch == '\u{7eff}' 112 | } 113 | 114 | macro_rules! IS_SPACE_AT { 115 | ($string:expr, $offset:expr) => { 116 | CHECK_AT!($string, ' ', $offset) 117 | }; 118 | } 119 | 120 | macro_rules! 
macro_rules! IS_SPACE {
    ($string:expr) => {
        IS_SPACE_AT!($string, 0)
    };
}

/// Is `ch` an ASCII space?
pub(crate) fn is_space(ch: impl Into<Option<char>>) -> bool {
    matches!(ch.into(), Some(' '))
}

macro_rules! IS_TAB_AT {
    ($buffer:expr, $offset:expr) => {
        CHECK_AT!($buffer, '\t', $offset)
    };
}

macro_rules! IS_TAB {
    ($string:expr) => {
        IS_TAB_AT!($string, 0)
    };
}

/// Is `ch` a horizontal tab?
pub(crate) fn is_tab(ch: impl Into<Option<char>>) -> bool {
    matches!(ch.into(), Some('\t'))
}

macro_rules! IS_BLANK_AT {
    ($buffer:expr, $offset:expr) => {{
        let ch = $buffer.get($offset).copied();
        $crate::macros::is_space(ch) || $crate::macros::is_tab(ch)
    }};
}

macro_rules! IS_BLANK {
    ($string:expr) => {
        IS_BLANK_AT!($string, 0)
    };
}

/// Space or tab.
pub(crate) fn is_blank(ch: impl Into<Option<char>>) -> bool {
    matches!(ch.into(), Some(' ' | '\t'))
}

/// Blank (space/tab), line break, or end of input.
pub(crate) fn is_blankz(ch: impl Into<Option<char>>) -> bool {
    match ch.into() {
        None => true,
        Some(c) => is_blank(c) || is_break(c),
    }
}

macro_rules! IS_BREAK_AT {
    ($buffer:expr, $offset:expr) => {
        $crate::macros::is_break($buffer.get($offset).copied())
    };
}

/// One of the YAML line-break characters: CR, LF, NEL, LS, PS.
pub(crate) fn is_break(ch: impl Into<Option<char>>) -> bool {
    match ch.into() {
        Some('\r' | '\n' | '\u{0085}' | '\u{2028}' | '\u{2029}') => true,
        _ => false,
    }
}

/// Line break or end of input.
pub(crate) fn is_breakz(ch: impl Into<Option<char>>) -> bool {
    match ch.into() {
        None => true,
        ch => is_break(ch),
    }
}

macro_rules! IS_BREAK {
    ($string:expr) => {
        IS_BREAK_AT!($string, 0)
    };
}

macro_rules! IS_BREAKZ_AT {
    ($buffer:expr, $offset:expr) => {{
        let ch = $buffer.get($offset).copied();
        $crate::macros::is_breakz(ch)
    }};
}

macro_rules! IS_BREAKZ {
    ($string:expr) => {
        IS_BREAKZ_AT!($string, 0)
    };
}
IS_BLANKZ_AT { 207 | ($buffer:expr, $offset:expr) => {{ 208 | let ch = $buffer.get($offset).copied(); 209 | $crate::macros::is_blank(ch) || $crate::macros::is_breakz(ch) 210 | }}; 211 | } 212 | 213 | macro_rules! IS_BLANKZ { 214 | ($string:expr) => { 215 | IS_BLANKZ_AT!($string, 0) 216 | }; 217 | } 218 | 219 | #[cfg(test)] 220 | mod tests { 221 | use super::*; 222 | 223 | #[test] 224 | fn printable() { 225 | for ch in "🎉".chars() { 226 | assert!(is_printable(ch)); 227 | } 228 | for ch in "\u{1f389}".chars() { 229 | assert!(is_printable(ch)); 230 | } 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | use crate::scanner::Scanner; 2 | use crate::{ 3 | Encoding, Error, Event, EventData, MappingStyle, Mark, Result, ScalarStyle, SequenceStyle, 4 | TagDirective, TokenData, VersionDirective, 5 | }; 6 | 7 | /// The parser structure. 8 | #[non_exhaustive] 9 | pub struct Parser<'r> { 10 | pub(crate) scanner: Scanner<'r>, 11 | /// The parser states stack. 12 | pub(crate) states: Vec, 13 | /// The current parser state. 14 | pub(crate) state: ParserState, 15 | /// The stack of marks. 16 | pub(crate) marks: Vec, 17 | /// The list of TAG directives. 18 | pub(crate) tag_directives: Vec, 19 | /// The alias data. 20 | pub(crate) aliases: Vec, 21 | } 22 | 23 | impl<'r> Default for Parser<'r> { 24 | fn default() -> Self { 25 | Self::new() 26 | } 27 | } 28 | 29 | /// This structure holds information about a potential simple key. 30 | #[derive(Copy, Clone)] 31 | #[non_exhaustive] 32 | pub struct SimpleKey { 33 | /// Is a simple key possible? 34 | pub possible: bool, 35 | /// Is a simple key required? 36 | pub required: bool, 37 | /// The number of the token. 38 | pub token_number: usize, 39 | /// The position mark. 40 | pub mark: Mark, 41 | } 42 | 43 | /// The states of the parser. 
// NOTE(review): the explicit discriminants appear to mirror libyaml's
// yaml_parser_state_t ordering — confirm before relying on the values.
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[non_exhaustive]
pub enum ParserState {
    /// Expect STREAM-START.
    #[default]
    StreamStart = 0,
    /// Expect the beginning of an implicit document.
    ImplicitDocumentStart = 1,
    /// Expect DOCUMENT-START.
    DocumentStart = 2,
    /// Expect the content of a document.
    DocumentContent = 3,
    /// Expect DOCUMENT-END.
    DocumentEnd = 4,
    /// Expect a block node.
    BlockNode = 5,
    /// Expect a block node or indentless sequence.
    BlockNodeOrIndentlessSequence = 6,
    /// Expect a flow node.
    FlowNode = 7,
    /// Expect the first entry of a block sequence.
    BlockSequenceFirstEntry = 8,
    /// Expect an entry of a block sequence.
    BlockSequenceEntry = 9,
    /// Expect an entry of an indentless sequence.
    IndentlessSequenceEntry = 10,
    /// Expect the first key of a block mapping.
    BlockMappingFirstKey = 11,
    /// Expect a block mapping key.
    BlockMappingKey = 12,
    /// Expect a block mapping value.
    BlockMappingValue = 13,
    /// Expect the first entry of a flow sequence.
    FlowSequenceFirstEntry = 14,
    /// Expect an entry of a flow sequence.
    FlowSequenceEntry = 15,
    /// Expect a key of an ordered mapping.
    FlowSequenceEntryMappingKey = 16,
    /// Expect a value of an ordered mapping.
    FlowSequenceEntryMappingValue = 17,
    /// Expect the end of an ordered mapping entry.
    FlowSequenceEntryMappingEnd = 18,
    /// Expect the first key of a flow mapping.
    FlowMappingFirstKey = 19,
    /// Expect a key of a flow mapping.
    FlowMappingKey = 20,
    /// Expect a value of a flow mapping.
    FlowMappingValue = 21,
    /// Expect an empty value of a flow mapping.
    FlowMappingEmptyValue = 22,
    /// Expect nothing.
    End = 23,
}

/// This structure holds aliases data.
#[non_exhaustive]
pub struct AliasData {
    /// The anchor.
    pub anchor: String,
    /// The node id.
    // NOTE(review): presumably an index into the document's node list —
    // confirm against the Document loading code.
    pub index: i32,
    /// The anchor mark.
    pub mark: Mark,
}

impl<'r> Iterator for Parser<'r> {
    type Item = Result;

    // Yields events until the scanner has produced STREAM-END or the
    // parser reaches its terminal `End` state; after that, always `None`
    // (which is what makes the `FusedIterator` impl below sound).
    fn next(&mut self) -> Option {
        if self.scanner.stream_end_produced || self.state == ParserState::End {
            None
        } else {
            Some(self.parse())
        }
    }
}

impl<'r> core::iter::FusedIterator for Parser<'r> {}

impl<'r> Parser<'r> {
    /// Create a parser.
    pub fn new() -> Parser<'r> {
        Parser {
            scanner: Scanner::new(),
            // Preallocate the small work stacks to avoid early regrowth.
            states: Vec::with_capacity(16),
            state: ParserState::default(),
            marks: Vec::with_capacity(16),
            tag_directives: Vec::with_capacity(16),
            aliases: Vec::new(),
        }
    }

    /// Reset the parser state.
    // Replaces the entire parser, including its scanner and any configured
    // input, with a freshly constructed one.
    pub fn reset(&mut self) {
        *self = Self::new();
    }

    /// Set a string input.
    pub fn set_input_string(&mut self, input: &'r mut &[u8]) {
        self.scanner.set_input_string(input);
    }

    /// Set a generic input handler.
    pub fn set_input(&mut self, input: &'r mut dyn std::io::BufRead) {
        self.scanner.set_input(input);
    }

    /// Set the source encoding.
    pub fn set_encoding(&mut self, encoding: Encoding) {
        self.scanner.set_encoding(encoding);
    }

    /// Parse the input stream and produce the next parsing event.
    ///
    /// Call the function subsequently to produce a sequence of events
    /// corresponding to the input stream. The initial event has the type
    /// [`EventData::StreamStart`](crate::EventData::StreamStart) while the
    /// ending event has the type
    /// [`EventData::StreamEnd`](crate::EventData::StreamEnd).
163 | /// 164 | /// An application must not alternate the calls of [`Parser::parse()`] with 165 | /// the calls of [`Document::load()`](crate::Document::load). Doing this 166 | /// will break the parser. 167 | pub fn parse(&mut self) -> Result { 168 | if self.scanner.stream_end_produced || self.state == ParserState::End { 169 | return Ok(Event::stream_end()); 170 | } 171 | self.state_machine() 172 | } 173 | 174 | fn state_machine(&mut self) -> Result { 175 | match self.state { 176 | ParserState::StreamStart => self.parse_stream_start(), 177 | ParserState::ImplicitDocumentStart => self.parse_document_start(true), 178 | ParserState::DocumentStart => self.parse_document_start(false), 179 | ParserState::DocumentContent => self.parse_document_content(), 180 | ParserState::DocumentEnd => self.parse_document_end(), 181 | ParserState::BlockNode => self.parse_node(true, false), 182 | ParserState::BlockNodeOrIndentlessSequence => self.parse_node(true, true), 183 | ParserState::FlowNode => self.parse_node(false, false), 184 | ParserState::BlockSequenceFirstEntry => self.parse_block_sequence_entry(true), 185 | ParserState::BlockSequenceEntry => self.parse_block_sequence_entry(false), 186 | ParserState::IndentlessSequenceEntry => self.parse_indentless_sequence_entry(), 187 | ParserState::BlockMappingFirstKey => self.parse_block_mapping_key(true), 188 | ParserState::BlockMappingKey => self.parse_block_mapping_key(false), 189 | ParserState::BlockMappingValue => self.parse_block_mapping_value(), 190 | ParserState::FlowSequenceFirstEntry => self.parse_flow_sequence_entry(true), 191 | ParserState::FlowSequenceEntry => self.parse_flow_sequence_entry(false), 192 | ParserState::FlowSequenceEntryMappingKey => { 193 | self.parse_flow_sequence_entry_mapping_key() 194 | } 195 | ParserState::FlowSequenceEntryMappingValue => { 196 | self.parse_flow_sequence_entry_mapping_value() 197 | } 198 | ParserState::FlowSequenceEntryMappingEnd => { 199 | self.parse_flow_sequence_entry_mapping_end() 200 
| } 201 | ParserState::FlowMappingFirstKey => self.parse_flow_mapping_key(true), 202 | ParserState::FlowMappingKey => self.parse_flow_mapping_key(false), 203 | ParserState::FlowMappingValue => self.parse_flow_mapping_value(false), 204 | ParserState::FlowMappingEmptyValue => self.parse_flow_mapping_value(true), 205 | ParserState::End => panic!("parser end state reached unexpectedly"), 206 | } 207 | } 208 | 209 | fn parse_stream_start(&mut self) -> Result { 210 | let token = self.scanner.peek()?; 211 | 212 | if let TokenData::StreamStart { encoding } = &token.data { 213 | let event = Event { 214 | data: EventData::StreamStart { 215 | encoding: *encoding, 216 | }, 217 | start_mark: token.start_mark, 218 | end_mark: token.end_mark, 219 | }; 220 | self.state = ParserState::ImplicitDocumentStart; 221 | self.scanner.skip_token(); 222 | Ok(event) 223 | } else { 224 | let mark = token.start_mark; 225 | Err(Error::parser( 226 | "", 227 | Mark::default(), 228 | "did not find expected ", 229 | mark, 230 | )) 231 | } 232 | } 233 | 234 | fn parse_document_start(&mut self, implicit: bool) -> Result { 235 | let mut version_directive: Option = None; 236 | 237 | let mut tag_directives = vec![]; 238 | let mut token = self.scanner.peek()?; 239 | if !implicit { 240 | while let TokenData::DocumentEnd = &token.data { 241 | self.scanner.skip_token(); 242 | token = self.scanner.peek()?; 243 | } 244 | } 245 | if implicit 246 | && !matches!( 247 | token.data, 248 | TokenData::VersionDirective { .. } 249 | | TokenData::TagDirective { .. 
} 250 | | TokenData::DocumentStart 251 | | TokenData::StreamEnd 252 | ) 253 | { 254 | let event = Event { 255 | data: EventData::DocumentStart { 256 | version_directive: None, 257 | tag_directives: vec![], 258 | implicit: true, 259 | }, 260 | start_mark: token.start_mark, 261 | end_mark: token.end_mark, 262 | }; 263 | self.process_directives(None, None)?; 264 | self.states.push(ParserState::DocumentEnd); 265 | self.state = ParserState::BlockNode; 266 | Ok(event) 267 | } else if !matches!(token.data, TokenData::StreamEnd) { 268 | let end_mark: Mark; 269 | let start_mark: Mark = token.start_mark; 270 | self.process_directives(Some(&mut version_directive), Some(&mut tag_directives))?; 271 | token = self.scanner.peek()?; 272 | if let TokenData::DocumentStart = token.data { 273 | end_mark = token.end_mark; 274 | let event = Event { 275 | data: EventData::DocumentStart { 276 | version_directive, 277 | tag_directives: core::mem::take(&mut tag_directives), 278 | implicit: false, 279 | }, 280 | start_mark, 281 | end_mark, 282 | }; 283 | self.states.push(ParserState::DocumentEnd); 284 | self.state = ParserState::DocumentContent; 285 | self.scanner.skip_token(); 286 | Ok(event) 287 | } else { 288 | Err(Error::parser( 289 | "", 290 | Mark::default(), 291 | "did not find expected ", 292 | token.start_mark, 293 | )) 294 | } 295 | } else { 296 | let event = Event { 297 | data: EventData::StreamEnd, 298 | start_mark: token.start_mark, 299 | end_mark: token.end_mark, 300 | }; 301 | self.state = ParserState::End; 302 | self.scanner.skip_token(); 303 | Ok(event) 304 | } 305 | } 306 | 307 | fn parse_document_content(&mut self) -> Result { 308 | let token = self.scanner.peek()?; 309 | if let TokenData::VersionDirective { .. } 310 | | TokenData::TagDirective { .. 
} 311 | | TokenData::DocumentStart 312 | | TokenData::DocumentEnd 313 | | TokenData::StreamEnd = &token.data 314 | { 315 | let mark = token.start_mark; 316 | self.state = self.states.pop().unwrap(); 317 | Self::process_empty_scalar(mark) 318 | } else { 319 | self.parse_node(true, false) 320 | } 321 | } 322 | 323 | fn parse_document_end(&mut self) -> Result { 324 | let mut end_mark: Mark; 325 | let mut implicit = true; 326 | let token = self.scanner.peek()?; 327 | end_mark = token.start_mark; 328 | let start_mark: Mark = end_mark; 329 | if let TokenData::DocumentEnd = &token.data { 330 | end_mark = token.end_mark; 331 | self.scanner.skip_token(); 332 | implicit = false; 333 | } 334 | self.tag_directives.clear(); 335 | self.state = ParserState::DocumentStart; 336 | Ok(Event { 337 | data: EventData::DocumentEnd { implicit }, 338 | start_mark, 339 | end_mark, 340 | }) 341 | } 342 | 343 | fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> Result { 344 | let mut anchor: Option = None; 345 | let mut tag_handle: Option = None; 346 | let mut tag_suffix: Option = None; 347 | let mut tag: Option = None; 348 | let mut start_mark: Mark; 349 | let mut end_mark: Mark; 350 | let mut tag_mark = Mark { 351 | index: 0, 352 | line: 0, 353 | column: 0, 354 | }; 355 | 356 | let mut token = self.scanner.peek_mut()?; 357 | 358 | if let TokenData::Alias { value } = &mut token.data { 359 | let event = Event { 360 | data: EventData::Alias { 361 | anchor: core::mem::take(value), 362 | }, 363 | start_mark: token.start_mark, 364 | end_mark: token.end_mark, 365 | }; 366 | self.state = self.states.pop().unwrap(); 367 | self.scanner.skip_token(); 368 | return Ok(event); 369 | } 370 | 371 | end_mark = token.start_mark; 372 | start_mark = end_mark; 373 | if let TokenData::Anchor { value } = &mut token.data { 374 | anchor = Some(core::mem::take(value)); 375 | start_mark = token.start_mark; 376 | end_mark = token.end_mark; 377 | self.scanner.skip_token(); 378 | token = 
self.scanner.peek_mut()?; 379 | if let TokenData::Tag { handle, suffix } = &mut token.data { 380 | tag_handle = Some(core::mem::take(handle)); 381 | tag_suffix = Some(core::mem::take(suffix)); 382 | tag_mark = token.start_mark; 383 | end_mark = token.end_mark; 384 | self.scanner.skip_token(); 385 | } 386 | } else if let TokenData::Tag { handle, suffix } = &mut token.data { 387 | tag_handle = Some(core::mem::take(handle)); 388 | tag_suffix = Some(core::mem::take(suffix)); 389 | tag_mark = token.start_mark; 390 | start_mark = tag_mark; 391 | end_mark = token.end_mark; 392 | self.scanner.skip_token(); 393 | token = self.scanner.peek_mut()?; 394 | if let TokenData::Anchor { value } = &mut token.data { 395 | anchor = Some(core::mem::take(value)); 396 | end_mark = token.end_mark; 397 | self.scanner.skip_token(); 398 | } 399 | } 400 | 401 | if let Some(ref tag_handle_value) = tag_handle { 402 | if tag_handle_value.is_empty() { 403 | tag = tag_suffix; 404 | } else { 405 | for tag_directive in &self.tag_directives { 406 | if tag_directive.handle == *tag_handle_value { 407 | let suffix = tag_suffix.as_deref().unwrap_or(""); 408 | tag = Some(alloc::format!("{}{}", tag_directive.prefix, suffix)); 409 | break; 410 | } 411 | } 412 | if tag.is_none() { 413 | return Err(Error::parser( 414 | "while parsing a node", 415 | start_mark, 416 | "found undefined tag handle", 417 | tag_mark, 418 | )); 419 | } 420 | } 421 | } 422 | 423 | let token = self.scanner.peek_mut()?; 424 | 425 | let implicit = tag.is_none() || tag.as_deref() == Some(""); 426 | 427 | if indentless_sequence && matches!(token.data, TokenData::BlockEntry) { 428 | end_mark = token.end_mark; 429 | self.state = ParserState::IndentlessSequenceEntry; 430 | let event = Event { 431 | data: EventData::SequenceStart { 432 | anchor, 433 | tag, 434 | implicit, 435 | style: SequenceStyle::Block, 436 | }, 437 | start_mark, 438 | end_mark, 439 | }; 440 | Ok(event) 441 | } else if let TokenData::Scalar { value, style } = &mut 
token.data { 442 | let mut plain_implicit = false; 443 | let mut quoted_implicit = false; 444 | end_mark = token.end_mark; 445 | if *style == ScalarStyle::Plain && tag.is_none() || tag.as_deref() == Some("!") { 446 | plain_implicit = true; 447 | } else if tag.is_none() { 448 | quoted_implicit = true; 449 | } 450 | let event = Event { 451 | data: EventData::Scalar { 452 | anchor, 453 | tag, 454 | value: core::mem::take(value), 455 | plain_implicit, 456 | quoted_implicit, 457 | style: *style, 458 | }, 459 | start_mark, 460 | end_mark, 461 | }; 462 | self.state = self.states.pop().unwrap(); 463 | self.scanner.skip_token(); 464 | return Ok(event); 465 | } else if let TokenData::FlowSequenceStart = &token.data { 466 | end_mark = token.end_mark; 467 | self.state = ParserState::FlowSequenceFirstEntry; 468 | let event = Event { 469 | data: EventData::SequenceStart { 470 | anchor, 471 | tag, 472 | implicit, 473 | style: SequenceStyle::Flow, 474 | }, 475 | start_mark, 476 | end_mark, 477 | }; 478 | return Ok(event); 479 | } else if let TokenData::FlowMappingStart = &token.data { 480 | end_mark = token.end_mark; 481 | self.state = ParserState::FlowMappingFirstKey; 482 | let event = Event { 483 | data: EventData::MappingStart { 484 | anchor, 485 | tag, 486 | implicit, 487 | style: MappingStyle::Flow, 488 | }, 489 | start_mark, 490 | end_mark, 491 | }; 492 | return Ok(event); 493 | } else if block && matches!(token.data, TokenData::BlockSequenceStart) { 494 | end_mark = token.end_mark; 495 | self.state = ParserState::BlockSequenceFirstEntry; 496 | let event = Event { 497 | data: EventData::SequenceStart { 498 | anchor, 499 | tag, 500 | implicit, 501 | style: SequenceStyle::Block, 502 | }, 503 | start_mark, 504 | end_mark, 505 | }; 506 | return Ok(event); 507 | } else if block && matches!(token.data, TokenData::BlockMappingStart) { 508 | end_mark = token.end_mark; 509 | self.state = ParserState::BlockMappingFirstKey; 510 | let event = Event { 511 | data: EventData::MappingStart 
{ 512 | anchor, 513 | tag, 514 | implicit, 515 | style: MappingStyle::Block, 516 | }, 517 | start_mark, 518 | end_mark, 519 | }; 520 | return Ok(event); 521 | } else if anchor.is_some() || tag.is_some() { 522 | self.state = self.states.pop().unwrap(); 523 | let event = Event { 524 | data: EventData::Scalar { 525 | anchor, 526 | tag, 527 | value: String::new(), 528 | plain_implicit: implicit, 529 | quoted_implicit: false, 530 | style: ScalarStyle::Plain, 531 | }, 532 | start_mark, 533 | end_mark, 534 | }; 535 | return Ok(event); 536 | } else { 537 | return Err(Error::parser( 538 | if block { 539 | "while parsing a block node" 540 | } else { 541 | "while parsing a flow node" 542 | }, 543 | start_mark, 544 | "did not find expected node content", 545 | token.start_mark, 546 | )); 547 | } 548 | } 549 | 550 | fn parse_block_sequence_entry(&mut self, first: bool) -> Result { 551 | if first { 552 | let token = self.scanner.peek()?; 553 | let mark = token.start_mark; 554 | self.marks.push(mark); 555 | self.scanner.skip_token(); 556 | } 557 | 558 | let mut token = self.scanner.peek()?; 559 | 560 | if let TokenData::BlockEntry = &token.data { 561 | let mark: Mark = token.end_mark; 562 | self.scanner.skip_token(); 563 | token = self.scanner.peek()?; 564 | if matches!(token.data, TokenData::BlockEntry | TokenData::BlockEnd) { 565 | self.state = ParserState::BlockSequenceEntry; 566 | Self::process_empty_scalar(mark) 567 | } else { 568 | self.states.push(ParserState::BlockSequenceEntry); 569 | self.parse_node(true, false) 570 | } 571 | } else if let TokenData::BlockEnd = token.data { 572 | let event = Event { 573 | data: EventData::SequenceEnd, 574 | start_mark: token.start_mark, 575 | end_mark: token.end_mark, 576 | }; 577 | self.state = self.states.pop().unwrap(); 578 | let _ = self.marks.pop(); 579 | self.scanner.skip_token(); 580 | Ok(event) 581 | } else { 582 | let token_mark = token.start_mark; 583 | let mark = self.marks.pop().unwrap(); 584 | return Err(Error::parser( 585 
| "while parsing a block collection", 586 | mark, 587 | "did not find expected '-' indicator", 588 | token_mark, 589 | )); 590 | } 591 | } 592 | 593 | fn parse_indentless_sequence_entry(&mut self) -> Result { 594 | let mut token = self.scanner.peek()?; 595 | if let TokenData::BlockEntry = token.data { 596 | let mark: Mark = token.end_mark; 597 | self.scanner.skip_token(); 598 | token = self.scanner.peek()?; 599 | 600 | if matches!( 601 | token.data, 602 | TokenData::BlockEntry | TokenData::Key | TokenData::Value | TokenData::BlockEnd 603 | ) { 604 | self.state = ParserState::IndentlessSequenceEntry; 605 | Self::process_empty_scalar(mark) 606 | } else { 607 | self.states.push(ParserState::IndentlessSequenceEntry); 608 | self.parse_node(true, false) 609 | } 610 | } else { 611 | let event = Event { 612 | data: EventData::SequenceEnd, 613 | start_mark: token.start_mark, 614 | end_mark: token.end_mark, 615 | }; 616 | self.state = self.states.pop().unwrap(); 617 | Ok(event) 618 | } 619 | } 620 | 621 | fn parse_block_mapping_key(&mut self, first: bool) -> Result { 622 | if first { 623 | let token = self.scanner.peek()?; 624 | let mark = token.start_mark; 625 | self.marks.push(mark); 626 | self.scanner.skip_token(); 627 | } 628 | 629 | let mut token = self.scanner.peek()?; 630 | if let TokenData::Key = token.data { 631 | let mark: Mark = token.end_mark; 632 | self.scanner.skip_token(); 633 | token = self.scanner.peek()?; 634 | if matches!( 635 | token.data, 636 | TokenData::Key | TokenData::Value | TokenData::BlockEnd 637 | ) { 638 | self.state = ParserState::BlockMappingValue; 639 | Self::process_empty_scalar(mark) 640 | } else { 641 | self.states.push(ParserState::BlockMappingValue); 642 | self.parse_node(true, true) 643 | } 644 | } else if let TokenData::BlockEnd = token.data { 645 | let event = Event { 646 | data: EventData::MappingEnd, 647 | start_mark: token.start_mark, 648 | end_mark: token.end_mark, 649 | }; 650 | self.state = self.states.pop().unwrap(); 651 | _ = 
self.marks.pop(); 652 | self.scanner.skip_token(); 653 | Ok(event) 654 | } else { 655 | let token_mark = token.start_mark; 656 | let mark = self.marks.pop().unwrap(); 657 | Err(Error::parser( 658 | "while parsing a block mapping", 659 | mark, 660 | "did not find expected key", 661 | token_mark, 662 | )) 663 | } 664 | } 665 | 666 | fn parse_block_mapping_value(&mut self) -> Result { 667 | let mut token = self.scanner.peek()?; 668 | if let TokenData::Value = token.data { 669 | let mark: Mark = token.end_mark; 670 | self.scanner.skip_token(); 671 | token = self.scanner.peek()?; 672 | if matches!( 673 | token.data, 674 | TokenData::Key | TokenData::Value | TokenData::BlockEnd 675 | ) { 676 | self.state = ParserState::BlockMappingKey; 677 | Self::process_empty_scalar(mark) 678 | } else { 679 | self.states.push(ParserState::BlockMappingKey); 680 | self.parse_node(true, true) 681 | } 682 | } else { 683 | let mark = token.start_mark; 684 | self.state = ParserState::BlockMappingKey; 685 | Self::process_empty_scalar(mark) 686 | } 687 | } 688 | 689 | fn parse_flow_sequence_entry(&mut self, first: bool) -> Result { 690 | if first { 691 | let token = self.scanner.peek()?; 692 | let mark = token.start_mark; 693 | self.marks.push(mark); 694 | self.scanner.skip_token(); 695 | } 696 | 697 | let mut token = self.scanner.peek()?; 698 | if !matches!(token.data, TokenData::FlowSequenceEnd) { 699 | if !first { 700 | if let TokenData::FlowEntry = token.data { 701 | self.scanner.skip_token(); 702 | token = self.scanner.peek()?; 703 | } else { 704 | let token_mark = token.start_mark; 705 | let mark = self.marks.pop().unwrap(); 706 | return Err(Error::parser( 707 | "while parsing a flow sequence", 708 | mark, 709 | "did not find expected ',' or ']'", 710 | token_mark, 711 | )); 712 | } 713 | } 714 | if let TokenData::Key = token.data { 715 | let event = Event { 716 | data: EventData::MappingStart { 717 | anchor: None, 718 | tag: None, 719 | implicit: true, 720 | style: MappingStyle::Flow, 
721 | }, 722 | start_mark: token.start_mark, 723 | end_mark: token.end_mark, 724 | }; 725 | self.state = ParserState::FlowSequenceEntryMappingKey; 726 | self.scanner.skip_token(); 727 | return Ok(event); 728 | } else if !matches!(token.data, TokenData::FlowSequenceEnd) { 729 | self.states.push(ParserState::FlowSequenceEntry); 730 | return self.parse_node(false, false); 731 | } 732 | } 733 | let event = Event { 734 | data: EventData::SequenceEnd, 735 | start_mark: token.start_mark, 736 | end_mark: token.end_mark, 737 | }; 738 | self.state = self.states.pop().unwrap(); 739 | _ = self.marks.pop(); 740 | self.scanner.skip_token(); 741 | Ok(event) 742 | } 743 | 744 | fn parse_flow_sequence_entry_mapping_key(&mut self) -> Result { 745 | let token = self.scanner.peek()?; 746 | if matches!( 747 | token.data, 748 | TokenData::Value | TokenData::FlowEntry | TokenData::FlowSequenceEnd 749 | ) { 750 | let mark: Mark = token.end_mark; 751 | self.scanner.skip_token(); 752 | self.state = ParserState::FlowSequenceEntryMappingValue; 753 | Self::process_empty_scalar(mark) 754 | } else { 755 | self.states.push(ParserState::FlowSequenceEntryMappingValue); 756 | self.parse_node(false, false) 757 | } 758 | } 759 | 760 | fn parse_flow_sequence_entry_mapping_value(&mut self) -> Result { 761 | let mut token = self.scanner.peek()?; 762 | if let TokenData::Value = token.data { 763 | self.scanner.skip_token(); 764 | token = self.scanner.peek()?; 765 | if !matches!( 766 | token.data, 767 | TokenData::FlowEntry | TokenData::FlowSequenceEnd 768 | ) { 769 | self.states.push(ParserState::FlowSequenceEntryMappingEnd); 770 | return self.parse_node(false, false); 771 | } 772 | } 773 | let mark = token.start_mark; 774 | self.state = ParserState::FlowSequenceEntryMappingEnd; 775 | Self::process_empty_scalar(mark) 776 | } 777 | 778 | fn parse_flow_sequence_entry_mapping_end(&mut self) -> Result { 779 | let token = self.scanner.peek()?; 780 | let start_mark = token.start_mark; 781 | let end_mark = 
token.end_mark; 782 | self.state = ParserState::FlowSequenceEntry; 783 | Ok(Event { 784 | data: EventData::MappingEnd, 785 | start_mark, 786 | end_mark, 787 | }) 788 | } 789 | 790 | fn parse_flow_mapping_key(&mut self, first: bool) -> Result { 791 | if first { 792 | let token = self.scanner.peek()?; 793 | let mark = token.start_mark; 794 | self.marks.push(mark); 795 | self.scanner.skip_token(); 796 | } 797 | 798 | let mut token = self.scanner.peek()?; 799 | if !matches!(token.data, TokenData::FlowMappingEnd) { 800 | if !first { 801 | if let TokenData::FlowEntry = token.data { 802 | self.scanner.skip_token(); 803 | token = self.scanner.peek()?; 804 | } else { 805 | let token_mark = token.start_mark; 806 | let mark = self.marks.pop().unwrap(); 807 | return Err(Error::parser( 808 | "while parsing a flow mapping", 809 | mark, 810 | "did not find expected ',' or '}'", 811 | token_mark, 812 | )); 813 | } 814 | } 815 | if let TokenData::Key = token.data { 816 | self.scanner.skip_token(); 817 | token = self.scanner.peek()?; 818 | if !matches!( 819 | token.data, 820 | TokenData::Value | TokenData::FlowEntry | TokenData::FlowMappingEnd 821 | ) { 822 | self.states.push(ParserState::FlowMappingValue); 823 | return self.parse_node(false, false); 824 | } 825 | let mark = token.start_mark; 826 | self.state = ParserState::FlowMappingValue; 827 | return Self::process_empty_scalar(mark); 828 | } else if !matches!(token.data, TokenData::FlowMappingEnd) { 829 | self.states.push(ParserState::FlowMappingEmptyValue); 830 | return self.parse_node(false, false); 831 | } 832 | } 833 | let event = Event { 834 | data: EventData::MappingEnd, 835 | start_mark: token.start_mark, 836 | end_mark: token.end_mark, 837 | }; 838 | self.state = self.states.pop().unwrap(); 839 | _ = self.marks.pop(); 840 | self.scanner.skip_token(); 841 | Ok(event) 842 | } 843 | 844 | fn parse_flow_mapping_value(&mut self, empty: bool) -> Result { 845 | let mut token = self.scanner.peek()?; 846 | if empty { 847 | let 
mark = token.start_mark; 848 | self.state = ParserState::FlowMappingKey; 849 | return Self::process_empty_scalar(mark); 850 | } 851 | if let TokenData::Value = token.data { 852 | self.scanner.skip_token(); 853 | token = self.scanner.peek()?; 854 | if !matches!(token.data, TokenData::FlowEntry | TokenData::FlowMappingEnd) { 855 | self.states.push(ParserState::FlowMappingKey); 856 | return self.parse_node(false, false); 857 | } 858 | } 859 | let mark = token.start_mark; 860 | self.state = ParserState::FlowMappingKey; 861 | Self::process_empty_scalar(mark) 862 | } 863 | 864 | fn process_empty_scalar(mark: Mark) -> Result { 865 | Ok(Event { 866 | data: EventData::Scalar { 867 | anchor: None, 868 | tag: None, 869 | value: String::new(), 870 | plain_implicit: true, 871 | quoted_implicit: false, 872 | style: ScalarStyle::Plain, 873 | }, 874 | start_mark: mark, 875 | end_mark: mark, 876 | }) 877 | } 878 | 879 | fn process_directives( 880 | &mut self, 881 | version_directive_ref: Option<&mut Option>, 882 | tag_directives_ref: Option<&mut Vec>, 883 | ) -> Result<()> { 884 | let default_tag_directives: [TagDirective; 2] = [ 885 | // TODO: Get rid of these heap allocations. 886 | TagDirective { 887 | handle: String::from("!"), 888 | prefix: String::from("!"), 889 | }, 890 | TagDirective { 891 | handle: String::from("!!"), 892 | prefix: String::from("tag:yaml.org,2002:"), 893 | }, 894 | ]; 895 | let mut version_directive: Option = None; 896 | 897 | let mut tag_directives = Vec::with_capacity(16); 898 | 899 | let mut token = self.scanner.peek_mut()?; 900 | 901 | loop { 902 | if !matches!( 903 | token.data, 904 | TokenData::VersionDirective { .. } | TokenData::TagDirective { .. 
}
        ) {
            break;
        }

        // %YAML directive: record the stream's declared YAML version.
        if let TokenData::VersionDirective { major, minor } = &token.data {
            let mark = token.start_mark;
            // At most one %YAML directive is permitted per document.
            if version_directive.is_some() {
                return Err(Error::parser(
                    "",
                    Mark::default(),
                    "found duplicate %YAML directive",
                    mark,
                ));
            } else if *major != 1 || *minor != 1 && *minor != 2 {
                // Only YAML 1.1 and 1.2 are accepted (note: `&&` binds tighter
                // than `||`, so this reads major != 1 || (minor not in {1, 2})).
                return Err(Error::parser(
                    "",
                    Mark::default(),
                    "found incompatible YAML document",
                    mark,
                ));
            }
            version_directive = Some(VersionDirective {
                major: *major,
                minor: *minor,
            });
        } else if let TokenData::TagDirective { handle, prefix } = &mut token.data {
            // %TAG directive: take ownership of handle/prefix out of the token
            // and register the mapping for the current document.
            let value = TagDirective {
                handle: core::mem::take(handle),
                prefix: core::mem::take(prefix),
            };
            let mark = token.start_mark;
            // Explicit directives must be unique (allow_duplicates = false).
            self.append_tag_directive(value.clone(), false, mark)?;

            tag_directives.push(value);
        }

        self.scanner.skip_token();
        token = self.scanner.peek_mut()?;
    }

    let start_mark = token.start_mark;
    // Built-in defaults (e.g. "!" and "!!") are appended last; an explicit
    // %TAG directive with the same handle silently wins (allow_duplicates).
    for default_tag_directive in default_tag_directives {
        self.append_tag_directive(default_tag_directive, true, start_mark)?;
    }

    // Hand the collected directives back to the caller, if it asked for them.
    if let Some(version_directive_ref) = version_directive_ref {
        *version_directive_ref = version_directive;
    }
    if let Some(tag_directives_ref) = tag_directives_ref {
        if tag_directives.is_empty() {
            tag_directives_ref.clear();
            tag_directives.clear();
        } else {
            *tag_directives_ref = tag_directives;
        }
    } else {
        tag_directives.clear();
    }

    Ok(())
    }

    /// Register a `%TAG` directive handle/prefix pair for the current document.
    ///
    /// When `allow_duplicates` is false (explicit `%TAG` lines), a handle that
    /// is already registered is reported as a parse error at `mark`. When it is
    /// true (the built-in default directives), an existing registration simply
    /// takes precedence and the new value is dropped.
    fn append_tag_directive(
        &mut self,
        value: TagDirective,
        allow_duplicates: bool,
        mark: Mark,
    ) -> Result<()> {
        for tag_directive in &self.tag_directives {
            if value.handle == tag_directive.handle {
                if allow_duplicates {
                    return Ok(());
                }
                return Err(Error::parser(
                    "",
                    Mark::default(),
                    "found duplicate %TAG directive",
                    mark,
                ));
            }
        }
        self.tag_directives.push(value);
        Ok(())
    }

    /// Forget all recorded anchors (used when a document is finished).
    pub(crate) fn delete_aliases(&mut self) {
        self.aliases.clear();
    }
}
--------------------------------------------------------------------------------
/src/reader.rs:
--------------------------------------------------------------------------------
use std::io::BufRead;

use alloc::collections::VecDeque;

use crate::{scanner::Scanner, Encoding, Error, Result};

// Byte-order marks for the encodings a YAML stream may declare.
const BOM_UTF8: [u8; 3] = [0xef, 0xbb, 0xbf];
const BOM_UTF16LE: [u8; 2] = [0xff, 0xfe];
const BOM_UTF16BE: [u8; 2] = [0xfe, 0xff];

/// Sniff the stream encoding from an optional byte-order mark.
///
/// Returns `Ok(None)` on an empty stream. A recognized BOM is consumed from
/// `reader`; without a BOM nothing is consumed and UTF-8 is assumed (the YAML
/// default).
fn yaml_parser_determine_encoding(reader: &mut dyn BufRead) -> Result<Option<Encoding>> {
    let initial_bytes = reader.fill_buf()?;
    if initial_bytes.is_empty() {
        return Ok(None);
    }

    match initial_bytes[0] {
        0xef => {
            // Looks like a UTF-8 BOM; it is 3 bytes long.
            let mut bom = [0; 3];
            reader.read_exact(&mut bom)?;
            if bom == BOM_UTF8 {
                Ok(Some(Encoding::Utf8))
            } else {
                Err(Error::reader(
                    "invalid byte order marker",
                    0,
                    i32::from_be_bytes([bom[0], bom[1], bom[2], 0]),
                ))
            }
        }
        0xff | 0xfe => {
            // Looks like a UTF-16 BOM (either endianness); 2 bytes long.
            let mut bom = [0; 2];
            reader.read_exact(&mut bom)?;
            if bom == BOM_UTF16LE {
                Ok(Some(Encoding::Utf16Le))
            } else if bom == BOM_UTF16BE {
                Ok(Some(Encoding::Utf16Be))
            } else {
                Err(Error::reader(
                    "invalid byte order marker",
                    0,
                    i32::from_le_bytes([bom[0], bom[1], 0, 0]),
                ))
            }
        }
        // No BOM: YAML streams default to UTF-8.
        _ => Ok(Some(Encoding::Utf8)),
    }
}

// Allowing unsafe code because it is the only efficient way to partially decode
// a string slice from a stream of UTF-8 bytes.
52 | #[allow(unsafe_code)] 53 | fn read_utf8_buffered( 54 | reader: &mut dyn BufRead, 55 | out: &mut VecDeque, 56 | offset: &mut usize, 57 | ) -> Result { 58 | let available = loop { 59 | match reader.fill_buf() { 60 | Ok([]) => return Ok(false), 61 | Ok(available) => break available, 62 | Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue, 63 | Err(err) => return Err(err.into()), 64 | } 65 | }; 66 | 67 | match core::str::from_utf8(available) { 68 | Ok(valid) => { 69 | let used = valid.len(); 70 | // The entire contents of the input buffer was valid UTF-8. 71 | for ch in valid.chars() { 72 | push_char(out, ch, *offset)?; 73 | *offset += ch.len_utf8(); 74 | } 75 | reader.consume(used); 76 | Ok(true) 77 | } 78 | Err(err) => { 79 | let valid_bytes = err.valid_up_to(); 80 | 81 | // If some of the buffer contents were valid, append that to the 82 | // output. 83 | let valid = unsafe { 84 | // SAFETY: This is safe because of `valid_up_to()`. 85 | core::str::from_utf8_unchecked(&available[..valid_bytes]) 86 | }; 87 | for ch in valid.chars() { 88 | push_char(out, ch, *offset)?; 89 | *offset += ch.len_utf8(); 90 | } 91 | 92 | match err.error_len() { 93 | Some(_invalid_len) => Err(Error::reader( 94 | "invalid UTF-8", 95 | *offset, 96 | available[valid_bytes] as _, 97 | )), 98 | None => { 99 | if valid_bytes != 0 { 100 | // Some valid UTF-8 characters were present, and the 101 | // tail end of the buffer was an incomplete sequence. 102 | // Leave the incomplete sequence in the buffer. 103 | reader.consume(valid_bytes); 104 | Ok(true) 105 | } else { 106 | // The beginning of the buffer was an incomplete UTF-8 107 | // sequence. Read the whole character unbuffered. 108 | // 109 | // This will return `UnexpectedEof` if the sequence 110 | // cannot be completed. Note that `read_exact()` handles 111 | // interrupt automatically. 
112 | let initial = available[0]; 113 | read_utf8_char_unbuffered(reader, out, initial, offset)?; 114 | Ok(true) 115 | } 116 | } 117 | } 118 | } 119 | } 120 | } 121 | 122 | fn read_utf8_char_unbuffered( 123 | reader: &mut dyn BufRead, 124 | out: &mut VecDeque, 125 | initial: u8, 126 | offset: &mut usize, 127 | ) -> Result<()> { 128 | let width = utf8_char_width(initial); 129 | let mut buffer = [0; 4]; 130 | reader.read_exact(&mut buffer[..width])?; 131 | if let Ok(valid) = core::str::from_utf8(&buffer[..width]) { 132 | // We read a whole, valid character. 133 | let Some(ch) = valid.chars().next() else { 134 | unreachable!() 135 | }; 136 | push_char(out, ch, *offset)?; 137 | *offset += width; 138 | Ok(()) 139 | } else { 140 | // Since we read the exact character width, the only 141 | // possible error here is invalid Unicode. 142 | Err(Error::reader("invalid UTF-8", *offset, buffer[0] as _)) 143 | } 144 | } 145 | 146 | fn read_utf16_buffered( 147 | reader: &mut dyn BufRead, 148 | out: &mut VecDeque, 149 | offset: &mut usize, 150 | ) -> Result { 151 | let available = loop { 152 | match reader.fill_buf() { 153 | Ok([]) => return Ok(false), 154 | Ok(available) => break available, 155 | Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue, 156 | Err(err) => return Err(err.into()), 157 | } 158 | }; 159 | 160 | let chunks = available.chunks_exact(2).map(|chunk| { 161 | let [a, b] = chunk else { unreachable!() }; 162 | if BIG_ENDIAN { 163 | u16::from_be_bytes([*a, *b]) 164 | } else { 165 | u16::from_le_bytes([*a, *b]) 166 | } 167 | }); 168 | 169 | let mut used = 0; 170 | for ch in core::char::decode_utf16(chunks) { 171 | match ch { 172 | Ok(ch) => { 173 | push_char(out, ch, *offset)?; 174 | let n = ch.len_utf16(); 175 | *offset += n; 176 | used += n; 177 | } 178 | Err(_) => { 179 | // An unpaired surrogate may either be a corrupt stream, but it 180 | // can also be that the buffer just happens to contain the first 181 | // half of a surrogate pair. 
Consume all of the valid bytes in 182 | // the buffer first, and then handle the unpaired surrogate in 183 | // the "slow" path (`read_utf16_char_unbuffered`) the next time 184 | // we are called. 185 | break; 186 | } 187 | } 188 | } 189 | 190 | if used != 0 { 191 | reader.consume(used); 192 | *offset += used; 193 | Ok(true) 194 | } else { 195 | debug_assert!(!available.is_empty() && available.len() < 2); 196 | read_utf16_char_unbuffered::(reader, out, offset)?; 197 | Ok(true) 198 | } 199 | } 200 | 201 | fn read_utf16_char_unbuffered( 202 | reader: &mut dyn BufRead, 203 | out: &mut VecDeque, 204 | offset: &mut usize, 205 | ) -> Result<()> { 206 | let mut buffer = [0; 2]; 207 | reader.read_exact(&mut buffer)?; 208 | let first = if BIG_ENDIAN { 209 | u16::from_be_bytes(buffer) 210 | } else { 211 | u16::from_le_bytes(buffer) 212 | }; 213 | 214 | if is_utf16_surrogate(first) { 215 | reader.read_exact(&mut buffer)?; 216 | let second = if BIG_ENDIAN { 217 | u16::from_be_bytes(buffer) 218 | } else { 219 | u16::from_le_bytes(buffer) 220 | }; 221 | 222 | match core::char::decode_utf16([first, second]).next() { 223 | Some(Ok(ch)) => { 224 | push_char(out, ch, *offset)?; 225 | *offset += 4; 226 | Ok(()) 227 | } 228 | Some(Err(err)) => Err(Error::reader( 229 | "invalid UTF-16", 230 | *offset, 231 | err.unpaired_surrogate() as _, 232 | )), 233 | None => unreachable!(), 234 | } 235 | } else { 236 | match core::char::decode_utf16([first]).next() { 237 | Some(Ok(ch)) => { 238 | push_char(out, ch, *offset)?; 239 | *offset += 2; 240 | Ok(()) 241 | } 242 | Some(Err(_)) | None => unreachable!(), 243 | } 244 | } 245 | } 246 | 247 | fn utf8_char_width(initial: u8) -> usize { 248 | if initial & 0x80 == 0 { 249 | 1 250 | } else if initial & 0xE0 == 0xC0 { 251 | 2 252 | } else if initial & 0xF0 == 0xE0 { 253 | 3 254 | } else if initial & 0xF8 == 0xF0 { 255 | 4 256 | } else { 257 | 0 258 | } 259 | } 260 | 261 | fn is_utf16_surrogate(value: u16) -> bool { 262 | matches!(value, 
0xD800..=0xDFFF)
}

/// Append `ch` to the look-ahead buffer after validating that it is allowed in
/// a YAML stream (printable per the YAML spec: tab, LF, CR, and the printable
/// Unicode ranges; other control characters are rejected).
///
/// `offset` is only used for error reporting.
fn push_char(out: &mut VecDeque<char>, ch: char, offset: usize) -> Result<()> {
    if !(ch == '\x09'
        || ch == '\x0A'
        || ch == '\x0D'
        || ch >= '\x20' && ch <= '\x7E'
        || ch == '\u{0085}'
        || ch >= '\u{00A0}' && ch <= '\u{D7FF}'
        || ch >= '\u{E000}' && ch <= '\u{FFFD}'
        || ch >= '\u{10000}' && ch <= '\u{10FFFF}')
    {
        return Err(Error::reader(
            "control characters are not allowed",
            offset,
            ch as _,
        ));
    }
    out.push_back(ch);
    Ok(())
}

/// Ensure at least `length` characters are buffered in `parser.buffer`,
/// decoding more input from the read handler as needed.
///
/// On first use the stream encoding is sniffed from the BOM. Reaching end of
/// input sets `parser.eof` and returns `Ok(())`, possibly with fewer than
/// `length` characters buffered.
///
/// # Panics
///
/// Panics if no read handler is installed.
pub(crate) fn yaml_parser_update_buffer(parser: &mut Scanner, length: usize) -> Result<()> {
    let reader = parser.read_handler.as_deref_mut().expect("no read handler");
    if parser.buffer.len() >= length {
        return Ok(());
    }
    if parser.encoding == Encoding::Any {
        // Encoding not determined yet: sniff the BOM (defaults to UTF-8).
        if let Some(encoding) = yaml_parser_determine_encoding(reader)? {
            parser.encoding = encoding;
        } else {
            // Empty stream.
            parser.eof = true;
            return Ok(());
        }
    }

    while parser.buffer.len() < length {
        if parser.eof {
            return Ok(());
        }

        let not_eof = match parser.encoding {
            Encoding::Any => unreachable!(),
            Encoding::Utf8 => read_utf8_buffered(reader, &mut parser.buffer, &mut parser.offset)?,
            Encoding::Utf16Le => {
                read_utf16_buffered::<false>(reader, &mut parser.buffer, &mut parser.offset)?
            }
            Encoding::Utf16Be => {
                read_utf16_buffered::<true>(reader, &mut parser.buffer, &mut parser.offset)?
            }
        };
        if !not_eof {
            parser.eof = true;
            return Ok(());
        }
    }

    // Mirror libyaml's guard: reject inputs whose byte offset would exceed
    // usize::MAX / 2, keeping downstream conversions safe.
    if parser.offset >= (!0_usize).wrapping_div(2_usize) {
        return Err(Error::reader("input is too long", parser.offset, -1));
    }
    Ok(())
}
--------------------------------------------------------------------------------
/src/token.rs:
--------------------------------------------------------------------------------
use crate::{Encoding, Mark, ScalarStyle};

/// The token structure.
#[derive(Debug, PartialEq)]
#[non_exhaustive]
pub struct Token {
    /// The token type.
    pub data: TokenData,
    /// The beginning of the token.
    pub start_mark: Mark,
    /// The end of the token.
    pub end_mark: Mark,
}

/// The kinds of token produced by the scanner, with their payloads.
#[derive(Debug, PartialEq)]
pub enum TokenData {
    /// A STREAM-START token.
    StreamStart {
        /// The stream encoding.
        encoding: Encoding,
    },
    /// A STREAM-END token.
    StreamEnd,
    /// A VERSION-DIRECTIVE token.
    VersionDirective {
        /// The major version number.
        major: i32,
        /// The minor version number.
        minor: i32,
    },
    /// A TAG-DIRECTIVE token.
    TagDirective {
        /// The tag handle.
        handle: String,
        /// The tag prefix.
        prefix: String,
    },
    /// A DOCUMENT-START token.
    DocumentStart,
    /// A DOCUMENT-END token.
    DocumentEnd,
    /// A BLOCK-SEQUENCE-START token.
    BlockSequenceStart,
    /// A BLOCK-MAPPING-START token.
    BlockMappingStart,
    /// A BLOCK-END token.
    BlockEnd,
    /// A FLOW-SEQUENCE-START token.
    FlowSequenceStart,
    /// A FLOW-SEQUENCE-END token.
    FlowSequenceEnd,
    /// A FLOW-MAPPING-START token.
    FlowMappingStart,
    /// A FLOW-MAPPING-END token.
    FlowMappingEnd,
    /// A BLOCK-ENTRY token.
    BlockEntry,
    /// A FLOW-ENTRY token.
    FlowEntry,
    /// A KEY token.
    Key,
    /// A VALUE token.
    Value,
    /// An ALIAS token.
    Alias {
        /// The alias value.
        value: String,
    },
    /// An ANCHOR token.
    Anchor {
        /// The anchor value.
        value: String,
    },
    /// A TAG token.
    Tag {
        /// The tag handle.
        handle: String,
        /// The tag suffix.
        suffix: String,
    },
    /// A SCALAR token.
    Scalar {
        /// The scalar value.
        value: String,
        /// The scalar style.
        style: ScalarStyle,
    },
}
--------------------------------------------------------------------------------
/tests/bin/mod.rs:
--------------------------------------------------------------------------------
use std::error::Error;
use std::fs::File;
use std::io::{Read, Write};
use std::path::Path;
use std::process::{Command, Stdio};

/// Captured result of one test-binary invocation.
pub struct Output {
    /// Whether the invocation succeeded (Ok result / zero exit status).
    pub success: bool,
    /// Raw bytes written to standard output.
    pub stdout: Vec<u8>,
    /// Raw bytes written to standard error.
    pub stderr: Vec<u8>,
}

/// Run one of the test-suite binaries against the file at `input`.
///
/// Under Miri, spawning a subprocess is not possible, so the binary's entry
/// point is invoked in-process via `unsafe_main` with `input` as stdin;
/// otherwise the pre-built `compiled` binary is executed with `input` passed
/// as its command-line argument.
pub fn run(
    compiled: &str,
    unsafe_main: unsafe fn(
        stdin: &mut dyn Read,
        stdout: &mut dyn Write,
    ) -> Result<(), Box<dyn Error>>,
    input: &Path,
) -> Output {
    if cfg!(miri) {
        let mut input = File::open(input).unwrap();
        let mut stdout = Vec::new();
        let result = unsafe { unsafe_main(&mut input, &mut stdout) };

        Output {
            success: result.is_ok(),
            stdout,
            // Mimic the subprocess case: the error display becomes "stderr".
            stderr: result
                .err()
                .as_ref()
                .map_or_else(String::new, ToString::to_string)
                .into(),
        }
    } else {
        let output = Command::new(compiled)
            .arg(input)
            .stdin(Stdio::null())
            .output()
            .unwrap();

        Output {
            success: output.status.success(),
            stdout: output.stdout,
            stderr: output.stderr,
        }
    }
}
--------------------------------------------------------------------------------
/tests/data/.gitignore:
--------------------------------------------------------------------------------
/yaml-test-suite
--------------------------------------------------------------------------------
/tests/data/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "unsafe-libyaml-test-suite"
version = "0.0.0"
authors = ["David Tolnay <dtolnay@gmail.com>"]
edition = "2021"
publish = false

[lib]
path = "lib.rs"
proc-macro = true

[dependencies]
proc-macro2 = "1.0"
quote = "1.0"

[build-dependencies]
anyhow = "1.0"
flate2 = "1.0"
reqwest = { version = "0.11", features = ["blocking"] }
tar = "0.4.16"
--------------------------------------------------------------------------------
/tests/data/build.rs:
--------------------------------------------------------------------------------
#![allow(clippy::uninlined_format_args)]

use anyhow::Result;
use flate2::read::GzDecoder;
use std::fs;
use std::path::Path;
use tar::Archive;

// Pinned release tag of yaml/yaml-test-suite to test against.
const TAG: &str = "data-2020-02-11";

/// Build script: ensure the pinned yaml-test-suite checkout exists next to
/// this crate, downloading it on first build or whenever `TAG` changes.
fn main() {
    // The COMMIT stamp records which tag the existing checkout corresponds
    // to; re-download when it is missing or stale.
    let needs_clone = match fs::read_to_string("yaml-test-suite/COMMIT") {
        Err(_) => true,
        Ok(contents) => contents.trim() != TAG,
    };
    if needs_clone {
        download_and_unpack().unwrap();
    }
}

/// Download the pinned yaml-test-suite tarball from GitHub and unpack it into
/// `yaml-test-suite/`, replacing any previous checkout, then write the COMMIT
/// stamp.
fn download_and_unpack() -> Result<()> {
    let url = format!("https://github.com/yaml/yaml-test-suite/archive/refs/tags/{TAG}.tar.gz");
    let response = reqwest::blocking::get(url)?.error_for_status()?;
    let decoder = GzDecoder::new(response);
    let mut archive = Archive::new(decoder);
    // Tarball entries are all prefixed with "yaml-test-suite-<TAG>/".
    let prefix = format!("yaml-test-suite-{}", TAG);

    let yaml_test_suite = Path::new("yaml-test-suite");
    if yaml_test_suite.exists() {
        fs::remove_dir_all(yaml_test_suite)?;
    }

    for entry in archive.entries()? {
        let mut entry = entry?;
        let path = entry.path()?;
        // Skip tar's synthetic metadata entry.
        if path == Path::new("pax_global_header") {
            continue;
        }
        let relative = path.strip_prefix(&prefix)?;
        let out = yaml_test_suite.join(relative);
        entry.unpack(&out)?;
    }

    fs::write("yaml-test-suite/COMMIT", TAG)?;
    Ok(())
}
--------------------------------------------------------------------------------
/tests/data/lib.rs:
--------------------------------------------------------------------------------
use proc_macro::TokenStream;
use quote::{format_ident, quote};
use std::collections::{BTreeMap as Map, BTreeSet as Set};
use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::Path;

/// Generate one `#[test]` per non-error yaml-test-suite case for the emitter.
#[proc_macro]
pub fn test_emitter(_input: TokenStream) -> TokenStream {
    test("libyaml-emitter", |dir| !dir.join("error").exists())
}

/// Generate one `#[test]` per non-error yaml-test-suite case for the parser.
#[proc_macro]
pub fn test_parser(_input: TokenStream) -> TokenStream {
    test("libyaml-parser", |dir| !dir.join("error").exists())
}

/// Generate one `#[test]` per error-producing yaml-test-suite case.
#[proc_macro]
pub fn test_parser_error(_input: TokenStream) -> TokenStream {
    test("libyaml-parser-error", |dir| dir.join("error").exists())
}

/// Shared generator: emit a `#[test]` for every suite directory accepted by
/// `check`, marking ids listed in `tests/ignorelist/<ignorelist>` as
/// `#[ignore]`d.
fn test(ignorelist: &str, check: fn(&Path) -> bool) -> TokenStream {
    let tests_dir = Path::new("tests");

    // Each ignore-list line begins with a 4-character test id; the remainder
    // of the line is a human-readable description that is discarded here.
    let mut ignored_ids = Set::new();
    let ignorelist = tests_dir.join("ignorelist").join(ignorelist);
    for line in BufReader::new(File::open(ignorelist).unwrap()).lines() {
        let mut line = line.unwrap();
        line.truncate(4);
        ignored_ids.insert(line);
    }

    let mut ids = Map::new();
    let yaml_test_suite = tests_dir.join("data").join("yaml-test-suite");
    for entry in fs::read_dir(yaml_test_suite).unwrap() {
        let entry = entry.unwrap();
        if !entry.file_type().unwrap().is_dir() {
            continue;
        }

        let path = entry.path();
        // Each test case directory contains a "===" file with its description.
        let description = path.join("===");
        let slug = if
let Ok(description) = fs::read_to_string(description) {
            description_to_slug(description)
        } else {
            // No description file: not a test case directory.
            continue;
        };

        if !check(&path) {
            continue;
        }

        let file_name = entry.file_name();
        let id = file_name.to_str().unwrap().to_owned();
        ids.insert(id, slug);
    }

    let mut tests = proc_macro2::TokenStream::new();
    let ignore = quote!(#[ignore]);
    for (id, slug) in ids {
        let test_name = format_ident!("_{id}_{slug}");
        // Attach `#[ignore]` only to ids present in the ignore list.
        let ignore = ignored_ids.contains(&id).then_some(&ignore);

        tests.extend(quote! {
            #[test]
            #ignore
            #[allow(non_snake_case)]
            fn #test_name() {
                test(#id);
            }
        });
    }

    TokenStream::from(tests)
}

/// Turn a test-case description into an identifier-friendly slug:
/// non-alphanumeric runs collapse to single underscores, then the result is
/// trimmed and lowercased.
fn description_to_slug(mut description: String) -> String {
    description = description.replace(|ch: char| !ch.is_ascii_alphanumeric(), "_");
    while description.contains("__") {
        description = description.replace("__", "_");
    }
    description.trim_matches('_').to_ascii_lowercase()
}
--------------------------------------------------------------------------------
/tests/ignorelist/libyaml-emitter:
--------------------------------------------------------------------------------
26DV: Whitespace around colon in mappings
2EBW: Allowed characters in keys
2JQS: Block Mapping with Missing Keys
2LFX: Spec Example 6.13. Reserved Directives [1.3]
2SXE: Anchors With Colon in Name
2XXW: Spec Example 2.25. Unordered Sets
3MYT: Plain Scalar looking like key, comment, anchor and tag
4ABK: Spec Example 7.17. Flow Mapping Separate Values
4MUZ: Flow mapping colon on line after key
4QFQ: Spec Example 8.2. Block Indentation Indicator [1.3]
52DL: Explicit Non-Specific Tag [1.3]
565N: Construct Binary
5TYM: Spec Example 6.21. Local Tag Prefix
5WE3: Spec Example 8.17. Explicit Block Mapping Entries
6CK3: Spec Example 6.26.
Tag Shorthands 16 | 6FWR: Block Scalar Keep 17 | 6KGN: Anchor for empty node 18 | 6M2F: Aliases in Explicit Block Mapping 19 | 6PBE: Zero-indented sequences in explicit mapping keys 20 | 6SLA: Allowed characters in quoted mapping key 21 | 6WLZ: Spec Example 6.18. Primary Tag Handle [1.3] 22 | 6WPF: Spec Example 6.8. Flow Folding [1.3] 23 | 6XDY: Two document start markers 24 | 6ZKB: Spec Example 9.6. Stream 25 | 7T8X: Spec Example 8.10. Folded Lines - 8.13. Final Empty Lines 26 | 7W2P: Block Mapping with Missing Values 27 | 7Z25: Bare document after document end marker 28 | 8KB6: Multiline plain flow mapping key without value 29 | 8XYN: Anchor with unicode character 30 | 9BXH: Multiline doublequoted flow mapping key without value 31 | 8MK2: Explicit Non-Specific Tag 32 | 9DXL: Spec Example 9.6. Stream [1.3] 33 | 9MMW: Spec Example 7.21. Single Pair Implicit Entries [1.3 34 | 9TFX: Spec Example 7.6. Double Quoted Lines [1.3] 35 | B3HG: Spec Example 8.9. Folded Scalar [1.3] 36 | C2DT: Spec Example 7.18. Flow Mapping Adjacent Values 37 | DFF7: Spec Example 7.16. Flow Mapping Entries 38 | E76Z: Aliases in Implicit Block Mapping 39 | EX5H: Multiline Scalar at Top Level [1.3] 40 | EXG3: Three dashes and content without space [1.3] 41 | FBC9: Allowed characters in plain scalars 42 | FH7J: Tags on Empty Scalars 43 | FRK4: Spec Example 7.3. Completely Empty Flow Nodes 44 | J3BT: Spec Example 5.12. Tabs and Spaces 45 | JDH8: Plain Scalar looking like key, comment, anchor and tag [1.3] 46 | JTV5: Block Mapping with Multiline Scalars 47 | K54U: Tab after document header 48 | KK5P: Various combinations of explicit block mappings 49 | KSS4: Scalars on --- line 50 | KZN9: Spec Example 7.21. Single Pair Implicit Entries 51 | LE5A: Spec Example 7.24. Flow Nodes 52 | M7A3: Spec Example 9.3. Bare Documents 53 | M9B4: Spec Example 8.7. 
Literal Scalar 54 | NAT4: Various empty or newline only quoted strings 55 | NHX8: Empty Lines at End of Document 56 | PUW8: Document start on last line 57 | PW8X: Anchors on Empty Scalars 58 | Q8AD: Spec Example 7.5. Double Quoted Line Breaks [1.3] 59 | S3PD: Spec Example 8.18. Implicit Block Mapping Entries 60 | S4JQ: Spec Example 6.28. Non-Specific Tags 61 | T26H: Spec Example 8.8. Literal Content [1.3] 62 | T4YY: Spec Example 7.9. Single Quoted Lines [1.3] 63 | T5N4: Spec Example 8.7. Literal Scalar [1.3] 64 | UT92: Spec Example 9.4. Explicit Documents 65 | W42U: Spec Example 8.15. Block Sequence Entry Types 66 | W4TN: Spec Example 9.5. Directives Documents 67 | W5VH: Allowed characters in alias 68 | WZ62: Spec Example 7.2. Empty Content 69 | X38W: Aliases in Flow Objects 70 | XLQ9: Multiline scalar that looks like a YAML directive 71 | Y2GN: Anchor with colon in the middle 72 | ZWK4: Key with anchor after missing explicit mapping value 73 | -------------------------------------------------------------------------------- /tests/ignorelist/libyaml-parser: -------------------------------------------------------------------------------- 1 | 2JQS: Block Mapping with Missing Keys 2 | 2LFX: Spec Example 6.13. Reserved Directives [1.3] 3 | 2SXE: Anchors With Colon in Name 4 | 4ABK: Spec Example 7.17. Flow Mapping Separate Values 5 | 4MUZ: Flow mapping colon on line after key 6 | 5MUD: Colon and adjacent value on next line 7 | 6BCT: Spec Example 6.3. Separation Spaces 8 | 6LVF: Spec Example 6.13. Reserved Directives 9 | 6M2F: Aliases in Explicit Block Mapping 10 | 7Z25: Bare document after document end marker 11 | 8XYN: Anchor with unicode character 12 | 9MMW: Spec Example 7.21. Single Pair Implicit Entries [1.3 13 | 9SA2: Multiline double quoted flow mapping key 14 | A2M4: Spec Example 6.2. Indentation Indicators 15 | BEC7: Spec Example 6.14. “YAML” directive 16 | DBG4: Spec Example 7.10. 
Plain Characters 17 | DK3J: Zero indented block scalar with line that looks like a comment 18 | FP8R: Zero indented block scalar 19 | FRK4: Spec Example 7.3. Completely Empty Flow Nodes 20 | HWV9: Document-end marker 21 | K3WX: Colon and adjacent value after comment on next line 22 | KZN9: Spec Example 7.21. Single Pair Implicit Entries 23 | M7A3: Spec Example 9.3. Bare Documents 24 | NHX8: Empty Lines at End of Document 25 | NJ66: Multiline plain flow mapping key 26 | Q5MG: Tab at beginning of line followed by a flow mapping 27 | QT73: Comment and document-end marker 28 | R4YG: Spec Example 8.2. Block Indentation Indicator 29 | S3PD: Spec Example 8.18. Implicit Block Mapping Entries 30 | UT92: Spec Example 9.4. Explicit Documents 31 | W4TN: Spec Example 9.5. Directives Documents 32 | W5VH: Allowed characters in alias 33 | WZ62: Spec Example 7.2. Empty Content 34 | Y2GN: Anchor with colon in the middle 35 | -------------------------------------------------------------------------------- /tests/ignorelist/libyaml-parser-error: -------------------------------------------------------------------------------- 1 | 9C9N: Wrong indented flow sequence 2 | 9HCY: Need document footer before directives 3 | 9JBA: Invalid comment after end of flow sequence 4 | CVW2: Invalid comment after comma 5 | EB22: Missing document-end marker before directive 6 | QB6E: Wrong indented multiline quoted scalar 7 | RHX7: YAML directive without document end marker 8 | S98Z: Block scalar with more spaces than first content line 9 | SU5Z: Comment without whitespace after doublequoted scalar 10 | X4QW: Comment without whitespace after block scalar indicator 11 | -------------------------------------------------------------------------------- /tests/test_emitter.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::type_complexity)] 2 | 3 | mod bin; 4 | #[path = "../src/bin/run-emitter-test-suite.rs"] 5 | #[allow(dead_code)] 6 | mod 
run_emitter_test_suite;

use std::fs;
use std::path::Path;

/// Run the emitter test binary on one yaml-test-suite case and compare its
/// emitted YAML against the expected output file.
fn test(id: &str) {
    let dir = Path::new("tests")
        .join("data")
        .join("yaml-test-suite")
        .join(id);

    let output = bin::run(
        env!("CARGO_BIN_EXE_run-emitter-test-suite"),
        run_emitter_test_suite::test_main,
        &dir.join("test.event"),
    );

    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);
    eprint!("{stderr}");

    // Some cases provide a canonical "out.yaml"; otherwise the emitter is
    // expected to reproduce "in.yaml".
    let out = if dir.join("out.yaml").exists() {
        dir.join("out.yaml")
    } else {
        dir.join("in.yaml")
    };
    let expected = fs::read_to_string(out).unwrap();
    pretty_assertions::assert_str_eq!(expected, stdout);
    assert!(output.success);
}

unsafe_libyaml_test_suite::test_emitter!();
--------------------------------------------------------------------------------
/tests/test_parser.rs:
--------------------------------------------------------------------------------
#![allow(clippy::type_complexity)]

mod bin;
#[path = "../src/bin/run-parser-test-suite.rs"]
#[allow(dead_code)]
mod run_parser_test_suite;

use std::fs;
use std::path::Path;

/// Run the parser test binary on one yaml-test-suite case and compare its
/// event stream against the expected "test.event" file.
fn test(id: &str) {
    let dir = Path::new("tests")
        .join("data")
        .join("yaml-test-suite")
        .join(id);

    let output = bin::run(
        env!("CARGO_BIN_EXE_run-parser-test-suite"),
        run_parser_test_suite::test_main,
        &dir.join("in.yaml"),
    );

    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);
    eprint!("{stderr}");

    let expected = fs::read_to_string(dir.join("test.event")).unwrap();
    pretty_assertions::assert_str_eq!(expected, stdout);
    assert!(output.success);
}

unsafe_libyaml_test_suite::test_parser!();
--------------------------------------------------------------------------------
/tests/test_parser_error.rs:
--------------------------------------------------------------------------------
#![allow(clippy::type_complexity)]

mod bin;
#[path = "../src/bin/run-parser-test-suite.rs"]
#[allow(dead_code)]
mod run_parser_test_suite;

use std::path::Path;

/// Run the parser test binary on one yaml-test-suite case that is expected to
/// be rejected; the test fails if the parse unexpectedly succeeds.
fn test(id: &str) {
    let case_dir = Path::new("tests")
        .join("data")
        .join("yaml-test-suite")
        .join(id);
    let input = case_dir.join("in.yaml");

    let output = bin::run(
        env!("CARGO_BIN_EXE_run-parser-test-suite"),
        run_parser_test_suite::test_main,
        &input,
    );

    // Rejection is the expected outcome for these cases.
    if !output.success {
        return;
    }

    // The parse succeeded when it should not have; surface the captured
    // output to help diagnose the case before failing.
    eprint!("{}", String::from_utf8_lossy(&output.stdout));
    eprint!("{}", String::from_utf8_lossy(&output.stderr));
    panic!("expected parse to fail");
}

unsafe_libyaml_test_suite::test_parser_error!();
--------------------------------------------------------------------------------