├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── tests ├── data │ ├── .gitignore │ ├── Cargo.toml │ ├── build.rs │ └── lib.rs ├── ignorelist │ ├── libyaml-parser-error │ ├── libyaml-parser │ └── libyaml-emitter ├── test_parser_error.rs ├── test_parser.rs ├── test_emitter.rs ├── bin │ └── mod.rs ├── test_mark_display.rs └── test_owned_input.rs ├── fuzz ├── .gitignore ├── fuzz_targets │ ├── parse.rs │ ├── load.rs │ └── scan.rs └── Cargo.toml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-MIT ├── src ├── token.rs ├── bin │ ├── run-parser-test-suite.rs │ └── run-emitter-test-suite.rs ├── macros.rs ├── event.rs ├── error.rs ├── reader.rs ├── lib.rs ├── document.rs └── parser.rs ├── README.md └── benches └── bench.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: simonask 2 | -------------------------------------------------------------------------------- /tests/data/.gitignore: -------------------------------------------------------------------------------- 1 | /yaml-test-suite 2 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | artifacts/ 2 | corpus/ 3 | coverage/ 4 | target/ 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build.rs 2 | /rust-toolchain 3 | target 4 | Cargo.lock 5 | /.vscode/launch.json 6 | /.vscode/settings.json 7 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/parse.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use libfuzzer_sys::fuzz_target; 4 | use libyaml_safer::{EventData, Parser}; 5 | 6 | fuzz_target!(|data: &[u8]| fuzz_target(data)); 7 | 8 | fn fuzz_target(mut data: &[u8]) { 9 
| let mut parser = Parser::new(); 10 | parser.set_input(&mut data); 11 | 12 | while let Ok(event) = parser.parse() { 13 | let is_end = matches!(event.data, EventData::StreamEnd); 14 | if is_end { 15 | break; 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/load.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use libfuzzer_sys::fuzz_target; 4 | use libyaml_safer::{Document, Parser}; 5 | 6 | fuzz_target!(|data: &[u8]| fuzz_target(data)); 7 | 8 | fn fuzz_target(mut data: &[u8]) { 9 | let mut parser = Parser::new(); 10 | parser.set_input(&mut data); 11 | 12 | while let Ok(mut document) = Document::load(&mut parser) { 13 | let done = document.get_root_node().is_none(); 14 | if done { 15 | break; 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/scan.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use libfuzzer_sys::fuzz_target; 4 | use libyaml_safer::{Scanner, TokenData}; 5 | 6 | fuzz_target!(|data: &[u8]| fuzz_target(data)); 7 | 8 | fn fuzz_target(mut data: &[u8]) { 9 | let mut scanner = Scanner::new(); 10 | scanner.set_input(&mut data); 11 | 12 | while let Ok(token) = Scanner::scan(&mut scanner) { 13 | let is_end = matches!(token.data, TokenData::StreamEnd); 14 | if is_end { 15 | break; 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /tests/ignorelist/libyaml-parser-error: -------------------------------------------------------------------------------- 1 | 9C9N: Wrong indented flow sequence 2 | 9HCY: Need document footer before directives 3 | 9JBA: Invalid comment after end of flow sequence 4 | CVW2: Invalid comment after comma 5 | EB22: Missing document-end marker before directive 6 | QB6E: Wrong indented multiline quoted scalar 7 | RHX7: 
YAML directive without document end marker 8 | S98Z: Block scalar with more spaces than first content line 9 | SU5Z: Comment without whitespace after doublequoted scalar 10 | X4QW: Comment without whitespace after block scalar indicator 11 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "unsafe-libyaml-fuzz" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | libfuzzer-sys = "0.4.7" 13 | libyaml-safer = { path = ".." } 14 | 15 | [[bin]] 16 | name = "scan" 17 | path = "fuzz_targets/scan.rs" 18 | test = false 19 | doc = false 20 | 21 | [[bin]] 22 | name = "parse" 23 | path = "fuzz_targets/parse.rs" 24 | test = false 25 | doc = false 26 | 27 | [[bin]] 28 | name = "load" 29 | path = "fuzz_targets/load.rs" 30 | test = false 31 | doc = false 32 | 33 | [workspace] 34 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.3.0 - 2025-12-22 4 | 5 | ### Breaking changes 6 | 7 | - `Scanner` and `Parser` are now generic over the input stream, instead of using dynamic 8 | dispatch `dyn BufRead`. This allows the input to be owned by the parser using `std::io::Cursor`. 9 | 10 | ### Changes 11 | 12 | - MSRV lowered from 1.70 to 1.64 (@jayvdb). 13 | 14 | ## 0.2.0 - 2025-11-26 15 | 16 | ### Bugfixes 17 | 18 | - Fix handling of CRLF line endings (@dougvalenta). 19 | - Use 1-based mark offsets (@jayvdb). 20 | 21 | ## 0.1.1 - 2024-02-11 22 | 23 | ### Added 24 | 25 | - Implement `PartialEq` and `Debug` for `Event` and `Token`. 26 | 27 | ### Bugfixes 28 | 29 | - Fix a bug where marks would not be correctly set for tokens and events. 
30 | -------------------------------------------------------------------------------- /tests/data/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "unsafe-libyaml-test-suite" 3 | version = "0.0.0" 4 | authors = ["David Tolnay "] 5 | edition = "2021" 6 | publish = false 7 | 8 | [lib] 9 | path = "lib.rs" 10 | proc-macro = true 11 | 12 | [dependencies] 13 | proc-macro2 = "=1.0.60" 14 | quote = "=1.0.28" 15 | 16 | [build-dependencies] 17 | # Pins are for Rust 1.64.0 compatibility 18 | anyhow = "1.0" 19 | flate2 = "=1.0.26" 20 | minreq = { version = "=2.6.0", features = ["https-native"] } 21 | native-tls = "=0.2.11" 22 | once_cell = "=1.18.0" 23 | openssl = "=0.10.64" 24 | openssl-sys = "=0.9.101" 25 | log = "=0.4.20" 26 | regex = "=1.9.6" 27 | regex-syntax = "=0.7.5" 28 | regex-automata = "=0.3.9" 29 | security-framework-sys = "=2.9.1" 30 | tar = "0.4.16" 31 | -------------------------------------------------------------------------------- /tests/test_parser_error.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::type_complexity)] 2 | 3 | mod bin; 4 | #[path = "../src/bin/run-parser-test-suite.rs"] 5 | #[allow(dead_code)] 6 | mod run_parser_test_suite; 7 | 8 | use std::path::Path; 9 | 10 | fn test(id: &str) { 11 | let dir = Path::new("tests") 12 | .join("data") 13 | .join("yaml-test-suite") 14 | .join(id); 15 | 16 | let output = bin::run( 17 | env!("CARGO_BIN_EXE_run-parser-test-suite"), 18 | run_parser_test_suite::test_main, 19 | &dir.join("in.yaml"), 20 | ); 21 | 22 | if output.success { 23 | let stdout = String::from_utf8_lossy(&output.stdout); 24 | let stderr = String::from_utf8_lossy(&output.stderr); 25 | eprint!("{stdout}"); 26 | eprint!("{stderr}"); 27 | panic!("expected parse to fail"); 28 | } 29 | } 30 | 31 | unsafe_libyaml_test_suite::test_parser_error!(); 32 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "libyaml-safer" 3 | version = "0.3.0" 4 | authors = ["Simon Ask Ulsnes , 10 | pub stderr: Vec, 11 | } 12 | 13 | pub fn run( 14 | compiled: &str, 15 | unsafe_main: unsafe fn( 16 | stdin: &mut dyn Read, 17 | stdout: &mut dyn Write, 18 | ) -> Result<(), Box>, 19 | input: &Path, 20 | ) -> Output { 21 | if cfg!(miri) { 22 | let mut input = File::open(input).unwrap(); 23 | let mut stdout = Vec::new(); 24 | let result = unsafe { unsafe_main(&mut input, &mut stdout) }; 25 | 26 | Output { 27 | success: result.is_ok(), 28 | stdout, 29 | stderr: result 30 | .err() 31 | .as_ref() 32 | .map_or_else(String::new, ToString::to_string) 33 | .into(), 34 | } 35 | } else { 36 | let output = Command::new(compiled) 37 | .arg(input) 38 | .stdin(Stdio::null()) 39 | .output() 40 | .unwrap(); 41 | 42 | Output { 43 | success: output.status.success(), 44 | stdout: output.stdout, 45 | stderr: output.stderr, 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /tests/data/build.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::uninlined_format_args)] 2 | 3 | use anyhow::Result; 4 | use flate2::read::GzDecoder; 5 | use std::fs; 6 | use std::path::Path; 7 | use tar::Archive; 8 | 9 | const TAG: &str = "data-2020-02-11"; 10 | 11 | fn main() { 12 | let needs_clone = match fs::read_to_string("yaml-test-suite/COMMIT") { 13 | Err(_) => true, 14 | Ok(contents) => contents.trim() != TAG, 15 | }; 16 | if needs_clone { 17 | download_and_unpack().unwrap(); 18 | } 19 | } 20 | 21 | fn download_and_unpack() -> Result<()> { 22 | let url = format!("https://github.com/yaml/yaml-test-suite/archive/refs/tags/{TAG}.tar.gz"); 23 | let response = minreq::get(&url).send()?; 24 | let bytes = response.as_bytes(); 25 
| let decoder = GzDecoder::new(bytes); 26 | let mut archive = Archive::new(decoder); 27 | let prefix = format!("yaml-test-suite-{}", TAG); 28 | 29 | let yaml_test_suite = Path::new("yaml-test-suite"); 30 | if yaml_test_suite.exists() { 31 | fs::remove_dir_all(yaml_test_suite)?; 32 | } 33 | 34 | for entry in archive.entries()? { 35 | let mut entry = entry?; 36 | let path = entry.path()?; 37 | if path == Path::new("pax_global_header") { 38 | continue; 39 | } 40 | let relative = path.strip_prefix(&prefix)?; 41 | let out = yaml_test_suite.join(relative); 42 | entry.unpack(&out)?; 43 | } 44 | 45 | fs::write("yaml-test-suite/COMMIT", TAG)?; 46 | Ok(()) 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_mark_display.rs: -------------------------------------------------------------------------------- 1 | use libyaml_safer::Parser; 2 | 3 | /// Test that errors at the very beginning display as line 1 column 1. 4 | #[test] 5 | fn first_position() { 6 | const INVALID_YAML: &str = "\t"; 7 | 8 | let mut parser = Parser::new(); 9 | let mut input = INVALID_YAML.as_bytes(); 10 | parser.set_input_string(&mut input); 11 | 12 | let result = parser.collect::, _>>(); 13 | 14 | assert!(result.is_err(), "Expected parsing to fail for invalid YAML"); 15 | let err = result.unwrap_err(); 16 | 17 | let mark = err.problem_mark().unwrap(); 18 | let mark_str = mark.to_string(); 19 | eprintln!("Problem mark: {}", mark_str); 20 | 21 | assert_eq!(mark_str, "line 1 column 1"); 22 | } 23 | 24 | /// Test that error messages display 1-based line and column numbers. 25 | /// 26 | /// This YAML has a missing closing quote (from test CQ3W). 
27 | #[test] 28 | fn multiline_error() { 29 | const INVALID_YAML: &str = "---\nkey: \"missing closing quote"; 30 | 31 | let mut parser = Parser::new(); 32 | let mut input = INVALID_YAML.as_bytes(); 33 | parser.set_input_string(&mut input); 34 | 35 | let result = parser.collect::, _>>(); 36 | 37 | assert!(result.is_err(), "Expected parsing to fail for invalid YAML"); 38 | let err = result.unwrap_err(); 39 | 40 | // Verify that the mark can be retrieved and displays correctly 41 | let mark = err.problem_mark().unwrap(); 42 | let mark_str = mark.to_string(); 43 | eprintln!("Problem mark: {}", mark_str); 44 | 45 | assert_eq!(mark_str, "line 2 column 28"); 46 | } 47 | -------------------------------------------------------------------------------- /tests/ignorelist/libyaml-parser: -------------------------------------------------------------------------------- 1 | 2JQS: Block Mapping with Missing Keys 2 | 2LFX: Spec Example 6.13. Reserved Directives [1.3] 3 | 2SXE: Anchors With Colon in Name 4 | 4ABK: Spec Example 7.17. Flow Mapping Separate Values 5 | 4MUZ: Flow mapping colon on line after key 6 | 5MUD: Colon and adjacent value on next line 7 | 6BCT: Spec Example 6.3. Separation Spaces 8 | 6LVF: Spec Example 6.13. Reserved Directives 9 | 6M2F: Aliases in Explicit Block Mapping 10 | 7Z25: Bare document after document end marker 11 | 8XYN: Anchor with unicode character 12 | 9MMW: Spec Example 7.21. Single Pair Implicit Entries [1.3 13 | 9SA2: Multiline double quoted flow mapping key 14 | A2M4: Spec Example 6.2. Indentation Indicators 15 | BEC7: Spec Example 6.14. “YAML” directive 16 | DBG4: Spec Example 7.10. Plain Characters 17 | DK3J: Zero indented block scalar with line that looks like a comment 18 | FP8R: Zero indented block scalar 19 | FRK4: Spec Example 7.3. Completely Empty Flow Nodes 20 | HWV9: Document-end marker 21 | K3WX: Colon and adjacent value after comment on next line 22 | KZN9: Spec Example 7.21. Single Pair Implicit Entries 23 | M7A3: Spec Example 9.3. 
Bare Documents 24 | NHX8: Empty Lines at End of Document 25 | NJ66: Multiline plain flow mapping key 26 | Q5MG: Tab at beginning of line followed by a flow mapping 27 | QT73: Comment and document-end marker 28 | R4YG: Spec Example 8.2. Block Indentation Indicator 29 | S3PD: Spec Example 8.18. Implicit Block Mapping Entries 30 | UT92: Spec Example 9.4. Explicit Documents 31 | W4TN: Spec Example 9.5. Directives Documents 32 | W5VH: Allowed characters in alias 33 | WZ62: Spec Example 7.2. Empty Content 34 | Y2GN: Anchor with colon in the middle 35 | -------------------------------------------------------------------------------- /src/token.rs: -------------------------------------------------------------------------------- 1 | use crate::{Encoding, Mark, ScalarStyle}; 2 | 3 | /// The token structure. 4 | #[derive(Debug, PartialEq)] 5 | #[non_exhaustive] 6 | pub struct Token { 7 | /// The token type. 8 | pub data: TokenData, 9 | /// The beginning of the token. 10 | pub start_mark: Mark, 11 | /// The end of the token. 12 | pub end_mark: Mark, 13 | } 14 | 15 | #[derive(Debug, PartialEq)] 16 | pub enum TokenData { 17 | /// A STREAM-START token. 18 | StreamStart { 19 | /// The stream encoding. 20 | encoding: Encoding, 21 | }, 22 | /// A STREAM-END token. 23 | StreamEnd, 24 | /// A VERSION-DIRECTIVE token. 25 | VersionDirective { 26 | /// The major version number. 27 | major: i32, 28 | /// The minor version number. 29 | minor: i32, 30 | }, 31 | /// A TAG-DIRECTIVE token. 32 | TagDirective { 33 | /// The tag handle. 34 | handle: String, 35 | /// The tag prefix. 36 | prefix: String, 37 | }, 38 | /// A DOCUMENT-START token. 39 | DocumentStart, 40 | /// A DOCUMENT-END token. 41 | DocumentEnd, 42 | /// A BLOCK-SEQUENCE-START token. 43 | BlockSequenceStart, 44 | /// A BLOCK-MAPPING-START token. 45 | BlockMappingStart, 46 | /// A BLOCK-END token. 47 | BlockEnd, 48 | /// A FLOW-SEQUENCE-START token. 49 | FlowSequenceStart, 50 | /// A FLOW-SEQUENCE-END token. 
51 | FlowSequenceEnd, 52 | /// A FLOW-MAPPING-START token. 53 | FlowMappingStart, 54 | /// A FLOW-MAPPING-END token. 55 | FlowMappingEnd, 56 | /// A BLOCK-ENTRY token. 57 | BlockEntry, 58 | /// A FLOW-ENTRY token. 59 | FlowEntry, 60 | /// A KEY token. 61 | Key, 62 | /// A VALUE token. 63 | Value, 64 | /// An ALIAS token. 65 | Alias { 66 | /// The alias value. 67 | value: String, 68 | }, 69 | /// An ANCHOR token. 70 | Anchor { 71 | /// The anchor value. 72 | value: String, 73 | }, 74 | /// A TAG token. 75 | Tag { 76 | /// The tag handle. 77 | handle: String, 78 | /// The tag suffix. 79 | suffix: String, 80 | }, 81 | /// A SCALAR token. 82 | Scalar { 83 | /// The scalar value. 84 | value: String, 85 | /// The scalar style. 86 | style: ScalarStyle, 87 | }, 88 | } 89 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | schedule: [cron: "40 1 * * *"] 8 | 9 | permissions: 10 | contents: read 11 | 12 | env: 13 | RUSTFLAGS: -Dwarnings 14 | 15 | jobs: 16 | pre_ci: 17 | uses: dtolnay/.github/.github/workflows/pre_ci.yml@master 18 | 19 | test: 20 | name: Rust ${{matrix.rust}} 21 | needs: pre_ci 22 | if: needs.pre_ci.outputs.continue 23 | runs-on: ubuntu-latest 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | rust: [nightly, beta, stable, 1.75.0, 1.70.0, 1.64.0] 28 | timeout-minutes: 45 29 | steps: 30 | - uses: actions/checkout@v4 31 | - uses: dtolnay/rust-toolchain@master 32 | with: 33 | toolchain: ${{matrix.rust}} 34 | - name: Enable type layout randomization 35 | run: echo RUSTFLAGS=${RUSTFLAGS}\ -Zrandomize-layout >> $GITHUB_ENV 36 | if: matrix.rust == 'nightly' 37 | - name: Disable criterion 38 | run: sed -i '/criterion/d' Cargo.toml 39 | if: startsWith(matrix.rust, '1.') 40 | - run: cargo test 41 | 42 | doc: 43 | name: Documentation 44 | needs: pre_ci 
45 | if: needs.pre_ci.outputs.continue 46 | runs-on: ubuntu-latest 47 | timeout-minutes: 45 48 | env: 49 | RUSTDOCFLAGS: -Dwarnings 50 | steps: 51 | - uses: actions/checkout@v4 52 | - uses: dtolnay/rust-toolchain@nightly 53 | - uses: dtolnay/install@cargo-docs-rs 54 | - run: cargo docs-rs 55 | 56 | clippy: 57 | name: Clippy 58 | runs-on: ubuntu-latest 59 | if: github.event_name != 'pull_request' 60 | timeout-minutes: 45 61 | steps: 62 | - uses: actions/checkout@v4 63 | - uses: dtolnay/rust-toolchain@clippy 64 | - run: cargo clippy --tests 65 | 66 | miri: 67 | name: Miri 68 | needs: pre_ci 69 | if: needs.pre_ci.outputs.continue 70 | runs-on: ubuntu-latest 71 | timeout-minutes: 45 72 | steps: 73 | - uses: actions/checkout@v4 74 | - uses: dtolnay/rust-toolchain@miri 75 | - run: cargo miri setup 76 | - run: cargo miri test 77 | env: 78 | MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-strict-provenance 79 | 80 | fuzz: 81 | name: Fuzz 82 | needs: pre_ci 83 | if: needs.pre_ci.outputs.continue 84 | runs-on: ubuntu-latest 85 | timeout-minutes: 45 86 | steps: 87 | - uses: actions/checkout@v4 88 | - uses: dtolnay/rust-toolchain@nightly 89 | - uses: dtolnay/install@cargo-fuzz 90 | - run: cargo fuzz check 91 | -------------------------------------------------------------------------------- /tests/data/lib.rs: -------------------------------------------------------------------------------- 1 | use proc_macro::TokenStream; 2 | use quote::{format_ident, quote}; 3 | use std::collections::{BTreeMap as Map, BTreeSet as Set}; 4 | use std::fs::{self, File}; 5 | use std::io::{BufRead, BufReader}; 6 | use std::path::Path; 7 | 8 | #[proc_macro] 9 | pub fn test_emitter(_input: TokenStream) -> TokenStream { 10 | test("libyaml-emitter", |dir| !dir.join("error").exists()) 11 | } 12 | 13 | #[proc_macro] 14 | pub fn test_parser(_input: TokenStream) -> TokenStream { 15 | test("libyaml-parser", |dir| !dir.join("error").exists()) 16 | } 17 | 18 | #[proc_macro] 19 | pub fn test_parser_error(_input: 
TokenStream) -> TokenStream { 20 | test("libyaml-parser-error", |dir| dir.join("error").exists()) 21 | } 22 | 23 | fn test(ignorelist: &str, check: fn(&Path) -> bool) -> TokenStream { 24 | let tests_dir = Path::new("tests"); 25 | 26 | let mut ignored_ids = Set::new(); 27 | let ignorelist = tests_dir.join("ignorelist").join(ignorelist); 28 | for line in BufReader::new(File::open(ignorelist).unwrap()).lines() { 29 | let mut line = line.unwrap(); 30 | line.truncate(4); 31 | ignored_ids.insert(line); 32 | } 33 | 34 | let mut ids = Map::new(); 35 | let yaml_test_suite = tests_dir.join("data").join("yaml-test-suite"); 36 | for entry in fs::read_dir(yaml_test_suite).unwrap() { 37 | let entry = entry.unwrap(); 38 | if !entry.file_type().unwrap().is_dir() { 39 | continue; 40 | } 41 | 42 | let path = entry.path(); 43 | let description = path.join("==="); 44 | let slug = if let Ok(description) = fs::read_to_string(description) { 45 | description_to_slug(description) 46 | } else { 47 | continue; 48 | }; 49 | 50 | if !check(&path) { 51 | continue; 52 | } 53 | 54 | let file_name = entry.file_name(); 55 | let id = file_name.to_str().unwrap().to_owned(); 56 | ids.insert(id, slug); 57 | } 58 | 59 | let mut tests = proc_macro2::TokenStream::new(); 60 | let ignore = quote!(#[ignore]); 61 | for (id, slug) in ids { 62 | let test_name = format_ident!("_{id}_{slug}"); 63 | let ignore = ignored_ids.contains(&id).then_some(&ignore); 64 | 65 | tests.extend(quote! 
{ 66 | #[test] 67 | #ignore 68 | #[allow(non_snake_case)] 69 | fn #test_name() { 70 | test(#id); 71 | } 72 | }); 73 | } 74 | 75 | TokenStream::from(tests) 76 | } 77 | 78 | fn description_to_slug(mut description: String) -> String { 79 | description = description.replace(|ch: char| !ch.is_ascii_alphanumeric(), "_"); 80 | while description.contains("__") { 81 | description = description.replace("__", "_"); 82 | } 83 | description.trim_matches('_').to_ascii_lowercase() 84 | } 85 | -------------------------------------------------------------------------------- /tests/ignorelist/libyaml-emitter: -------------------------------------------------------------------------------- 1 | 26DV: Whitespace around colon in mappings 2 | 2EBW: Allowed characters in keys 3 | 2JQS: Block Mapping with Missing Keys 4 | 2LFX: Spec Example 6.13. Reserved Directives [1.3] 5 | 2SXE: Anchors With Colon in Name 6 | 2XXW: Spec Example 2.25. Unordered Sets 7 | 3MYT: Plain Scalar looking like key, comment, anchor and tag 8 | 4ABK: Spec Example 7.17. Flow Mapping Separate Values 9 | 4MUZ: Flow mapping colon on line after key 10 | 4QFQ: Spec Example 8.2. Block Indentation Indicator [1.3] 11 | 52DL: Explicit Non-Specific Tag [1.3] 12 | 565N: Construct Binary 13 | 5TYM: Spec Example 6.21. Local Tag Prefix 14 | 5WE3: Spec Example 8.17. Explicit Block Mapping Entries 15 | 6CK3: Spec Example 6.26. Tag Shorthands 16 | 6FWR: Block Scalar Keep 17 | 6KGN: Anchor for empty node 18 | 6M2F: Aliases in Explicit Block Mapping 19 | 6PBE: Zero-indented sequences in explicit mapping keys 20 | 6SLA: Allowed characters in quoted mapping key 21 | 6WLZ: Spec Example 6.18. Primary Tag Handle [1.3] 22 | 6WPF: Spec Example 6.8. Flow Folding [1.3] 23 | 6XDY: Two document start markers 24 | 6ZKB: Spec Example 9.6. Stream 25 | 7T8X: Spec Example 8.10. Folded Lines - 8.13. 
Final Empty Lines 26 | 7W2P: Block Mapping with Missing Values 27 | 7Z25: Bare document after document end marker 28 | 8KB6: Multiline plain flow mapping key without value 29 | 8XYN: Anchor with unicode character 30 | 9BXH: Multiline doublequoted flow mapping key without value 31 | 8MK2: Explicit Non-Specific Tag 32 | 9DXL: Spec Example 9.6. Stream [1.3] 33 | 9MMW: Spec Example 7.21. Single Pair Implicit Entries [1.3 34 | 9TFX: Spec Example 7.6. Double Quoted Lines [1.3] 35 | B3HG: Spec Example 8.9. Folded Scalar [1.3] 36 | C2DT: Spec Example 7.18. Flow Mapping Adjacent Values 37 | DFF7: Spec Example 7.16. Flow Mapping Entries 38 | E76Z: Aliases in Implicit Block Mapping 39 | EX5H: Multiline Scalar at Top Level [1.3] 40 | EXG3: Three dashes and content without space [1.3] 41 | FBC9: Allowed characters in plain scalars 42 | FH7J: Tags on Empty Scalars 43 | FRK4: Spec Example 7.3. Completely Empty Flow Nodes 44 | J3BT: Spec Example 5.12. Tabs and Spaces 45 | JDH8: Plain Scalar looking like key, comment, anchor and tag [1.3] 46 | JTV5: Block Mapping with Multiline Scalars 47 | K54U: Tab after document header 48 | KK5P: Various combinations of explicit block mappings 49 | KSS4: Scalars on --- line 50 | KZN9: Spec Example 7.21. Single Pair Implicit Entries 51 | LE5A: Spec Example 7.24. Flow Nodes 52 | M7A3: Spec Example 9.3. Bare Documents 53 | M9B4: Spec Example 8.7. Literal Scalar 54 | NAT4: Various empty or newline only quoted strings 55 | NHX8: Empty Lines at End of Document 56 | PUW8: Document start on last line 57 | PW8X: Anchors on Empty Scalars 58 | Q8AD: Spec Example 7.5. Double Quoted Line Breaks [1.3] 59 | S3PD: Spec Example 8.18. Implicit Block Mapping Entries 60 | S4JQ: Spec Example 6.28. Non-Specific Tags 61 | T26H: Spec Example 8.8. Literal Content [1.3] 62 | T4YY: Spec Example 7.9. Single Quoted Lines [1.3] 63 | T5N4: Spec Example 8.7. Literal Scalar [1.3] 64 | UT92: Spec Example 9.4. Explicit Documents 65 | W42U: Spec Example 8.15. 
Block Sequence Entry Types 66 | W4TN: Spec Example 9.5. Directives Documents 67 | W5VH: Allowed characters in alias 68 | WZ62: Spec Example 7.2. Empty Content 69 | X38W: Aliases in Flow Objects 70 | XLQ9: Multiline scalar that looks like a YAML directive 71 | Y2GN: Anchor with colon in the middle 72 | ZWK4: Key with anchor after missing explicit mapping value 73 | -------------------------------------------------------------------------------- /tests/test_owned_input.rs: -------------------------------------------------------------------------------- 1 | use std::io::Cursor; 2 | 3 | use libyaml_safer::{Document, EventData, Parser, Scanner, TokenData}; 4 | 5 | #[test] 6 | fn test_scanner_with_owned_input() { 7 | let yaml_string = String::from("key: value"); 8 | let mut scanner = Scanner::new(); 9 | scanner.set_input(Cursor::new(yaml_string.into_bytes())); 10 | 11 | // Get the first token 12 | let token = scanner.next().unwrap().unwrap(); 13 | assert!(matches!(token.data, TokenData::StreamStart { .. })); 14 | } 15 | 16 | #[test] 17 | fn test_parser_with_owned_vec() { 18 | let yaml_data = b"key: value\nlist:\n - item1\n - item2".to_vec(); 19 | let mut parser = Parser::new(); 20 | parser.set_input(Cursor::new(yaml_data)); 21 | 22 | // Parse the first event 23 | let event = parser.parse().unwrap(); 24 | assert!(matches!(event.data, EventData::StreamStart { .. })); 25 | 26 | // Continue parsing to verify it works 27 | let event = parser.parse().unwrap(); 28 | assert!(matches!(event.data, EventData::DocumentStart { .. })); 29 | } 30 | 31 | #[test] 32 | fn test_parser_with_owned_string() { 33 | let yaml_string = String::from("name: test\nvalue: 123"); 34 | let mut parser = Parser::new(); 35 | parser.set_input(Cursor::new(yaml_string)); 36 | 37 | // Parse the first event 38 | let event = parser.parse().unwrap(); 39 | assert!(matches!(event.data, EventData::StreamStart { .. 
})); 40 | 41 | // Continue parsing to verify it works 42 | let event = parser.parse().unwrap(); 43 | assert!(matches!(event.data, EventData::DocumentStart { .. })); 44 | } 45 | 46 | #[test] 47 | fn test_document_load_with_owned_input() { 48 | let yaml_string = String::from( 49 | r#" 50 | users: 51 | - name: Alice 52 | age: 30 53 | - name: Bob 54 | age: 25 55 | "#, 56 | ); 57 | 58 | let mut parser = Parser::new(); 59 | parser.set_input(Cursor::new(yaml_string.into_bytes())); 60 | 61 | // Load the document 62 | let doc = Document::load(&mut parser).unwrap(); 63 | 64 | // Verify we got a valid document with nodes 65 | assert!(!doc.nodes.is_empty()); 66 | } 67 | 68 | #[test] 69 | fn test_owned_input_no_lifetime_constraint() { 70 | // This test demonstrates that owned input doesn't require 71 | // the caller to maintain the buffer 72 | fn parse_yaml_owned() -> Parser> { 73 | let mut parser = Parser::new(); 74 | let data = String::from("test: value"); 75 | parser.set_input(Cursor::new(data)); 76 | parser 77 | } 78 | 79 | let mut parser = parse_yaml_owned(); 80 | let event = parser.parse().unwrap(); 81 | assert!(matches!(event.data, EventData::StreamStart { .. 
})); 82 | } 83 | 84 | #[test] 85 | fn test_owned_vs_borrowed_equivalence() { 86 | const YAML: &str = "key: value"; 87 | 88 | // Parse with borrowed input 89 | let mut parser_borrowed = Parser::new(); 90 | let mut borrowed_input = YAML.as_bytes(); 91 | parser_borrowed.set_input_string(&mut borrowed_input); 92 | 93 | let mut events_borrowed = Vec::new(); 94 | for event in &mut parser_borrowed { 95 | events_borrowed.push(format!("{:?}", event.unwrap().data)); 96 | } 97 | 98 | // Parse with owned input 99 | let mut parser_owned = Parser::new(); 100 | parser_owned.set_input(Cursor::new(String::from(YAML))); 101 | 102 | let mut events_owned = Vec::new(); 103 | for event in &mut parser_owned { 104 | events_owned.push(format!("{:?}", event.unwrap().data)); 105 | } 106 | 107 | // Verify both produce the same events 108 | assert_eq!(events_borrowed, events_owned); 109 | } 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | libyaml-safer 2 | ============== 3 | 4 | [github](https://github.com/simonask/libyaml-safer) 5 | [crates.io](https://crates.io/crates/libyaml-safer) 6 | [docs.rs](https://docs.rs/libyaml-safer) 7 | [build status](https://github.com/simonask/libyaml-safer/actions?query=branch%3Amaster) 8 | 9 | This library is a fork of [unsafe-libyaml] translated to safe and idiomatic Rust. 10 | 11 | [unsafe-libyaml] is [libyaml] translated from C to unsafe Rust with the 12 | assistance of [c2rust]. 
13 | 14 | [unsafe-libyaml]: https://github.com/dtolnay/unsafe-libyaml 15 | [libyaml]: https://github.com/yaml/libyaml/tree/2c891fc7a770e8ba2fec34fc6b545c672beb37e6 16 | [c2rust]: https://github.com/immunant/c2rust 17 | 18 | ```toml 19 | [dependencies] 20 | libyaml-safer = "0.3" 21 | ``` 22 | 23 | *Compiler support: requires rustc 1.64* 24 | 25 | ## Notes 26 | 27 | This library uses the same test suite as unsafe-libyaml, which is also the 28 | "official" test suite for libyaml. The library was ported line by line, function 29 | by function, from unsafe-libyaml, with the aim of precisely matching its 30 | behavior, including performance and allocation patterns. Any observable 31 | difference in behavior, outside of API differences due to Rust conventions, is 32 | considered a bug. 33 | 34 | One notable exception to the above is that this library uses the Rust standard 35 | library in place of custom routines where possible. For example, most UTF-8 and 36 | UTF-16 encoding and decoding is handled by the standard library, and 37 | input/output callbacks are replaced with the applicable `std::io::*` traits. Due 38 | to the use of `std::io`, this library cannot currently be `no_std`. 39 | 40 | Memory allocation patterns are generally preserved, except that standard library 41 | containers may overallocate buffers using different heuristics. 42 | 43 | In places where libyaml routines are replaced by the standard library, certain 44 | errors may be reported with reduced fidelity compared with libyaml (e.g., error 45 | messages may look slightly different), but the same inputs should generate the 46 | same general errors. 47 | 48 | ### Compatibility and interoperability 49 | 50 | While this library matches the behavior of libyaml, it is not intended as a 51 | drop-in replacement. The shape of the API is idiomatic Rust, and while it is 52 | possible to emulate the C API using this library, supporting this use case is 53 | not a priority.
Use `unsafe-libyaml` if that is what you need. 54 | 55 | ### Performance 56 | 57 | Performance is largely on par with `unsafe-libyaml`. No significant effort has 58 | been put into optimizing this library, beyond just choosing the most 59 | straightforward ways to reasonably port concepts from the C-like code. 60 | 61 | See 62 | [`benches/bench.rs`](https://github.com/simonask/libyaml-safer/benches/bench.rs) 63 | for a very simple benchmark dealing with a very large (~700 KiB) YAML document. 64 | On my machine (Ryzen 9 3950X) the parser from this library is slightly slower 65 | and the emitter is slightly faster, but both within about ~1ms of their unsafe 66 | counterparts. Run `cargo bench` to test on your machine. 67 | 68 | If there is demand, there are clear paths forward to optimize the parser. For 69 | example, due to it being ported directly from unsafe C-like code doing pointer 70 | arithmetic, it performs a completely unreasonable number of bounds checks for 71 | each input byte. 72 | 73 | ## License 74 | 75 | MIT license, same as unsafe-libyaml and libyaml. 76 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | use std::mem::MaybeUninit; 2 | 3 | use criterion::{criterion_group, criterion_main, Criterion}; 4 | use libyaml_safer::{Document, Emitter, Parser}; 5 | use unsafe_libyaml::*; 6 | 7 | static VERY_LARGE_YAML: &[u8] = include_bytes!("very_large.yml"); 8 | 9 | pub fn parser(c: &mut Criterion) { 10 | c.bench_function("libyaml-safer parse large", |b| { 11 | // Note: Not using `iter_with_large_drop` because that would be unfair 12 | // to unsafe-libyaml, which needs a call to `yaml_document_delete`. 
13 | b.iter(|| { 14 | let mut input = VERY_LARGE_YAML; 15 | let mut parser = Parser::new(); 16 | parser.set_input(&mut input); 17 | Document::load(&mut parser) 18 | }) 19 | }); 20 | 21 | c.bench_function("unsafe-libyaml parse large", |b| { 22 | b.iter(|| unsafe { 23 | let mut parser = MaybeUninit::zeroed(); 24 | if !yaml_parser_initialize(parser.as_mut_ptr()).ok { 25 | panic!("yaml_parser_initialize failed"); 26 | } 27 | let mut parser = parser.assume_init(); 28 | yaml_parser_set_input_string( 29 | &mut parser, 30 | VERY_LARGE_YAML.as_ptr(), 31 | VERY_LARGE_YAML.len() as _, 32 | ); 33 | let mut document = MaybeUninit::zeroed(); 34 | if !yaml_parser_load(&mut parser, document.as_mut_ptr()).ok { 35 | panic!("yaml_parser_load failed"); 36 | }; 37 | yaml_document_delete(document.as_mut_ptr()); 38 | yaml_parser_delete(&mut parser); 39 | }) 40 | }); 41 | 42 | c.bench_function("libyaml-safer emit large", |b| { 43 | // output shouldn't be much larger than the input, but just to be safe... 44 | let mut buffer = Vec::with_capacity(VERY_LARGE_YAML.len()); 45 | 46 | let doc = { 47 | let mut parser = Parser::new(); 48 | let mut input = VERY_LARGE_YAML; 49 | parser.set_input(&mut input); 50 | Document::load(&mut parser).unwrap() 51 | }; 52 | 53 | b.iter_custom(|iters| { 54 | let mut measurement = std::time::Duration::ZERO; 55 | for _ in 0..iters { 56 | let doc = doc.clone(); 57 | let start_time = std::time::Instant::now(); 58 | let mut emitter = Emitter::new(); 59 | emitter.set_output(&mut buffer); 60 | doc.dump(&mut emitter).unwrap(); 61 | measurement += start_time.elapsed(); 62 | } 63 | measurement 64 | }); 65 | }); 66 | 67 | c.bench_function("unsafe-libyaml emit large", |b| { 68 | // output shouldn't be much larger than the input, but just to be safe... 69 | let mut buffer = vec![0; VERY_LARGE_YAML.len() * 2]; 70 | 71 | // `yaml_document_t` cannot be cloned, so we have to parse it every iteration unfortunately.
72 | let read_doc = || unsafe { 73 | let mut parser = MaybeUninit::zeroed(); 74 | if !yaml_parser_initialize(parser.as_mut_ptr()).ok { 75 | panic!("yaml_parser_initialize failed"); 76 | } 77 | let mut parser = parser.assume_init(); 78 | yaml_parser_set_input_string( 79 | &mut parser, 80 | VERY_LARGE_YAML.as_ptr(), 81 | VERY_LARGE_YAML.len() as _, 82 | ); 83 | let mut document = MaybeUninit::zeroed(); 84 | if !yaml_parser_load(&mut parser, document.as_mut_ptr()).ok { 85 | panic!("yaml_parser_load failed"); 86 | }; 87 | yaml_parser_delete(&mut parser); 88 | document.assume_init() 89 | }; 90 | 91 | b.iter_custom(|iters| { 92 | let mut measurement = std::time::Duration::ZERO; 93 | for _ in 0..iters { 94 | unsafe { 95 | let mut doc = read_doc(); 96 | let start_time = std::time::Instant::now(); 97 | let mut emitter = MaybeUninit::zeroed(); 98 | if !yaml_emitter_initialize(emitter.as_mut_ptr()).ok { 99 | panic!("yaml_emitter_initialize failed"); 100 | } 101 | let mut emitter = emitter.assume_init(); 102 | let mut size_written = 0; 103 | yaml_emitter_set_output_string( 104 | &mut emitter, 105 | buffer.as_mut_ptr(), 106 | buffer.len() as _, 107 | &mut size_written, 108 | ); 109 | if !yaml_emitter_dump(&mut emitter, &mut doc).ok { 110 | panic!("yaml_emitter_dump failed"); 111 | } 112 | measurement += start_time.elapsed(); 113 | yaml_emitter_delete(&mut emitter); 114 | } 115 | } 116 | measurement 117 | }); 118 | }); 119 | } 120 | 121 | criterion_group!(benches, parser); 122 | criterion_main!(benches); 123 | -------------------------------------------------------------------------------- /src/bin/run-parser-test-suite.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::pedantic)] 2 | #![allow( 3 | clippy::cast_lossless, 4 | clippy::cast_possible_truncation, 5 | clippy::cast_possible_wrap, 6 | clippy::cast_sign_loss, 7 | clippy::items_after_statements, 8 | clippy::let_underscore_untyped, 9 | clippy::missing_errors_doc, 10 |
clippy::missing_safety_doc, 11 | clippy::too_many_lines 12 | )] 13 | 14 | use libyaml_safer::{EventData, Parser, ScalarStyle}; 15 | use std::env; 16 | use std::error::Error; 17 | use std::fs::File; 18 | use std::io::{self, Read, Write}; 19 | use std::process::{self, ExitCode}; 20 | use std::slice; 21 | 22 | pub(crate) fn test_main( 23 | stdin: &mut dyn Read, 24 | stdout: &mut dyn Write, 25 | ) -> Result<(), Box> { 26 | let mut parser = Parser::new(); 27 | 28 | let mut stdin = std::io::BufReader::new(stdin); 29 | parser.set_input(&mut stdin); 30 | 31 | loop { 32 | let event = match parser.parse() { 33 | Err(err) => { 34 | let error = format!("Parse error: {err}"); 35 | return Err(error.into()); 36 | } 37 | Ok(event) => event, 38 | }; 39 | 40 | let mut is_end = false; 41 | 42 | match &event.data { 43 | EventData::StreamStart { .. } => { 44 | _ = writeln!(stdout, "+STR"); 45 | } 46 | EventData::StreamEnd => { 47 | is_end = true; 48 | _ = writeln!(stdout, "-STR"); 49 | } 50 | EventData::DocumentStart { implicit, .. } => { 51 | _ = write!(stdout, "+DOC"); 52 | if !implicit { 53 | _ = write!(stdout, " ---"); 54 | } 55 | _ = writeln!(stdout); 56 | } 57 | EventData::DocumentEnd { implicit } => { 58 | _ = write!(stdout, "-DOC"); 59 | if !implicit { 60 | _ = write!(stdout, " ..."); 61 | } 62 | _ = writeln!(stdout); 63 | } 64 | EventData::Alias { anchor } => { 65 | _ = writeln!(stdout, "=ALI *{anchor}"); 66 | } 67 | EventData::Scalar { 68 | anchor, 69 | tag, 70 | value, 71 | style, 72 | .. 
73 | } => { 74 | let _ = write!(stdout, "=VAL"); 75 | if let Some(anchor) = anchor { 76 | _ = write!(stdout, " &{anchor}"); 77 | } 78 | if let Some(tag) = tag { 79 | _ = write!(stdout, " <{tag}>"); 80 | } 81 | _ = stdout.write_all(match style { 82 | ScalarStyle::Plain => b" :", 83 | ScalarStyle::SingleQuoted => b" '", 84 | ScalarStyle::DoubleQuoted => b" \"", 85 | ScalarStyle::Literal => b" |", 86 | ScalarStyle::Folded => b" >", 87 | _ => process::abort(), 88 | }); 89 | print_escaped(stdout, value); 90 | _ = writeln!(stdout); 91 | } 92 | EventData::SequenceStart { anchor, tag, .. } => { 93 | let _ = write!(stdout, "+SEQ"); 94 | if let Some(anchor) = anchor { 95 | _ = write!(stdout, " &{anchor}"); 96 | } 97 | if let Some(tag) = tag { 98 | _ = write!(stdout, " <{tag}>"); 99 | } 100 | _ = writeln!(stdout); 101 | } 102 | EventData::SequenceEnd => { 103 | _ = writeln!(stdout, "-SEQ"); 104 | } 105 | EventData::MappingStart { anchor, tag, .. } => { 106 | let _ = write!(stdout, "+MAP"); 107 | if let Some(anchor) = anchor { 108 | _ = write!(stdout, " &{anchor}"); 109 | } 110 | if let Some(tag) = tag { 111 | _ = write!(stdout, " <{tag}>"); 112 | } 113 | _ = writeln!(stdout); 114 | } 115 | EventData::MappingEnd => { 116 | _ = writeln!(stdout, "-MAP"); 117 | } 118 | } 119 | 120 | if is_end { 121 | break; 122 | } 123 | } 124 | Ok(()) 125 | } 126 | 127 | fn print_escaped(stdout: &mut dyn Write, s: &str) { 128 | for ch in s.bytes() { 129 | let repr = match &ch { 130 | b'\\' => b"\\\\", 131 | b'\0' => b"\\0", 132 | b'\x08' => b"\\b", 133 | b'\n' => b"\\n", 134 | b'\r' => b"\\r", 135 | b'\t' => b"\\t", 136 | c => slice::from_ref(c), 137 | }; 138 | let _ = stdout.write_all(repr); 139 | } 140 | } 141 | 142 | fn main() -> ExitCode { 143 | let args = env::args_os().skip(1); 144 | if args.len() == 0 { 145 | let _ = writeln!(io::stderr(), "Usage: run-parser-test-suite ..."); 146 | return ExitCode::FAILURE; 147 | } 148 | for arg in args { 149 | let mut stdin = File::open(arg).unwrap(); 
150 | let mut stdout = io::stdout(); 151 | let result = test_main(&mut stdin, &mut stdout); 152 | if let Err(err) = result { 153 | let _ = writeln!(io::stderr(), "{err}"); 154 | return ExitCode::FAILURE; 155 | } 156 | } 157 | ExitCode::SUCCESS 158 | } 159 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | macro_rules! CHECK_AT { 2 | ($buffer:expr, $octet:expr, $offset:expr) => { 3 | $buffer.get($offset).copied() == Some($octet) 4 | }; 5 | } 6 | 7 | macro_rules! CHECK { 8 | ($buffer:expr, $octet:expr) => { 9 | $buffer.get(0).copied() == Some($octet) 10 | }; 11 | } 12 | 13 | macro_rules! IS_ALPHA { 14 | ($buffer:expr) => { 15 | crate::macros::is_alpha($buffer.get(0).copied()) 16 | }; 17 | } 18 | 19 | pub(crate) fn is_alpha(ch: impl Into>) -> bool { 20 | let ch = match ch.into() { 21 | Some(ch) => ch, 22 | None => return false, 23 | }; 24 | ch >= '0' && ch <= '9' 25 | || ch >= 'A' && ch <= 'Z' 26 | || ch >= 'a' && ch <= 'z' 27 | || ch == '_' 28 | || ch == '-' 29 | } 30 | 31 | macro_rules! IS_DIGIT { 32 | ($buffer:expr) => { 33 | $buffer 34 | .get(0) 35 | .copied() 36 | .map(|ch| ch.is_digit(10)) 37 | .unwrap_or(false) 38 | }; 39 | } 40 | 41 | macro_rules! AS_DIGIT { 42 | ($buffer:expr) => { 43 | $buffer 44 | .get(0) 45 | .copied() 46 | .expect("out of bounds buffer access") 47 | .to_digit(10) 48 | .expect("not in digit range") 49 | }; 50 | } 51 | 52 | macro_rules! IS_HEX_AT { 53 | ($buffer:expr, $offset:expr) => { 54 | if let Some(ch) = $buffer.get($offset).copied() { 55 | ch.is_digit(16) 56 | } else { 57 | false 58 | } 59 | }; 60 | } 61 | 62 | macro_rules! 
AS_HEX_AT { 63 | ($buffer:expr, $offset:expr) => { 64 | $buffer 65 | .get($offset) 66 | .copied() 67 | .expect("out of range buffer access") 68 | .to_digit(16) 69 | .expect("not in digit range (hex)") 70 | }; 71 | } 72 | 73 | pub(crate) fn is_ascii(ch: char) -> bool { 74 | ch.is_ascii() 75 | } 76 | 77 | pub(crate) fn is_printable(ch: char) -> bool { 78 | match ch { 79 | '\u{feff}' | '\u{fffe}' | '\u{ffff}' => false, 80 | // ASCII 81 | '\x0a' 82 | | '\x20'..='\x7e' 83 | | '\u{00a0}'..='\u{00bf}' 84 | | '\u{00c0}'..='\u{cfff}' 85 | | '\u{d000}'..='\u{d7ff}' 86 | | '\u{e000}'..='\u{efff}' 87 | | '\u{f000}'..='\u{fffd}' 88 | | '\u{10000}'..='\u{10ffff}' => true, 89 | _ => false, 90 | } 91 | } 92 | 93 | macro_rules! IS_Z_AT { 94 | ($buffer:expr, $offset:expr) => { 95 | $buffer.get($offset).is_none() 96 | }; 97 | } 98 | 99 | macro_rules! IS_Z { 100 | ($string:expr) => { 101 | IS_Z_AT!($string, 0) 102 | }; 103 | } 104 | 105 | macro_rules! IS_BOM { 106 | ($buffer:expr) => { 107 | CHECK!($buffer, '\u{feff}') 108 | }; 109 | } 110 | 111 | pub(crate) fn is_bom(ch: char) -> bool { 112 | ch == '\u{feff}' 113 | } 114 | 115 | macro_rules! IS_SPACE_AT { 116 | ($string:expr, $offset:expr) => { 117 | CHECK_AT!($string, ' ', $offset) 118 | }; 119 | } 120 | 121 | macro_rules! IS_SPACE { 122 | ($string:expr) => { 123 | IS_SPACE_AT!($string, 0) 124 | }; 125 | } 126 | 127 | pub(crate) fn is_space(ch: impl Into>) -> bool { 128 | ch.into() == Some(' ') 129 | } 130 | 131 | macro_rules! IS_TAB_AT { 132 | ($buffer:expr, $offset:expr) => { 133 | CHECK_AT!($buffer, '\t', $offset) 134 | }; 135 | } 136 | 137 | macro_rules! IS_TAB { 138 | ($string:expr) => { 139 | IS_TAB_AT!($string, 0) 140 | }; 141 | } 142 | 143 | pub(crate) fn is_tab(ch: impl Into>) -> bool { 144 | ch.into() == Some('\t') 145 | } 146 | 147 | macro_rules!
IS_BLANK_AT { 148 | ($buffer:expr, $offset:expr) => {{ 149 | let ch = $buffer.get($offset).copied(); 150 | $crate::macros::is_space(ch) || crate::macros::is_tab(ch) 151 | }}; 152 | } 153 | 154 | macro_rules! IS_BLANK { 155 | ($string:expr) => { 156 | IS_BLANK_AT!($string, 0) 157 | }; 158 | } 159 | 160 | pub(crate) fn is_blank(ch: impl Into>) -> bool { 161 | let ch = ch.into(); 162 | is_space(ch) || is_tab(ch) 163 | } 164 | 165 | pub(crate) fn is_blankz(ch: impl Into>) -> bool { 166 | let ch = ch.into(); 167 | is_blank(ch) || is_breakz(ch) 168 | } 169 | 170 | macro_rules! IS_BREAK_AT { 171 | ($buffer:expr, $offset:expr) => { 172 | $crate::macros::is_break($buffer.get($offset).copied()) 173 | }; 174 | } 175 | 176 | pub(crate) fn is_break(ch: impl Into>) -> bool { 177 | matches!( 178 | ch.into(), 179 | Some('\r' | '\n' | '\u{0085}' | '\u{2028}' | '\u{2029}') 180 | ) 181 | } 182 | 183 | pub(crate) fn is_breakz(ch: impl Into>) -> bool { 184 | let ch = ch.into(); 185 | ch.is_none() || is_break(ch) 186 | } 187 | 188 | macro_rules! IS_BREAK { 189 | ($string:expr) => { 190 | IS_BREAK_AT!($string, 0) 191 | }; 192 | } 193 | 194 | macro_rules! IS_BREAKZ_AT { 195 | ($buffer:expr, $offset:expr) => {{ 196 | let ch = $buffer.get($offset).copied(); 197 | crate::macros::is_breakz(ch) 198 | }}; 199 | } 200 | 201 | macro_rules! IS_BREAKZ { 202 | ($string:expr) => { 203 | IS_BREAKZ_AT!($string, 0) 204 | }; 205 | } 206 | 207 | macro_rules! IS_BLANKZ_AT { 208 | ($buffer:expr, $offset:expr) => {{ 209 | let ch = $buffer.get($offset).copied(); 210 | $crate::macros::is_blank(ch) || $crate::macros::is_breakz(ch) 211 | }}; 212 | } 213 | 214 | macro_rules! 
IS_BLANKZ { 215 | ($string:expr) => { 216 | IS_BLANKZ_AT!($string, 0) 217 | }; 218 | } 219 | 220 | #[cfg(test)] 221 | mod tests { 222 | use super::*; 223 | 224 | #[test] 225 | fn printable() { 226 | for ch in "🎉".chars() { 227 | assert!(is_printable(ch)); 228 | } 229 | for ch in "\u{1f389}".chars() { 230 | assert!(is_printable(ch)); 231 | } 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /src/bin/run-emitter-test-suite.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::pedantic)] 2 | #![allow( 3 | clippy::cast_lossless, 4 | clippy::cast_possible_truncation, 5 | clippy::cast_possible_wrap, 6 | clippy::cast_sign_loss, 7 | clippy::items_after_statements, 8 | clippy::let_underscore_untyped, 9 | clippy::missing_errors_doc, 10 | clippy::missing_safety_doc, 11 | clippy::ptr_as_ptr, 12 | clippy::single_match_else, 13 | clippy::too_many_lines, 14 | clippy::unreadable_literal, 15 | clippy::manual_strip 16 | )] 17 | 18 | use libyaml_safer::{Emitter, Encoding, Event, MappingStyle, ScalarStyle, SequenceStyle}; 19 | use std::env; 20 | use std::error::Error; 21 | use std::fs::File; 22 | use std::io::{self, BufRead, Read, Write}; 23 | use std::process::ExitCode; 24 | 25 | pub(crate) fn test_main( 26 | stdin: &mut dyn Read, 27 | stdout: &mut dyn Write, 28 | ) -> Result<(), Box> { 29 | let mut emitter = Emitter::new(); 30 | 31 | emitter.set_output(stdout); 32 | emitter.set_canonical(false); 33 | emitter.set_unicode(false); 34 | 35 | let mut buf = std::io::BufReader::new(stdin); 36 | let mut line_buffer = String::with_capacity(1024); 37 | let mut value_buffer = String::with_capacity(128); 38 | 39 | loop { 40 | line_buffer.clear(); 41 | let n = buf.read_line(&mut line_buffer)?; 42 | if n == 0 { 43 | return Ok(()); 44 | } 45 | let line = line_buffer.strip_suffix('\n').unwrap_or(&line_buffer); 46 | 47 | let event = if line.starts_with("+STR") { 48 | 
Event::stream_start(Encoding::Utf8) 49 | } else if line.starts_with("-STR") { 50 | Event::stream_end() 51 | } else if line.starts_with("+DOC") { 52 | let implicit = !line[4..].starts_with(" ---"); 53 | Event::document_start(None, &[], implicit) 54 | } else if line.starts_with("-DOC") { 55 | let implicit = !line[4..].starts_with(" ..."); 56 | Event::document_end(implicit) 57 | } else if line.starts_with("+MAP") { 58 | Event::mapping_start( 59 | get_anchor('&', line), 60 | get_tag(line), 61 | false, 62 | MappingStyle::Block, 63 | ) 64 | } else if line.starts_with("-MAP") { 65 | Event::mapping_end() 66 | } else if line.starts_with("+SEQ") { 67 | Event::sequence_start( 68 | get_anchor('&', line), 69 | get_tag(line), 70 | false, 71 | SequenceStyle::Block, 72 | ) 73 | } else if line.starts_with("-SEQ") { 74 | Event::sequence_end() 75 | } else if line.starts_with("=VAL") { 76 | let mut style = ScalarStyle::Any; 77 | let value = get_value(line, &mut value_buffer, &mut style); 78 | let implicit = get_tag(line).is_none(); 79 | Event::scalar( 80 | get_anchor('&', line), 81 | get_tag(line), 82 | value, 83 | implicit, 84 | implicit, 85 | style, 86 | ) 87 | } else if line.starts_with("=ALI") { 88 | Event::alias(get_anchor('*', line).expect("no alias name")) 89 | } else { 90 | return Err(format!("Unknown event: '{line}'").into()); 91 | }; 92 | 93 | if let Err(err) = emitter.emit(event) { 94 | return Err(err.into()); 95 | } 96 | } 97 | } 98 | 99 | fn get_anchor(sigil: char, line: &str) -> Option<&str> { 100 | let (_, from_sigil) = line.split_once(sigil)?; 101 | if let Some((until_space, _tail)) = from_sigil.split_once(' ') { 102 | Some(until_space) 103 | } else if !from_sigil.is_empty() { 104 | Some(from_sigil) 105 | } else { 106 | None 107 | } 108 | } 109 | 110 | fn get_tag(line: &str) -> Option<&str> { 111 | let (_, from_angle_open) = line.split_once('<')?; 112 | let (until_angle_close, _) = from_angle_open.split_once('>')?; 113 | Some(until_angle_close) 114 | } 115 | 116 | fn 
get_value<'a>(line: &str, buffer: &'a mut String, style: &mut ScalarStyle) -> &'a str { 117 | let mut remainder = line; 118 | let value = loop { 119 | let (_before, tail) = match remainder.split_once(' ') { 120 | Some(parts) => parts, 121 | None => panic!("invalid line: {line}"), 122 | }; 123 | 124 | *style = match tail.chars().next().expect("string should not be empty") { 125 | ':' => ScalarStyle::Plain, 126 | '\'' => ScalarStyle::SingleQuoted, 127 | '"' => ScalarStyle::DoubleQuoted, 128 | '|' => ScalarStyle::Literal, 129 | '>' => ScalarStyle::Folded, 130 | _ => { 131 | // This was an anchor, move to the next space. 132 | remainder = tail; 133 | continue; 134 | } 135 | }; 136 | break &tail[1..]; 137 | }; 138 | 139 | buffer.clear(); 140 | // Unescape the value 141 | let mut chars = value.chars(); 142 | while let Some(ch) = chars.next() { 143 | if ch == '\\' { 144 | buffer.push(match chars.next().expect("unterminated escape sequence") { 145 | '\\' => '\\', 146 | '0' => '\0', 147 | 'b' => '\x08', 148 | 'n' => '\n', 149 | 'r' => '\r', 150 | 't' => '\t', 151 | otherwise => panic!("invalid escape character: {otherwise:?}"), 152 | }); 153 | } else { 154 | buffer.push(ch); 155 | } 156 | } 157 | 158 | &*buffer 159 | } 160 | 161 | fn main() -> ExitCode { 162 | let args = env::args_os().skip(1); 163 | if args.len() == 0 { 164 | let _ = writeln!( 165 | io::stderr(), 166 | "Usage: run-emitter-test-suite ...", 167 | ); 168 | return ExitCode::FAILURE; 169 | } 170 | for arg in args { 171 | let mut stdin = File::open(arg).unwrap(); 172 | let mut stdout = io::stdout(); 173 | let result = test_main(&mut stdin, &mut stdout); 174 | if let Err(err) = result { 175 | let _ = writeln!(io::stderr(), "{err}"); 176 | return ExitCode::FAILURE; 177 | } 178 | } 179 | ExitCode::SUCCESS 180 | } 181 | -------------------------------------------------------------------------------- /src/event.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | 
Encoding, MappingStyle, Mark, ScalarStyle, SequenceStyle, TagDirective, VersionDirective, 3 | }; 4 | 5 | /// The event structure. 6 | #[derive(Debug, PartialEq)] 7 | #[non_exhaustive] 8 | pub struct Event { 9 | /// The event data. 10 | pub data: EventData, 11 | /// The beginning of the event. 12 | pub start_mark: Mark, 13 | /// The end of the event. 14 | pub end_mark: Mark, 15 | } 16 | 17 | #[derive(Debug, PartialEq)] 18 | pub enum EventData { 19 | /// The stream parameters (for `YAML_STREAM_START_EVENT`). 20 | StreamStart { 21 | /// The document encoding. 22 | encoding: Encoding, 23 | }, 24 | StreamEnd, 25 | /// The document parameters (for `YAML_DOCUMENT_START_EVENT`). 26 | DocumentStart { 27 | /// The version directive. 28 | version_directive: Option, 29 | /// The tag directives list. 30 | tag_directives: Vec, 31 | /// Is the document indicator implicit? 32 | implicit: bool, 33 | }, 34 | /// The document end parameters (for `YAML_DOCUMENT_END_EVENT`). 35 | DocumentEnd { 36 | implicit: bool, 37 | }, 38 | /// The alias parameters (for `YAML_ALIAS_EVENT`). 39 | Alias { 40 | /// The anchor. 41 | anchor: String, 42 | }, 43 | /// The scalar parameters (for `YAML_SCALAR_EVENT`). 44 | Scalar { 45 | /// The anchor. 46 | anchor: Option, 47 | /// The tag. 48 | tag: Option, 49 | /// The scalar value. 50 | value: String, 51 | /// Is the tag optional for the plain style? 52 | plain_implicit: bool, 53 | /// Is the tag optional for any non-plain style? 54 | quoted_implicit: bool, 55 | /// The scalar style. 56 | style: ScalarStyle, 57 | }, 58 | /// The sequence parameters (for `YAML_SEQUENCE_START_EVENT`). 59 | SequenceStart { 60 | /// The anchor. 61 | anchor: Option, 62 | /// The tag. 63 | tag: Option, 64 | /// Is the tag optional? 65 | implicit: bool, 66 | /// The sequence style. 67 | style: SequenceStyle, 68 | }, 69 | SequenceEnd, 70 | /// The mapping parameters (for `YAML_MAPPING_START_EVENT`). 71 | MappingStart { 72 | /// The anchor. 73 | anchor: Option, 74 | /// The tag. 
75 | tag: Option, 76 | /// Is the tag optional? 77 | implicit: bool, 78 | /// The mapping style. 79 | style: MappingStyle, 80 | }, 81 | MappingEnd, 82 | } 83 | 84 | impl Event { 85 | /// Make an event from its data, setting both marks to zero. 86 | pub(crate) fn new(data: EventData) -> Self { 87 | Self { 88 | data, 89 | start_mark: Mark::default(), 90 | end_mark: Mark::default(), 91 | } 92 | } 93 | 94 | /// Create the STREAM-START event. 95 | pub fn stream_start(encoding: Encoding) -> Self { 96 | Self::new(EventData::StreamStart { encoding }) 97 | } 98 | 99 | /// Create the STREAM-END event. 100 | pub fn stream_end() -> Self { 101 | Self::new(EventData::StreamEnd) 102 | } 103 | 104 | /// Create the DOCUMENT-START event. 105 | /// 106 | /// The `implicit` argument is considered as a stylistic parameter and may be 107 | /// ignored by the emitter. 108 | pub fn document_start( 109 | version_directive: Option, 110 | tag_directives_in: &[TagDirective], 111 | implicit: bool, 112 | ) -> Self { 113 | let tag_directives = tag_directives_in.to_vec(); 114 | 115 | Self::new(EventData::DocumentStart { 116 | version_directive, 117 | tag_directives, 118 | implicit, 119 | }) 120 | } 121 | 122 | /// Create the DOCUMENT-END event. 123 | /// 124 | /// The `implicit` argument is considered as a stylistic parameter and may be 125 | /// ignored by the emitter. 126 | pub fn document_end(implicit: bool) -> Self { 127 | Self::new(EventData::DocumentEnd { implicit }) 128 | } 129 | 130 | /// Create an ALIAS event. 131 | pub fn alias(anchor: &str) -> Self { 132 | Self::new(EventData::Alias { 133 | anchor: String::from(anchor), 134 | }) 135 | } 136 | 137 | /// Create a SCALAR event. 138 | /// 139 | /// The `style` argument may be ignored by the emitter. 140 | /// 141 | /// Either the `tag` attribute or one of the `plain_implicit` and 142 | /// `quoted_implicit` flags must be set. 
143 | /// 144 | pub fn scalar( 145 | anchor: Option<&str>, 146 | tag: Option<&str>, 147 | value: &str, 148 | plain_implicit: bool, 149 | quoted_implicit: bool, 150 | style: ScalarStyle, 151 | ) -> Self { 152 | let mut anchor_copy: Option = None; 153 | let mut tag_copy: Option = None; 154 | 155 | if let Some(anchor) = anchor { 156 | anchor_copy = Some(String::from(anchor)); 157 | } 158 | if let Some(tag) = tag { 159 | tag_copy = Some(String::from(tag)); 160 | } 161 | 162 | Self::new(EventData::Scalar { 163 | anchor: anchor_copy, 164 | tag: tag_copy, 165 | value: String::from(value), 166 | plain_implicit, 167 | quoted_implicit, 168 | style, 169 | }) 170 | } 171 | 172 | /// Create a SEQUENCE-START event. 173 | /// 174 | /// The `style` argument may be ignored by the emitter. 175 | /// 176 | /// Either the `tag` attribute or the `implicit` flag must be set. 177 | pub fn sequence_start( 178 | anchor: Option<&str>, 179 | tag: Option<&str>, 180 | implicit: bool, 181 | style: SequenceStyle, 182 | ) -> Self { 183 | let mut anchor_copy: Option = None; 184 | let mut tag_copy: Option = None; 185 | 186 | if let Some(anchor) = anchor { 187 | anchor_copy = Some(String::from(anchor)); 188 | } 189 | if let Some(tag) = tag { 190 | tag_copy = Some(String::from(tag)); 191 | } 192 | 193 | Self::new(EventData::SequenceStart { 194 | anchor: anchor_copy, 195 | tag: tag_copy, 196 | implicit, 197 | style, 198 | }) 199 | } 200 | 201 | /// Create a SEQUENCE-END event. 202 | pub fn sequence_end() -> Self { 203 | Self::new(EventData::SequenceEnd) 204 | } 205 | 206 | /// Create a MAPPING-START event. 207 | /// 208 | /// The `style` argument may be ignored by the emitter. 209 | /// 210 | /// Either the `tag` attribute or the `implicit` flag must be set. 
211 | pub fn mapping_start( 212 | anchor: Option<&str>, 213 | tag: Option<&str>, 214 | implicit: bool, 215 | style: MappingStyle, 216 | ) -> Self { 217 | let mut anchor_copy: Option = None; 218 | let mut tag_copy: Option = None; 219 | 220 | if let Some(anchor) = anchor { 221 | anchor_copy = Some(String::from(anchor)); 222 | } 223 | 224 | if let Some(tag) = tag { 225 | tag_copy = Some(String::from(tag)); 226 | } 227 | 228 | Self::new(EventData::MappingStart { 229 | anchor: anchor_copy, 230 | tag: tag_copy, 231 | implicit, 232 | style, 233 | }) 234 | } 235 | 236 | /// Create a MAPPING-END event. 237 | pub fn mapping_end() -> Self { 238 | Self::new(EventData::MappingEnd) 239 | } 240 | } 241 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | pub type Result = core::result::Result; 2 | 3 | /// The pointer position. 4 | #[derive(Copy, Clone, Default, Debug, PartialEq, Eq)] 5 | #[non_exhaustive] 6 | pub struct Mark { 7 | /// The position index. 8 | pub index: u64, 9 | /// The position line. 10 | pub line: u64, 11 | /// The position column. 
12 | pub column: u64, 13 | } 14 | 15 | impl std::fmt::Display for Mark { 16 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 17 | write!(f, "line {} column {}", self.line + 1, self.column + 1) 18 | } 19 | } 20 | 21 | #[derive(Debug)] 22 | #[allow(clippy::struct_field_names)] 23 | struct Problem { 24 | pub problem: &'static str, 25 | pub problem_mark: Mark, 26 | pub context: &'static str, 27 | pub context_mark: Mark, 28 | } 29 | 30 | #[derive(Debug)] 31 | enum ErrorImpl { 32 | Reader { 33 | problem: &'static str, 34 | offset: usize, 35 | value: i32, 36 | }, 37 | Scanner(Problem), 38 | Parser(Problem), 39 | Composer(Problem), 40 | Emitter(&'static str), 41 | Io(std::io::Error), 42 | } 43 | 44 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 45 | pub enum ErrorKind { 46 | Reader, 47 | Scanner, 48 | Parser, 49 | Composer, 50 | Emitter, 51 | Io, 52 | } 53 | 54 | #[derive(Debug)] 55 | pub struct Error(Box); 56 | 57 | impl From for Error { 58 | fn from(value: std::io::Error) -> Self { 59 | Self(Box::new(ErrorImpl::Io(value))) 60 | } 61 | } 62 | 63 | impl Error { 64 | pub(crate) fn reader(problem: &'static str, offset: usize, value: i32) -> Self { 65 | Self(Box::new(ErrorImpl::Reader { 66 | problem, 67 | offset, 68 | value, 69 | })) 70 | } 71 | 72 | pub(crate) fn scanner( 73 | context: &'static str, 74 | context_mark: Mark, 75 | problem: &'static str, 76 | problem_mark: Mark, 77 | ) -> Self { 78 | Self(Box::new(ErrorImpl::Scanner(Problem { 79 | problem, 80 | problem_mark, 81 | context, 82 | context_mark, 83 | }))) 84 | } 85 | 86 | pub(crate) fn parser( 87 | context: &'static str, 88 | context_mark: Mark, 89 | problem: &'static str, 90 | problem_mark: Mark, 91 | ) -> Self { 92 | Self(Box::new(ErrorImpl::Parser(Problem { 93 | problem, 94 | problem_mark, 95 | context, 96 | context_mark, 97 | }))) 98 | } 99 | 100 | pub(crate) fn composer( 101 | context: &'static str, 102 | context_mark: Mark, 103 | problem: &'static str, 104 | problem_mark: Mark, 105 | ) -> 
Self { 106 | Self(Box::new(ErrorImpl::Composer(Problem { 107 | problem, 108 | problem_mark, 109 | context, 110 | context_mark, 111 | }))) 112 | } 113 | 114 | pub(crate) fn emitter(problem: &'static str) -> Self { 115 | Self(Box::new(ErrorImpl::Emitter(problem))) 116 | } 117 | 118 | pub fn kind(&self) -> ErrorKind { 119 | match &*self.0 { 120 | ErrorImpl::Reader { .. } => ErrorKind::Reader, 121 | ErrorImpl::Scanner(_) => ErrorKind::Scanner, 122 | ErrorImpl::Parser(_) => ErrorKind::Parser, 123 | ErrorImpl::Composer(_) => ErrorKind::Composer, 124 | ErrorImpl::Emitter(_) => ErrorKind::Emitter, 125 | ErrorImpl::Io(_) => ErrorKind::Io, 126 | } 127 | } 128 | 129 | pub fn problem_mark(&self) -> Option { 130 | match &*self.0 { 131 | ErrorImpl::Reader { .. } | ErrorImpl::Emitter(_) | ErrorImpl::Io(_) => None, 132 | ErrorImpl::Scanner(p) | ErrorImpl::Parser(p) | ErrorImpl::Composer(p) => { 133 | Some(p.problem_mark) 134 | } 135 | } 136 | } 137 | 138 | pub fn context_mark(&self) -> Option { 139 | match &*self.0 { 140 | ErrorImpl::Reader { .. } | ErrorImpl::Emitter(..) | ErrorImpl::Io(_) => None, 141 | ErrorImpl::Scanner(p) | ErrorImpl::Parser(p) | ErrorImpl::Composer(p) => { 142 | if p.context.is_empty() { 143 | None 144 | } else { 145 | Some(p.context_mark) 146 | } 147 | } 148 | } 149 | } 150 | 151 | pub fn problem(&self) -> &'static str { 152 | match &*self.0 { 153 | ErrorImpl::Reader { problem, .. } | ErrorImpl::Emitter(problem) => problem, 154 | ErrorImpl::Scanner(p) | ErrorImpl::Parser(p) | ErrorImpl::Composer(p) => p.problem, 155 | ErrorImpl::Io(_) => "I/O error", 156 | } 157 | } 158 | 159 | pub fn context(&self) -> Option<&'static str> { 160 | match &*self.0 { 161 | ErrorImpl::Reader { .. } | ErrorImpl::Emitter(..) 
| ErrorImpl::Io(_) => None, 162 | ErrorImpl::Scanner(p) | ErrorImpl::Parser(p) | ErrorImpl::Composer(p) => { 163 | if p.context.is_empty() { 164 | None 165 | } else { 166 | Some(p.context) 167 | } 168 | } 169 | } 170 | } 171 | } 172 | 173 | impl std::error::Error for Error { 174 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 175 | if let ErrorImpl::Io(err) = &*self.0 { 176 | Some(err) 177 | } else { 178 | None 179 | } 180 | } 181 | } 182 | 183 | impl TryFrom for std::io::Error { 184 | type Error = Error; 185 | 186 | fn try_from(value: Error) -> Result { 187 | if value.kind() == ErrorKind::Io { 188 | if let ErrorImpl::Io(err) = *value.0 { 189 | Ok(err) 190 | } else { 191 | unreachable!() 192 | } 193 | } else { 194 | Err(value) 195 | } 196 | } 197 | } 198 | 199 | impl core::fmt::Display for ErrorKind { 200 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 201 | f.write_str(match self { 202 | ErrorKind::Reader => "Reader", 203 | ErrorKind::Scanner => "Scanner", 204 | ErrorKind::Parser => "Parser", 205 | ErrorKind::Composer => "Composer", 206 | ErrorKind::Emitter => "Emitter", 207 | ErrorKind::Io => "I/O", 208 | }) 209 | } 210 | } 211 | 212 | impl core::fmt::Display for Problem { 213 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 214 | let Self { 215 | problem, 216 | problem_mark, 217 | context, 218 | context_mark, 219 | } = self; 220 | 221 | if self.context.is_empty() { 222 | write!(f, "{problem_mark}: {problem}") 223 | } else { 224 | write!(f, "{problem_mark}: {problem} {context} ({context_mark})") 225 | } 226 | } 227 | } 228 | 229 | impl core::fmt::Display for Error { 230 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 231 | write!(f, "{} error: ", self.kind())?; 232 | match *self.0 { 233 | ErrorImpl::Reader { 234 | problem, 235 | offset, 236 | value, 237 | } => write!(f, "{problem} (offset {offset}, value {value})"), 238 | ErrorImpl::Scanner(ref p) | ErrorImpl::Parser(ref p) | 
ErrorImpl::Composer(ref p) => { 239 | write!(f, "{p}") 240 | } 241 | ErrorImpl::Emitter(problem) => write!(f, "{problem}"), 242 | ErrorImpl::Io(ref err) => write!(f, "{err}"), 243 | } 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /src/reader.rs: -------------------------------------------------------------------------------- 1 | use std::io::BufRead; 2 | 3 | use alloc::collections::VecDeque; 4 | 5 | use crate::{Encoding, Error, Result, scanner::Scanner}; 6 | 7 | const BOM_UTF8: [u8; 3] = [0xef, 0xbb, 0xbf]; 8 | const BOM_UTF16LE: [u8; 2] = [0xff, 0xfe]; 9 | const BOM_UTF16BE: [u8; 2] = [0xfe, 0xff]; 10 | 11 | fn yaml_parser_determine_encoding(reader: &mut dyn BufRead) -> Result> { 12 | let initial_bytes = reader.fill_buf()?; 13 | if initial_bytes.is_empty() { 14 | return Ok(None); 15 | } 16 | 17 | match initial_bytes[0] { 18 | 0xef => { 19 | let mut bom = [0; 3]; 20 | reader.read_exact(&mut bom)?; 21 | if bom == BOM_UTF8 { 22 | Ok(Some(Encoding::Utf8)) 23 | } else { 24 | Err(Error::reader( 25 | "invalid byte order marker", 26 | 0, 27 | i32::from_be_bytes([bom[0], bom[1], bom[2], 0]), 28 | )) 29 | } 30 | } 31 | 0xff | 0xfe => { 32 | let mut bom = [0; 2]; 33 | reader.read_exact(&mut bom)?; 34 | if bom == BOM_UTF16LE { 35 | Ok(Some(Encoding::Utf16Le)) 36 | } else if bom == BOM_UTF16BE { 37 | Ok(Some(Encoding::Utf16Be)) 38 | } else { 39 | Err(Error::reader( 40 | "invalid byte order marker", 41 | 0, 42 | i32::from_le_bytes([bom[0], bom[1], 0, 0]), 43 | )) 44 | } 45 | } 46 | _ => Ok(Some(Encoding::Utf8)), 47 | } 48 | } 49 | 50 | // Allowing unsafe code because it is the only efficient way to partially decode 51 | // a string slice from a stream of UTF-8 bytes. 
52 | #[allow(unsafe_code)] 53 | fn read_utf8_buffered( 54 | reader: &mut dyn BufRead, 55 | out: &mut VecDeque<char>, 56 | offset: &mut usize, 57 | ) -> Result<bool> { 58 | let available = loop { 59 | match reader.fill_buf() { 60 | Ok([]) => return Ok(false), 61 | Ok(available) => break available, 62 | Err(err) if err.kind() == std::io::ErrorKind::Interrupted => (), 63 | Err(err) => return Err(err.into()), 64 | } 65 | }; 66 | 67 | match core::str::from_utf8(available) { 68 | Ok(valid) => { 69 | let used = valid.len(); 70 | // The entire contents of the input buffer was valid UTF-8. 71 | for ch in valid.chars() { 72 | push_char(out, ch, *offset)?; 73 | *offset += ch.len_utf8(); 74 | } 75 | reader.consume(used); 76 | Ok(true) 77 | } 78 | Err(err) => { 79 | let valid_bytes = err.valid_up_to(); 80 | 81 | // If some of the buffer contents were valid, append that to the 82 | // output. 83 | let valid = unsafe { 84 | // SAFETY: This is safe because of `valid_up_to()`. 85 | core::str::from_utf8_unchecked(&available[..valid_bytes]) 86 | }; 87 | for ch in valid.chars() { 88 | push_char(out, ch, *offset)?; 89 | *offset += ch.len_utf8(); 90 | } 91 | 92 | match err.error_len() { 93 | Some(_invalid_len) => Err(Error::reader( 94 | "invalid UTF-8", 95 | *offset, 96 | available[valid_bytes] as _, 97 | )), 98 | None => { 99 | if valid_bytes != 0 { 100 | // Some valid UTF-8 characters were present, and the 101 | // tail end of the buffer was an incomplete sequence. 102 | // Leave the incomplete sequence in the buffer. 103 | reader.consume(valid_bytes); 104 | Ok(true) 105 | } else { 106 | // The beginning of the buffer was an incomplete UTF-8 107 | // sequence. Read the whole character unbuffered. 108 | // 109 | // This will return `UnexpectedEof` if the sequence 110 | // cannot be completed. Note that `read_exact()` handles 111 | // interrupt automatically.
112 | let initial = available[0]; 113 | read_utf8_char_unbuffered(reader, out, initial, offset)?; 114 | Ok(true) 115 | } 116 | } 117 | } 118 | } 119 | } 120 | } 121 | 122 | fn read_utf8_char_unbuffered( 123 | reader: &mut dyn BufRead, 124 | out: &mut VecDeque<char>, 125 | initial: u8, 126 | offset: &mut usize, 127 | ) -> Result<()> { 128 | let width = utf8_char_width(initial); 129 | let mut buffer = [0; 4]; 130 | reader.read_exact(&mut buffer[..width])?; 131 | if let Ok(valid) = core::str::from_utf8(&buffer[..width]) { 132 | // We read a whole, valid character. 133 | let ch = match valid.chars().next() { 134 | Some(ch) => ch, 135 | None => unreachable!(), 136 | }; 137 | push_char(out, ch, *offset)?; 138 | *offset += width; 139 | Ok(()) 140 | } else { 141 | // Since we read the exact character width, the only 142 | // possible error here is invalid Unicode. 143 | Err(Error::reader("invalid UTF-8", *offset, buffer[0] as _)) 144 | } 145 | } 146 | 147 | fn read_utf16_buffered<const BIG_ENDIAN: bool>( 148 | reader: &mut dyn BufRead, 149 | out: &mut VecDeque<char>, 150 | offset: &mut usize, 151 | ) -> Result<bool> { 152 | let available = loop { 153 | match reader.fill_buf() { 154 | Ok([]) => return Ok(false), 155 | Ok(available) => break available, 156 | Err(err) if err.kind() == std::io::ErrorKind::Interrupted => (), 157 | Err(err) => return Err(err.into()), 158 | } 159 | }; 160 | 161 | let chunks = available.chunks_exact(2).map(|chunk| { 162 | let (a, b) = match chunk { 163 | [a, b] => (a, b), 164 | _ => unreachable!(), 165 | }; 166 | if BIG_ENDIAN { 167 | u16::from_be_bytes([*a, *b]) 168 | } else { 169 | u16::from_le_bytes([*a, *b]) 170 | } 171 | }); 172 | 173 | let mut used = 0; 174 | for ch in core::char::decode_utf16(chunks) { 175 | match ch { 176 | Ok(ch) => { 177 | push_char(out, ch, *offset)?; 178 | let n = ch.len_utf16(); 179 | *offset += n; 180 | used += n; 181 | } 182 | Err(_) => { 183 | // An unpaired surrogate may either be a corrupt stream, but it 184 | // can also be that the buffer just happens
to contain the first 185 | // half of a surrogate pair. Consume all of the valid bytes in 186 | // the buffer first, and then handle the unpaired surrogate in 187 | // the "slow" path (`read_utf16_char_unbuffered`) the next time 188 | // we are called. 189 | break; 190 | } 191 | } 192 | } 193 | 194 | if used != 0 { 195 | reader.consume(used); 196 | *offset += used; 197 | Ok(true) 198 | } else { 199 | debug_assert!(!available.is_empty() && available.len() < 2); 200 | read_utf16_char_unbuffered::<BIG_ENDIAN>(reader, out, offset)?; 201 | Ok(true) 202 | } 203 | } 204 | 205 | fn read_utf16_char_unbuffered<const BIG_ENDIAN: bool>( 206 | reader: &mut dyn BufRead, 207 | out: &mut VecDeque<char>, 208 | offset: &mut usize, 209 | ) -> Result<()> { 210 | let mut buffer = [0; 2]; 211 | reader.read_exact(&mut buffer)?; 212 | let first = if BIG_ENDIAN { 213 | u16::from_be_bytes(buffer) 214 | } else { 215 | u16::from_le_bytes(buffer) 216 | }; 217 | 218 | if is_utf16_surrogate(first) { 219 | reader.read_exact(&mut buffer)?; 220 | let second = if BIG_ENDIAN { 221 | u16::from_be_bytes(buffer) 222 | } else { 223 | u16::from_le_bytes(buffer) 224 | }; 225 | 226 | match core::char::decode_utf16([first, second]).next() { 227 | Some(Ok(ch)) => { 228 | push_char(out, ch, *offset)?; 229 | *offset += 4; 230 | Ok(()) 231 | } 232 | Some(Err(err)) => Err(Error::reader( 233 | "invalid UTF-16", 234 | *offset, 235 | err.unpaired_surrogate() as _, 236 | )), 237 | None => unreachable!(), 238 | } 239 | } else { 240 | match core::char::decode_utf16([first]).next() { 241 | Some(Ok(ch)) => { 242 | push_char(out, ch, *offset)?; 243 | *offset += 2; 244 | Ok(()) 245 | } 246 | Some(Err(_)) | None => unreachable!(), 247 | } 248 | } 249 | } 250 | 251 | fn utf8_char_width(initial: u8) -> usize { 252 | if initial & 0x80 == 0 { 253 | 1 254 | } else if initial & 0xE0 == 0xC0 { 255 | 2 256 | } else if initial & 0xF0 == 0xE0 { 257 | 3 258 | } else if initial & 0xF8 == 0xF0 { 259 | 4 260 | } else { 261 | 0 262 | } 263 | } 264 | 265 | fn
is_utf16_surrogate(value: u16) -> bool { 266 | matches!(value, 0xD800..=0xDFFF) 267 | } 268 | 269 | fn push_char(out: &mut VecDeque<char>, ch: char, offset: usize) -> Result<()> { 270 | if !(ch == '\x09' 271 | || ch == '\x0A' 272 | || ch == '\x0D' 273 | || ch >= '\x20' && ch <= '\x7E' 274 | || ch == '\u{0085}' 275 | || ch >= '\u{00A0}' && ch <= '\u{D7FF}' 276 | || ch >= '\u{E000}' && ch <= '\u{FFFD}' 277 | || ch >= '\u{10000}' && ch <= '\u{10FFFF}') 278 | { 279 | return Err(Error::reader( 280 | "control characters are not allowed", 281 | offset, 282 | ch as _, 283 | )); 284 | } 285 | out.push_back(ch); 286 | Ok(()) 287 | } 288 | 289 | pub(crate) fn yaml_parser_update_buffer( 290 | parser: &mut Scanner, 291 | length: usize, 292 | ) -> Result<()> { 293 | let reader = parser.read_handler.as_mut().expect("no read handler"); 294 | if parser.buffer.len() >= length { 295 | return Ok(()); 296 | } 297 | if parser.encoding == Encoding::Any { 298 | if let Some(encoding) = yaml_parser_determine_encoding(reader)? { 299 | parser.encoding = encoding; 300 | } else { 301 | parser.eof = true; 302 | return Ok(()); 303 | } 304 | } 305 | 306 | while parser.buffer.len() < length { 307 | if parser.eof { 308 | return Ok(()); 309 | } 310 | 311 | let not_eof = match parser.encoding { 312 | Encoding::Any => unreachable!(), 313 | Encoding::Utf8 => read_utf8_buffered(reader, &mut parser.buffer, &mut parser.offset)?, 314 | Encoding::Utf16Le => { 315 | read_utf16_buffered::<false>(reader, &mut parser.buffer, &mut parser.offset)? 316 | } 317 | Encoding::Utf16Be => { 318 | read_utf16_buffered::<true>(reader, &mut parser.buffer, &mut parser.offset)?
319 | } 320 | }; 321 | if !not_eof { 322 | parser.eof = true; 323 | return Ok(()); 324 | } 325 | } 326 | 327 | if parser.offset >= (!0_usize).wrapping_div(2_usize) { 328 | return Err(Error::reader("input is too long", parser.offset, -1)); 329 | } 330 | Ok(()) 331 | } 332 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![doc = include_str!("../README.md")] 2 | #![doc(html_root_url = "https://docs.rs/libyaml-safer/0.1.0")] 3 | #![warn(clippy::pedantic)] 4 | #![allow( 5 | clippy::cast_lossless, 6 | clippy::cast_possible_truncation, 7 | clippy::cast_possible_wrap, 8 | clippy::cast_sign_loss, 9 | clippy::fn_params_excessive_bools, 10 | clippy::manual_range_contains, 11 | clippy::missing_panics_doc, 12 | clippy::missing_errors_doc, 13 | clippy::module_name_repetitions, 14 | clippy::must_use_candidate, 15 | clippy::needless_pass_by_value, 16 | clippy::struct_excessive_bools, 17 | clippy::too_many_arguments, 18 | clippy::too_many_lines, 19 | clippy::unnecessary_wraps, 20 | clippy::match_wildcard_for_single_variants 21 | )] 22 | #![deny(unsafe_code)] 23 | 24 | extern crate alloc; 25 | 26 | #[macro_use] 27 | mod macros; 28 | 29 | mod document; 30 | mod emitter; 31 | mod error; 32 | mod event; 33 | mod parser; 34 | mod reader; 35 | mod scanner; 36 | mod token; 37 | 38 | pub use crate::document::*; 39 | pub use crate::emitter::*; 40 | pub use crate::error::*; 41 | pub use crate::event::*; 42 | pub use crate::parser::*; 43 | pub use crate::scanner::*; 44 | pub use crate::token::*; 45 | 46 | pub(crate) const INPUT_RAW_BUFFER_SIZE: usize = 16384; 47 | pub(crate) const INPUT_BUFFER_SIZE: usize = INPUT_RAW_BUFFER_SIZE; 48 | pub(crate) const OUTPUT_BUFFER_SIZE: usize = 16384; 49 | 50 | /// The tag `!!null` with the only possible value: `null`. 
51 | pub const NULL_TAG: &str = "tag:yaml.org,2002:null"; 52 | /// The tag `!!bool` with the values: `true` and `false`. 53 | pub const BOOL_TAG: &str = "tag:yaml.org,2002:bool"; 54 | /// The tag `!!str` for string values. 55 | pub const STR_TAG: &str = "tag:yaml.org,2002:str"; 56 | /// The tag `!!int` for integer values. 57 | pub const INT_TAG: &str = "tag:yaml.org,2002:int"; 58 | /// The tag `!!float` for float values. 59 | pub const FLOAT_TAG: &str = "tag:yaml.org,2002:float"; 60 | /// The tag `!!timestamp` for date and time values. 61 | pub const TIMESTAMP_TAG: &str = "tag:yaml.org,2002:timestamp"; 62 | 63 | /// The tag `!!seq` is used to denote sequences. 64 | pub const SEQ_TAG: &str = "tag:yaml.org,2002:seq"; 65 | /// The tag `!!map` is used to denote mapping. 66 | pub const MAP_TAG: &str = "tag:yaml.org,2002:map"; 67 | 68 | /// The default scalar tag is `!!str`. 69 | pub const DEFAULT_SCALAR_TAG: &str = STR_TAG; 70 | /// The default sequence tag is `!!seq`. 71 | pub const DEFAULT_SEQUENCE_TAG: &str = SEQ_TAG; 72 | /// The default mapping tag is `!!map`. 73 | pub const DEFAULT_MAPPING_TAG: &str = MAP_TAG; 74 | 75 | /// The version directive data. 76 | #[derive(Clone, Copy, Debug, PartialEq)] 77 | #[non_exhaustive] 78 | pub struct VersionDirective { 79 | /// The major version number. 80 | pub major: i32, 81 | /// The minor version number. 82 | pub minor: i32, 83 | } 84 | 85 | /// The tag directive data. 86 | #[derive(Debug, Clone, PartialEq)] 87 | #[non_exhaustive] 88 | pub struct TagDirective { 89 | /// The tag handle. 90 | pub handle: String, 91 | /// The tag prefix. 92 | pub prefix: String, 93 | } 94 | 95 | /// The stream encoding. 96 | #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 97 | #[non_exhaustive] 98 | pub enum Encoding { 99 | /// Let the parser choose the encoding. 100 | #[default] 101 | Any = 0, 102 | /// The default UTF-8 encoding. 103 | Utf8 = 1, 104 | /// The UTF-16-LE encoding with BOM. 
105 | Utf16Le = 2, 106 | /// The UTF-16-BE encoding with BOM. 107 | Utf16Be = 3, 108 | } 109 | 110 | /// Line break type. 111 | #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 112 | #[non_exhaustive] 113 | pub enum Break { 114 | /// Let the parser choose the break type. 115 | #[default] 116 | Any = 0, 117 | /// Use CR for line breaks (Mac style). 118 | Cr = 1, 119 | /// Use LN for line breaks (Unix style). 120 | Ln = 2, 121 | /// Use CR LN for line breaks (DOS style). 122 | CrLn = 3, 123 | } 124 | 125 | /// Scalar styles. 126 | #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 127 | #[non_exhaustive] 128 | pub enum ScalarStyle { 129 | /// Let the emitter choose the style. 130 | #[default] 131 | Any = 0, 132 | /// The plain scalar style. 133 | Plain = 1, 134 | /// The single-quoted scalar style. 135 | SingleQuoted = 2, 136 | /// The double-quoted scalar style. 137 | DoubleQuoted = 3, 138 | /// The literal scalar style. 139 | Literal = 4, 140 | /// The folded scalar style. 141 | Folded = 5, 142 | } 143 | 144 | /// Sequence styles. 145 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 146 | #[non_exhaustive] 147 | pub enum SequenceStyle { 148 | /// Let the emitter choose the style. 149 | Any = 0, 150 | /// The block sequence style. 151 | Block = 1, 152 | /// The flow sequence style. 153 | Flow = 2, 154 | } 155 | 156 | /// Mapping styles. 157 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 158 | #[non_exhaustive] 159 | pub enum MappingStyle { 160 | /// Let the emitter choose the style. 161 | Any = 0, 162 | /// The block mapping style. 163 | Block = 1, 164 | /// The flow mapping style. 
165 | Flow = 2, 166 | } 167 | 168 | #[cfg(test)] 169 | mod tests { 170 | use super::*; 171 | 172 | #[test] 173 | fn sanity() { 174 | const SANITY_INPUT: &str = r#"unicode: "Sosa did fine.\u263A" 175 | control: "\b1998\t1999\t2000\n" 176 | hex esc: "\x0d\x0a is \r\n" 177 | 178 | single: '"Howdy!" he cried.' 179 | quoted: ' # Not a ''comment''.' 180 | tie-fighter: '|\-*-/|' 181 | "#; 182 | const SANITY_OUTPUT: &str = r#"unicode: "Sosa did fine.\u263A" 183 | control: "\b1998\t1999\t2000\n" 184 | hex esc: "\r\n is \r\n" 185 | single: '"Howdy!" he cried.' 186 | quoted: ' # Not a ''comment''.' 187 | tie-fighter: '|\-*-/|' 188 | "#; 189 | let mut parser = Parser::new(); 190 | let mut read_in = SANITY_INPUT.as_bytes(); 191 | parser.set_input_string(&mut read_in); 192 | let doc = Document::load(&mut parser).unwrap(); 193 | 194 | let mut emitter = Emitter::new(); 195 | let mut output = Vec::new(); 196 | emitter.set_output(&mut output); 197 | doc.dump(&mut emitter).unwrap(); 198 | let output_str = core::str::from_utf8(&output).expect("invalid UTF-8"); 199 | assert_eq!(output_str, SANITY_OUTPUT); 200 | } 201 | 202 | #[test] 203 | fn scanner_marks() { 204 | const INPUT: &str = "b: 205 | c: true"; 206 | let mut scanner = Scanner::new(); 207 | let mut read_in = INPUT.as_bytes(); 208 | scanner.set_input(&mut read_in); 209 | let events = scanner.collect::, _>>().unwrap(); 210 | let expected = &[ 211 | Token { 212 | data: TokenData::StreamStart { 213 | encoding: Encoding::Utf8, 214 | }, 215 | start_mark: Mark { 216 | index: 0, 217 | line: 0, 218 | column: 0, 219 | }, 220 | end_mark: Mark { 221 | index: 0, 222 | line: 0, 223 | column: 0, 224 | }, 225 | }, 226 | Token { 227 | data: TokenData::BlockMappingStart, 228 | start_mark: Mark { 229 | index: 0, 230 | line: 0, 231 | column: 0, 232 | }, 233 | end_mark: Mark { 234 | index: 0, 235 | line: 0, 236 | column: 0, 237 | }, 238 | }, 239 | Token { 240 | data: TokenData::Key, 241 | start_mark: Mark { 242 | index: 0, 243 | line: 0, 244 | 
column: 0, 245 | }, 246 | end_mark: Mark { 247 | index: 0, 248 | line: 0, 249 | column: 0, 250 | }, 251 | }, 252 | Token { 253 | data: TokenData::Scalar { 254 | value: String::from("b"), 255 | style: ScalarStyle::Plain, 256 | }, 257 | start_mark: Mark { 258 | index: 0, 259 | line: 0, 260 | column: 0, 261 | }, 262 | end_mark: Mark { 263 | index: 1, 264 | line: 0, 265 | column: 1, 266 | }, 267 | }, 268 | Token { 269 | data: TokenData::Value, 270 | start_mark: Mark { 271 | index: 1, 272 | line: 0, 273 | column: 1, 274 | }, 275 | end_mark: Mark { 276 | index: 2, 277 | line: 0, 278 | column: 2, 279 | }, 280 | }, 281 | Token { 282 | data: TokenData::Key, 283 | start_mark: Mark { 284 | index: 3, 285 | line: 1, 286 | column: 0, 287 | }, 288 | end_mark: Mark { 289 | index: 3, 290 | line: 1, 291 | column: 0, 292 | }, 293 | }, 294 | Token { 295 | data: TokenData::Scalar { 296 | value: String::from("c"), 297 | style: ScalarStyle::Plain, 298 | }, 299 | start_mark: Mark { 300 | index: 3, 301 | line: 1, 302 | column: 0, 303 | }, 304 | end_mark: Mark { 305 | index: 4, 306 | line: 1, 307 | column: 1, 308 | }, 309 | }, 310 | Token { 311 | data: TokenData::Value, 312 | start_mark: Mark { 313 | index: 4, 314 | line: 1, 315 | column: 1, 316 | }, 317 | end_mark: Mark { 318 | index: 5, 319 | line: 1, 320 | column: 2, 321 | }, 322 | }, 323 | Token { 324 | data: TokenData::Scalar { 325 | value: String::from("true"), 326 | style: ScalarStyle::Plain, 327 | }, 328 | start_mark: Mark { 329 | index: 6, 330 | line: 1, 331 | column: 3, 332 | }, 333 | end_mark: Mark { 334 | index: 10, 335 | line: 1, 336 | column: 7, 337 | }, 338 | }, 339 | Token { 340 | data: TokenData::BlockEnd, 341 | start_mark: Mark { 342 | index: 10, 343 | line: 2, 344 | column: 0, 345 | }, 346 | end_mark: Mark { 347 | index: 10, 348 | line: 2, 349 | column: 0, 350 | }, 351 | }, 352 | Token { 353 | data: TokenData::StreamEnd, 354 | start_mark: Mark { 355 | index: 10, 356 | line: 2, 357 | column: 0, 358 | }, 359 | end_mark: 
Mark { 360 | index: 10, 361 | line: 2, 362 | column: 0, 363 | }, 364 | }, 365 | ]; 366 | assert_eq!( 367 | events, 368 | expected, 369 | "diff:\n{}", 370 | zip_longest( 371 | format!("{events:#?}").lines(), 372 | format!("{expected:#?}").lines() 373 | ) 374 | .map(|(a, b)| { 375 | let a = a.unwrap_or_default(); 376 | let b = b.unwrap_or_default(); 377 | format!("{a:<40} {b}") 378 | }) 379 | .collect::<Vec<_>>() 380 | .join("\n") 381 | ); 382 | } 383 | 384 | fn zip_longest<A: Iterator, B: Iterator>( 385 | a: A, 386 | b: B, 387 | ) -> impl Iterator<Item = (Option<A::Item>, Option<B::Item>)> { 388 | let mut a = a.map(Some).collect::<Vec<_>>(); 389 | let mut b = b.map(Some).collect::<Vec<_>>(); 390 | let len = a.len().max(b.len()); 391 | a.resize_with(len, || None); 392 | b.resize_with(len, || None); 393 | a.into_iter() 394 | .zip(b) 395 | .take_while(|(a, b)| a.is_some() || b.is_some()) 396 | } 397 | } 398 | -------------------------------------------------------------------------------- /src/document.rs: -------------------------------------------------------------------------------- 1 | use std::io::BufRead; 2 | 3 | use crate::{ 4 | AliasData, Anchors, DEFAULT_MAPPING_TAG, DEFAULT_SCALAR_TAG, DEFAULT_SEQUENCE_TAG, Emitter, 5 | Error, Event, EventData, MappingStyle, Mark, Parser, ParserInner, Result, ScalarStyle, 6 | SequenceStyle, TagDirective, VersionDirective, 7 | }; 8 | 9 | /// The document structure. 10 | #[derive(Clone, Debug)] 11 | #[non_exhaustive] 12 | pub struct Document { 13 | /// The document nodes. 14 | pub nodes: Vec<Node>, 15 | /// The version directive. 16 | pub version_directive: Option<VersionDirective>, 17 | /// The list of tag directives. 18 | pub tag_directives: Vec<TagDirective>, 19 | /// Is the document start indicator implicit? 20 | pub start_implicit: bool, 21 | /// Is the document end indicator implicit? 22 | pub end_implicit: bool, 23 | /// The beginning of the document. 24 | pub start_mark: Mark, 25 | /// The end of the document. 26 | pub end_mark: Mark, 27 | } 28 | 29 | /// The node structure.
30 | #[derive(Clone, Default, Debug)] 31 | #[non_exhaustive] 32 | pub struct Node { 33 | /// The node type. 34 | pub data: NodeData, 35 | /// The node tag. 36 | pub tag: Option<String>, 37 | /// The beginning of the node. 38 | pub start_mark: Mark, 39 | /// The end of the node. 40 | pub end_mark: Mark, 41 | } 42 | 43 | /// Node types. 44 | #[derive(Clone, Default, Debug)] 45 | pub enum NodeData { 46 | /// An empty node. 47 | #[default] 48 | NoNode, 49 | /// A scalar node. 50 | Scalar { 51 | /// The scalar value. 52 | value: String, 53 | /// The scalar style. 54 | style: ScalarStyle, 55 | }, 56 | /// A sequence node. 57 | Sequence { 58 | /// The stack of sequence items. 59 | items: Vec<NodeItem>, 60 | /// The sequence style. 61 | style: SequenceStyle, 62 | }, 63 | /// A mapping node. 64 | Mapping { 65 | /// The stack of mapping pairs (key, value). 66 | pairs: Vec<NodePair>, 67 | /// The mapping style. 68 | style: MappingStyle, 69 | }, 70 | } 71 | 72 | /// An element of a sequence node. 73 | pub type NodeItem = i32; 74 | 75 | /// An element of a mapping node. 76 | #[derive(Copy, Clone, Default, Debug)] 77 | #[non_exhaustive] 78 | pub struct NodePair { 79 | /// The key of the element. 80 | pub key: i32, 81 | /// The value of the element. 82 | pub value: i32, 83 | } 84 | 85 | impl Document { 86 | /// Create a YAML document. 87 | pub fn new( 88 | version_directive: Option<VersionDirective>, 89 | tag_directives_in: &[TagDirective], 90 | start_implicit: bool, 91 | end_implicit: bool, 92 | ) -> Document { 93 | let nodes = Vec::with_capacity(16); 94 | let tag_directives = tag_directives_in.to_vec(); 95 | 96 | Document { 97 | nodes, 98 | version_directive, 99 | tag_directives, 100 | start_implicit, 101 | end_implicit, 102 | start_mark: Mark::default(), 103 | end_mark: Mark::default(), 104 | } 105 | } 106 | 107 | /// Get a node of a YAML document. 108 | /// 109 | /// Returns the node object or `None` if `index` is out of range.
110 | pub fn get_node_mut(&mut self, index: i32) -> Option<&mut Node> { 111 | self.nodes.get_mut(index as usize - 1) 112 | } 113 | 114 | /// Get a node of a YAML document. 115 | /// 116 | /// Returns the node object or `None` if `index` is out of range. 117 | pub fn get_node(&self, index: i32) -> Option<&Node> { 118 | self.nodes.get(index as usize - 1) 119 | } 120 | 121 | /// Get the root of a YAML document node. 122 | /// 123 | /// The root object is the first object added to the document. 124 | /// 125 | /// An empty document produced by the parser signifies the end of a YAML stream. 126 | /// 127 | /// Returns the node object or `None` if the document is empty. 128 | pub fn get_root_node(&mut self) -> Option<&mut Node> { 129 | self.nodes.get_mut(0) 130 | } 131 | 132 | /// Create a SCALAR node and attach it to the document. 133 | /// 134 | /// The `style` argument may be ignored by the emitter. 135 | /// 136 | /// Returns the node id or 0 on error. 137 | #[must_use] 138 | pub fn add_scalar(&mut self, tag: Option<&str>, value: &str, style: ScalarStyle) -> i32 { 139 | let mark = Mark { 140 | index: 0_u64, 141 | line: 0_u64, 142 | column: 0_u64, 143 | }; 144 | let tag = tag.unwrap_or(DEFAULT_SCALAR_TAG); 145 | let tag_copy = String::from(tag); 146 | let value_copy = String::from(value); 147 | let node = Node { 148 | data: NodeData::Scalar { 149 | value: value_copy, 150 | style, 151 | }, 152 | tag: Some(tag_copy), 153 | start_mark: mark, 154 | end_mark: mark, 155 | }; 156 | self.nodes.push(node); 157 | self.nodes.len() as i32 158 | } 159 | 160 | /// Create a SEQUENCE node and attach it to the document. 161 | /// 162 | /// The `style` argument may be ignored by the emitter. 163 | /// 164 | /// Returns the node id, which is a nonzero integer. 
165 | #[must_use] 166 | pub fn add_sequence(&mut self, tag: Option<&str>, style: SequenceStyle) -> i32 { 167 | let mark = Mark { 168 | index: 0_u64, 169 | line: 0_u64, 170 | column: 0_u64, 171 | }; 172 | 173 | let items = Vec::with_capacity(16); 174 | let tag = tag.unwrap_or(DEFAULT_SEQUENCE_TAG); 175 | let tag_copy = String::from(tag); 176 | let node = Node { 177 | data: NodeData::Sequence { items, style }, 178 | tag: Some(tag_copy), 179 | start_mark: mark, 180 | end_mark: mark, 181 | }; 182 | self.nodes.push(node); 183 | self.nodes.len() as i32 184 | } 185 | 186 | /// Create a MAPPING node and attach it to the document. 187 | /// 188 | /// The `style` argument may be ignored by the emitter. 189 | /// 190 | /// Returns the node id, which is a nonzero integer. 191 | #[must_use] 192 | pub fn add_mapping(&mut self, tag: Option<&str>, style: MappingStyle) -> i32 { 193 | let mark = Mark { 194 | index: 0_u64, 195 | line: 0_u64, 196 | column: 0_u64, 197 | }; 198 | let pairs = Vec::with_capacity(16); 199 | let tag = tag.unwrap_or(DEFAULT_MAPPING_TAG); 200 | let tag_copy = String::from(tag); 201 | 202 | let node = Node { 203 | data: NodeData::Mapping { pairs, style }, 204 | tag: Some(tag_copy), 205 | start_mark: mark, 206 | end_mark: mark, 207 | }; 208 | 209 | self.nodes.push(node); 210 | self.nodes.len() as i32 211 | } 212 | 213 | /// Add an item to a SEQUENCE node. 214 | pub fn append_sequence_item(&mut self, sequence: i32, item: i32) { 215 | assert!(sequence > 0 && sequence as usize - 1 < self.nodes.len()); 216 | assert!(matches!( 217 | &self.nodes[sequence as usize - 1].data, 218 | NodeData::Sequence { .. } 219 | )); 220 | assert!(item > 0 && item as usize - 1 < self.nodes.len()); 221 | if let NodeData::Sequence { items, .. } = &mut self.nodes[sequence as usize - 1].data { 222 | items.push(item); 223 | } 224 | } 225 | 226 | /// Add a pair of a key and a value to a MAPPING node. 
227 | pub fn yaml_document_append_mapping_pair(&mut self, mapping: i32, key: i32, value: i32) { 228 | assert!(mapping > 0 && mapping as usize - 1 < self.nodes.len()); 229 | assert!(matches!( 230 | &self.nodes[mapping as usize - 1].data, 231 | NodeData::Mapping { .. } 232 | )); 233 | assert!(key > 0 && key as usize - 1 < self.nodes.len()); 234 | assert!(value > 0 && value as usize - 1 < self.nodes.len()); 235 | let pair = NodePair { key, value }; 236 | if let NodeData::Mapping { pairs, .. } = &mut self.nodes[mapping as usize - 1].data { 237 | pairs.push(pair); 238 | } 239 | } 240 | 241 | /// Parse the input stream and produce the next YAML document. 242 | /// 243 | /// Call this function subsequently to produce a sequence of documents 244 | /// constituting the input stream. 245 | /// 246 | /// If the produced document has no root node, it means that the document 247 | /// end has been reached. 248 | /// 249 | /// An application must not alternate the calls of [`Document::load()`] with 250 | /// the calls of [`Parser::parse()`]. Doing this will break the parser. 251 | pub fn load(parser: &mut Parser) -> Result<Document> { 252 | let mut document = Document::new(None, &[], false, false); 253 | document.nodes.reserve(16); 254 | 255 | if !parser.scanner.stream_start_produced { 256 | match parser.parse() { 257 | Ok(Event { 258 | data: EventData::StreamStart { .. }, 259 | ..
260 | }) => (), 261 | Ok(_) => panic!("expected stream start"), 262 | Err(err) => { 263 | parser.inner.delete_aliases(); 264 | return Err(err); 265 | } 266 | } 267 | } 268 | if parser.scanner.stream_end_produced { 269 | return Ok(document); 270 | } 271 | let err: Error; 272 | match parser.parse() { 273 | Ok(event) => { 274 | if let EventData::StreamEnd = &event.data { 275 | return Ok(document); 276 | } 277 | parser.inner.aliases.reserve(16); 278 | match document.load_document(parser, event) { 279 | Ok(()) => { 280 | parser.inner.delete_aliases(); 281 | return Ok(document); 282 | } 283 | Err(e) => err = e, 284 | } 285 | } 286 | Err(e) => err = e, 287 | } 288 | parser.inner.delete_aliases(); 289 | Err(err) 290 | } 291 | 292 | fn load_document(&mut self, parser: &mut Parser, event: Event) -> Result<()> { 293 | let mut ctx = vec![]; 294 | if let EventData::DocumentStart { 295 | version_directive, 296 | tag_directives, 297 | implicit, 298 | } = event.data 299 | { 300 | self.version_directive = version_directive; 301 | self.tag_directives = tag_directives; 302 | self.start_implicit = implicit; 303 | self.start_mark = event.start_mark; 304 | ctx.reserve(16); 305 | if let Err(err) = self.load_nodes(parser, &mut ctx) { 306 | ctx.clear(); 307 | return Err(err); 308 | } 309 | ctx.clear(); 310 | Ok(()) 311 | } else { 312 | panic!("Expected YAML_DOCUMENT_START_EVENT") 313 | } 314 | } 315 | 316 | fn load_nodes(&mut self, parser: &mut Parser, ctx: &mut Vec) -> Result<()> { 317 | let end_implicit; 318 | let end_mark; 319 | 320 | loop { 321 | let event = parser.parse()?; 322 | match event.data { 323 | EventData::StreamStart { .. } => panic!("unexpected stream start event"), 324 | EventData::StreamEnd => panic!("unexpected stream end event"), 325 | EventData::DocumentStart { .. } => panic!("unexpected document start event"), 326 | EventData::DocumentEnd { implicit } => { 327 | end_implicit = implicit; 328 | end_mark = event.end_mark; 329 | break; 330 | } 331 | EventData::Alias { .. 
} => { 332 | self.load_alias(&parser.inner, event, ctx)?; 333 | } 334 | EventData::Scalar { .. } => { 335 | self.load_scalar(&mut parser.inner, event, ctx)?; 336 | } 337 | EventData::SequenceStart { .. } => { 338 | self.load_sequence(&mut parser.inner, event, ctx)?; 339 | } 340 | EventData::SequenceEnd => { 341 | self.load_sequence_end(event, ctx)?; 342 | } 343 | EventData::MappingStart { .. } => { 344 | self.load_mapping(&mut parser.inner, event, ctx)?; 345 | } 346 | EventData::MappingEnd => { 347 | self.load_mapping_end(event, ctx)?; 348 | } 349 | } 350 | } 351 | self.end_implicit = end_implicit; 352 | self.end_mark = end_mark; 353 | Ok(()) 354 | } 355 | 356 | fn register_anchor( 357 | &mut self, 358 | parser: &mut ParserInner, 359 | index: i32, 360 | anchor: Option, 361 | ) -> Result<()> { 362 | let anchor = match anchor { 363 | Some(anchor) => anchor, 364 | None => return Ok(()), 365 | }; 366 | let data = AliasData { 367 | anchor, 368 | index, 369 | mark: self.nodes[index as usize - 1].start_mark, 370 | }; 371 | for alias_data in &parser.aliases { 372 | if alias_data.anchor == data.anchor { 373 | return Err(Error::composer( 374 | "found duplicate anchor; first occurrence", 375 | alias_data.mark, 376 | "second occurrence", 377 | data.mark, 378 | )); 379 | } 380 | } 381 | parser.aliases.push(data); 382 | Ok(()) 383 | } 384 | 385 | fn load_node_add(&mut self, ctx: &[i32], index: i32) -> Result<()> { 386 | let parent_index = match ctx.last() { 387 | Some(parent_index) => parent_index, 388 | None => return Ok(()), 389 | }; 390 | let parent_index = *parent_index; 391 | let parent = &mut self.nodes[parent_index as usize - 1]; 392 | match parent.data { 393 | NodeData::Sequence { ref mut items, .. } => { 394 | items.push(index); 395 | } 396 | NodeData::Mapping { ref mut pairs, .. } => match pairs.last_mut() { 397 | // If the last pair does not have a value, set `index` as the value. 398 | Some(pair @ NodePair { value: 0, .. 
}) => { 399 | pair.value = index; 400 | } 401 | // Otherwise push a new pair where `index` is the key. 402 | _ => pairs.push(NodePair { 403 | key: index, 404 | value: 0, 405 | }), 406 | }, 407 | _ => { 408 | panic!("document parent node is not a sequence or a mapping") 409 | } 410 | } 411 | Ok(()) 412 | } 413 | 414 | fn load_alias(&mut self, parser: &ParserInner, event: Event, ctx: &[i32]) -> Result<()> { 415 | let anchor = match &event.data { 416 | EventData::Alias { anchor } => anchor, 417 | _ => unreachable!(), 418 | }; 419 | 420 | for alias_data in &parser.aliases { 421 | if alias_data.anchor == *anchor { 422 | return self.load_node_add(ctx, alias_data.index); 423 | } 424 | } 425 | 426 | Err(Error::composer( 427 | "", 428 | Mark::default(), 429 | "found undefined alias", 430 | event.start_mark, 431 | )) 432 | } 433 | 434 | fn load_scalar(&mut self, parser: &mut ParserInner, event: Event, ctx: &[i32]) -> Result<()> { 435 | let (mut tag, value, style, anchor) = match event.data { 436 | EventData::Scalar { 437 | tag, 438 | value, 439 | style, 440 | anchor, 441 | .. 442 | } => (tag, value, style, anchor), 443 | _ => unreachable!(), 444 | }; 445 | 446 | if tag.is_none() || tag.as_deref() == Some("!") { 447 | tag = Some(String::from(DEFAULT_SCALAR_TAG)); 448 | } 449 | let node = Node { 450 | data: NodeData::Scalar { value, style }, 451 | tag, 452 | start_mark: event.start_mark, 453 | end_mark: event.end_mark, 454 | }; 455 | self.nodes.push(node); 456 | let index: i32 = self.nodes.len() as i32; 457 | self.register_anchor(parser, index, anchor)?; 458 | self.load_node_add(ctx, index) 459 | } 460 | 461 | fn load_sequence( 462 | &mut self, 463 | parser: &mut ParserInner, 464 | event: Event, 465 | ctx: &mut Vec, 466 | ) -> Result<()> { 467 | let (anchor, mut tag, style) = match event.data { 468 | EventData::SequenceStart { 469 | anchor, 470 | tag, 471 | style, 472 | .. 
473 | } => (anchor, tag, style), 474 | _ => unreachable!(), 475 | }; 476 | 477 | let mut items = Vec::with_capacity(16); 478 | 479 | if tag.is_none() || tag.as_deref() == Some("!") { 480 | tag = Some(String::from(DEFAULT_SEQUENCE_TAG)); 481 | } 482 | 483 | let node = Node { 484 | data: NodeData::Sequence { 485 | items: core::mem::take(&mut items), 486 | style, 487 | }, 488 | tag, 489 | start_mark: event.start_mark, 490 | end_mark: event.end_mark, 491 | }; 492 | 493 | self.nodes.push(node); 494 | let index: i32 = self.nodes.len() as i32; 495 | self.register_anchor(parser, index, anchor)?; 496 | self.load_node_add(ctx, index)?; 497 | ctx.push(index); 498 | Ok(()) 499 | } 500 | 501 | fn load_sequence_end(&mut self, event: Event, ctx: &mut Vec) -> Result<()> { 502 | let index = match ctx.last().copied() { 503 | Some(index) => index, 504 | None => panic!("sequence_end without a current sequence"), 505 | }; 506 | assert!(matches!( 507 | self.nodes[index as usize - 1].data, 508 | NodeData::Sequence { .. } 509 | )); 510 | self.nodes[index as usize - 1].end_mark = event.end_mark; 511 | ctx.pop(); 512 | Ok(()) 513 | } 514 | 515 | fn load_mapping( 516 | &mut self, 517 | parser: &mut ParserInner, 518 | event: Event, 519 | ctx: &mut Vec, 520 | ) -> Result<()> { 521 | let (anchor, mut tag, style) = match event.data { 522 | EventData::MappingStart { 523 | anchor, 524 | tag, 525 | style, 526 | .. 
527 | } => (anchor, tag, style), 528 | _ => unreachable!(), 529 | }; 530 | 531 | let mut pairs = Vec::with_capacity(16); 532 | 533 | if tag.is_none() || tag.as_deref() == Some("!") { 534 | tag = Some(String::from(DEFAULT_MAPPING_TAG)); 535 | } 536 | let node = Node { 537 | data: NodeData::Mapping { 538 | pairs: core::mem::take(&mut pairs), 539 | style, 540 | }, 541 | tag, 542 | start_mark: event.start_mark, 543 | end_mark: event.end_mark, 544 | }; 545 | self.nodes.push(node); 546 | let index: i32 = self.nodes.len() as i32; 547 | self.register_anchor(parser, index, anchor)?; 548 | self.load_node_add(ctx, index)?; 549 | ctx.push(index); 550 | Ok(()) 551 | } 552 | 553 | fn load_mapping_end(&mut self, event: Event, ctx: &mut Vec) -> Result<()> { 554 | let index = match ctx.last().copied() { 555 | Some(index) => index, 556 | None => panic!("mapping_end without a current mapping"), 557 | }; 558 | assert!(matches!( 559 | self.nodes[index as usize - 1].data, 560 | NodeData::Mapping { .. } 561 | )); 562 | self.nodes[index as usize - 1].end_mark = event.end_mark; 563 | ctx.pop(); 564 | Ok(()) 565 | } 566 | 567 | /// Emit a YAML document. 568 | /// 569 | /// The document object may be generated using the [`Document::load()`] 570 | /// function or the [`Document::new()`] function. 571 | pub fn dump(mut self, emitter: &mut Emitter) -> Result<()> { 572 | if !emitter.opened { 573 | if let Err(err) = emitter.open() { 574 | emitter.reset_anchors(); 575 | return Err(err); 576 | } 577 | } 578 | if self.nodes.is_empty() { 579 | // TODO: Do we really want to close the emitter just because the 580 | // document contains no nodes? Isn't it OK to emit multiple documents in 581 | // the same stream? 
582 | emitter.close()?; 583 | } else { 584 | assert!(emitter.opened); 585 | emitter.anchors = vec![Anchors::default(); self.nodes.len()]; 586 | let event = Event::new(EventData::DocumentStart { 587 | version_directive: self.version_directive, 588 | tag_directives: core::mem::take(&mut self.tag_directives), 589 | implicit: self.start_implicit, 590 | }); 591 | emitter.emit(event)?; 592 | self.anchor_node(emitter, 1); 593 | self.dump_node(emitter, 1)?; 594 | let event = Event::document_end(self.end_implicit); 595 | emitter.emit(event)?; 596 | } 597 | 598 | emitter.reset_anchors(); 599 | Ok(()) 600 | } 601 | 602 | fn anchor_node(&self, emitter: &mut Emitter, index: i32) { 603 | let node = &self.nodes[index as usize - 1]; 604 | emitter.anchors[index as usize - 1].references += 1; 605 | if emitter.anchors[index as usize - 1].references == 1 { 606 | match &node.data { 607 | NodeData::Sequence { items, .. } => { 608 | for item in items { 609 | emitter.anchor_node_sub(*item); 610 | } 611 | } 612 | NodeData::Mapping { pairs, .. 
} => { 613 | for pair in pairs { 614 | emitter.anchor_node_sub(pair.key); 615 | emitter.anchor_node_sub(pair.value); 616 | } 617 | } 618 | _ => {} 619 | } 620 | } else if emitter.anchors[index as usize - 1].references == 2 { 621 | emitter.last_anchor_id += 1; 622 | emitter.anchors[index as usize - 1].anchor = emitter.last_anchor_id; 623 | } 624 | } 625 | 626 | fn dump_node(&mut self, emitter: &mut Emitter, index: i32) -> Result<()> { 627 | assert!(index > 0); 628 | let node = &mut self.nodes[index as usize - 1]; 629 | let anchor_id: i32 = emitter.anchors[index as usize - 1].anchor; 630 | let mut anchor: Option = None; 631 | if anchor_id != 0 { 632 | anchor = Some(Emitter::generate_anchor(anchor_id)); 633 | } 634 | if emitter.anchors[index as usize - 1].serialized { 635 | return Self::dump_alias(emitter, anchor.unwrap()); 636 | } 637 | emitter.anchors[index as usize - 1].serialized = true; 638 | 639 | let node = core::mem::take(node); 640 | match node.data { 641 | NodeData::Scalar { .. } => Self::dump_scalar(emitter, node, anchor), 642 | NodeData::Sequence { .. } => self.dump_sequence(emitter, node, anchor), 643 | NodeData::Mapping { .. } => self.dump_mapping(emitter, node, anchor), 644 | _ => unreachable!("document node is neither a scalar, sequence, or a mapping"), 645 | } 646 | } 647 | 648 | fn dump_alias(emitter: &mut Emitter, anchor: String) -> Result<()> { 649 | let event = Event::new(EventData::Alias { anchor }); 650 | emitter.emit(event) 651 | } 652 | 653 | fn dump_scalar(emitter: &mut Emitter, node: Node, anchor: Option) -> Result<()> { 654 | let plain_implicit = node.tag.as_deref() == Some(DEFAULT_SCALAR_TAG); 655 | let quoted_implicit = node.tag.as_deref() == Some(DEFAULT_SCALAR_TAG); // TODO: Why compare twice?! 
(even the C code does this) 656 | 657 | let (value, style) = match node.data { 658 | NodeData::Scalar { value, style } => (value, style), 659 | _ => unreachable!(), 660 | }; 661 | let event = Event::new(EventData::Scalar { 662 | anchor, 663 | tag: node.tag, 664 | value, 665 | plain_implicit, 666 | quoted_implicit, 667 | style, 668 | }); 669 | emitter.emit(event) 670 | } 671 | 672 | fn dump_sequence( 673 | &mut self, 674 | emitter: &mut Emitter, 675 | node: Node, 676 | anchor: Option, 677 | ) -> Result<()> { 678 | let implicit = node.tag.as_deref() == Some(DEFAULT_SEQUENCE_TAG); 679 | 680 | let (items, style) = match node.data { 681 | NodeData::Sequence { items, style } => (items, style), 682 | _ => unreachable!(), 683 | }; 684 | let event = Event::new(EventData::SequenceStart { 685 | anchor, 686 | tag: node.tag, 687 | implicit, 688 | style, 689 | }); 690 | 691 | emitter.emit(event)?; 692 | for item in items { 693 | self.dump_node(emitter, item)?; 694 | } 695 | let event = Event::sequence_end(); 696 | emitter.emit(event) 697 | } 698 | 699 | fn dump_mapping( 700 | &mut self, 701 | emitter: &mut Emitter, 702 | node: Node, 703 | anchor: Option, 704 | ) -> Result<()> { 705 | let implicit = node.tag.as_deref() == Some(DEFAULT_MAPPING_TAG); 706 | 707 | let (pairs, style) = match node.data { 708 | NodeData::Mapping { pairs, style } => (pairs, style), 709 | _ => unreachable!(), 710 | }; 711 | let event = Event::new(EventData::MappingStart { 712 | anchor, 713 | tag: node.tag, 714 | implicit, 715 | style, 716 | }); 717 | 718 | emitter.emit(event)?; 719 | for pair in pairs { 720 | self.dump_node(emitter, pair.key)?; 721 | self.dump_node(emitter, pair.value)?; 722 | } 723 | let event = Event::mapping_end(); 724 | emitter.emit(event) 725 | } 726 | } 727 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | use std::io::BufRead; 2 | 3 | use 
crate::scanner::Scanner; 4 | use crate::{ 5 | Encoding, Error, Event, EventData, MappingStyle, Mark, Result, ScalarStyle, SequenceStyle, 6 | TagDirective, TokenData, VersionDirective, 7 | }; 8 | 9 | /// The parser structure. 10 | #[non_exhaustive] 11 | pub struct Parser { 12 | pub(crate) scanner: Scanner, 13 | pub(crate) inner: ParserInner, 14 | } 15 | 16 | /// The non-generic parts of `Parser`. 17 | #[derive(Default)] 18 | pub(crate) struct ParserInner { 19 | /// The parser states stack. 20 | pub(crate) states: Vec, 21 | /// The current parser state. 22 | pub(crate) state: ParserState, 23 | /// The stack of marks. 24 | pub(crate) marks: Vec, 25 | /// The list of TAG directives. 26 | pub(crate) tag_directives: Vec, 27 | /// The alias data. 28 | pub(crate) aliases: Vec, 29 | } 30 | 31 | impl Default for Parser { 32 | fn default() -> Self { 33 | Self::new() 34 | } 35 | } 36 | 37 | /// This structure holds information about a potential simple key. 38 | #[derive(Copy, Clone)] 39 | #[non_exhaustive] 40 | pub struct SimpleKey { 41 | /// Is a simple key possible? 42 | pub possible: bool, 43 | /// Is a simple key required? 44 | pub required: bool, 45 | /// The number of the token. 46 | pub token_number: usize, 47 | /// The position mark. 48 | pub mark: Mark, 49 | } 50 | 51 | /// The states of the parser. 52 | #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 53 | #[non_exhaustive] 54 | pub enum ParserState { 55 | /// Expect STREAM-START. 56 | #[default] 57 | StreamStart = 0, 58 | /// Expect the beginning of an implicit document. 59 | ImplicitDocumentStart = 1, 60 | /// Expect DOCUMENT-START. 61 | DocumentStart = 2, 62 | /// Expect the content of a document. 63 | DocumentContent = 3, 64 | /// Expect DOCUMENT-END. 65 | DocumentEnd = 4, 66 | /// Expect a block node. 67 | BlockNode = 5, 68 | /// Expect a block node or indentless sequence. 69 | BlockNodeOrIndentlessSequence = 6, 70 | /// Expect a flow node. 
71 | FlowNode = 7, 72 | /// Expect the first entry of a block sequence. 73 | BlockSequenceFirstEntry = 8, 74 | /// Expect an entry of a block sequence. 75 | BlockSequenceEntry = 9, 76 | /// Expect an entry of an indentless sequence. 77 | IndentlessSequenceEntry = 10, 78 | /// Expect the first key of a block mapping. 79 | BlockMappingFirstKey = 11, 80 | /// Expect a block mapping key. 81 | BlockMappingKey = 12, 82 | /// Expect a block mapping value. 83 | BlockMappingValue = 13, 84 | /// Expect the first entry of a flow sequence. 85 | FlowSequenceFirstEntry = 14, 86 | /// Expect an entry of a flow sequence. 87 | FlowSequenceEntry = 15, 88 | /// Expect a key of an ordered mapping. 89 | FlowSequenceEntryMappingKey = 16, 90 | /// Expect a value of an ordered mapping. 91 | FlowSequenceEntryMappingValue = 17, 92 | /// Expect the and of an ordered mapping entry. 93 | FlowSequenceEntryMappingEnd = 18, 94 | /// Expect the first key of a flow mapping. 95 | FlowMappingFirstKey = 19, 96 | /// Expect a key of a flow mapping. 97 | FlowMappingKey = 20, 98 | /// Expect a value of a flow mapping. 99 | FlowMappingValue = 21, 100 | /// Expect an empty value of a flow mapping. 101 | FlowMappingEmptyValue = 22, 102 | /// Expect nothing. 103 | End = 23, 104 | } 105 | 106 | /// This structure holds aliases data. 107 | #[non_exhaustive] 108 | pub struct AliasData { 109 | /// The anchor. 110 | pub anchor: String, 111 | /// The node id. 112 | pub index: i32, 113 | /// The anchor mark. 114 | pub mark: Mark, 115 | } 116 | 117 | impl Iterator for Parser { 118 | type Item = Result; 119 | 120 | fn next(&mut self) -> Option { 121 | if self.scanner.stream_end_produced || self.inner.state == ParserState::End { 122 | None 123 | } else { 124 | Some(self.parse()) 125 | } 126 | } 127 | } 128 | 129 | impl core::iter::FusedIterator for Parser {} 130 | 131 | impl Parser { 132 | /// Create a parser. 
133 | pub fn new() -> Parser { 134 | Parser { 135 | scanner: Scanner::new(), 136 | inner: ParserInner { 137 | states: Vec::with_capacity(16), 138 | state: ParserState::default(), 139 | marks: Vec::with_capacity(16), 140 | tag_directives: Vec::with_capacity(16), 141 | aliases: Vec::new(), 142 | }, 143 | } 144 | } 145 | 146 | /// Reset the parser state. 147 | pub fn reset(&mut self) { 148 | self.scanner.reset(); 149 | 150 | // Preserve allocations. 151 | self.inner.states.clear(); 152 | self.inner.state = ParserState::default(); 153 | self.inner.marks.clear(); 154 | self.inner.tag_directives.clear(); 155 | self.inner.aliases.clear(); 156 | } 157 | } 158 | 159 | impl<'r, 'b> Parser<&'b mut &'r [u8]> { 160 | /// Set a string input. 161 | pub fn set_input_string(&mut self, input: &'r mut &'b [u8]) { 162 | self.scanner.set_input_string(input); 163 | } 164 | } 165 | 166 | impl Parser { 167 | /// Set a generic input handler. 168 | pub fn set_input(&mut self, input: R) { 169 | self.scanner.set_input(input); 170 | } 171 | 172 | /// Set the source encoding. 173 | pub fn set_encoding(&mut self, encoding: Encoding) { 174 | self.scanner.set_encoding(encoding); 175 | } 176 | 177 | /// Parse the input stream and produce the next parsing event. 178 | /// 179 | /// Call the function subsequently to produce a sequence of events 180 | /// corresponding to the input stream. The initial event has the type 181 | /// [`EventData::StreamStart`](crate::EventData::StreamStart) while the 182 | /// ending event has the type 183 | /// [`EventData::StreamEnd`](crate::EventData::StreamEnd). 184 | /// 185 | /// An application must not alternate the calls of [`Parser::parse()`] with 186 | /// the calls of [`Document::load()`](crate::Document::load). Doing this 187 | /// will break the parser. 
188 | pub fn parse(&mut self) -> Result { 189 | if self.scanner.stream_end_produced || self.inner.state == ParserState::End { 190 | return Ok(Event::stream_end()); 191 | } 192 | self.state_machine() 193 | } 194 | 195 | fn state_machine(&mut self) -> Result { 196 | match self.inner.state { 197 | ParserState::StreamStart => self.parse_stream_start(), 198 | ParserState::ImplicitDocumentStart => self.parse_document_start(true), 199 | ParserState::DocumentStart => self.parse_document_start(false), 200 | ParserState::DocumentContent => self.parse_document_content(), 201 | ParserState::DocumentEnd => self.parse_document_end(), 202 | ParserState::BlockNode => self.parse_node(true, false), 203 | ParserState::BlockNodeOrIndentlessSequence => self.parse_node(true, true), 204 | ParserState::FlowNode => self.parse_node(false, false), 205 | ParserState::BlockSequenceFirstEntry => self.parse_block_sequence_entry(true), 206 | ParserState::BlockSequenceEntry => self.parse_block_sequence_entry(false), 207 | ParserState::IndentlessSequenceEntry => self.parse_indentless_sequence_entry(), 208 | ParserState::BlockMappingFirstKey => self.parse_block_mapping_key(true), 209 | ParserState::BlockMappingKey => self.parse_block_mapping_key(false), 210 | ParserState::BlockMappingValue => self.parse_block_mapping_value(), 211 | ParserState::FlowSequenceFirstEntry => self.parse_flow_sequence_entry(true), 212 | ParserState::FlowSequenceEntry => self.parse_flow_sequence_entry(false), 213 | ParserState::FlowSequenceEntryMappingKey => { 214 | self.parse_flow_sequence_entry_mapping_key() 215 | } 216 | ParserState::FlowSequenceEntryMappingValue => { 217 | self.parse_flow_sequence_entry_mapping_value() 218 | } 219 | ParserState::FlowSequenceEntryMappingEnd => { 220 | self.parse_flow_sequence_entry_mapping_end() 221 | } 222 | ParserState::FlowMappingFirstKey => self.parse_flow_mapping_key(true), 223 | ParserState::FlowMappingKey => self.parse_flow_mapping_key(false), 224 | ParserState::FlowMappingValue 
=> self.parse_flow_mapping_value(false), 225 | ParserState::FlowMappingEmptyValue => self.parse_flow_mapping_value(true), 226 | ParserState::End => panic!("parser end state reached unexpectedly"), 227 | } 228 | } 229 | 230 | fn parse_stream_start(&mut self) -> Result { 231 | let token = self.scanner.peek()?; 232 | 233 | if let TokenData::StreamStart { encoding } = &token.data { 234 | let event = Event { 235 | data: EventData::StreamStart { 236 | encoding: *encoding, 237 | }, 238 | start_mark: token.start_mark, 239 | end_mark: token.end_mark, 240 | }; 241 | self.inner.state = ParserState::ImplicitDocumentStart; 242 | self.scanner.skip_token(); 243 | Ok(event) 244 | } else { 245 | let mark = token.start_mark; 246 | Err(Error::parser( 247 | "", 248 | Mark::default(), 249 | "did not find expected ", 250 | mark, 251 | )) 252 | } 253 | } 254 | 255 | fn parse_document_start(&mut self, implicit: bool) -> Result { 256 | let mut version_directive: Option = None; 257 | 258 | let mut tag_directives = vec![]; 259 | let mut token = self.scanner.peek()?; 260 | if !implicit { 261 | while let TokenData::DocumentEnd = &token.data { 262 | self.scanner.skip_token(); 263 | token = self.scanner.peek()?; 264 | } 265 | } 266 | if implicit 267 | && !matches!( 268 | token.data, 269 | TokenData::VersionDirective { .. } 270 | | TokenData::TagDirective { .. 
} 271 | | TokenData::DocumentStart 272 | | TokenData::StreamEnd 273 | ) 274 | { 275 | let event = Event { 276 | data: EventData::DocumentStart { 277 | version_directive: None, 278 | tag_directives: vec![], 279 | implicit: true, 280 | }, 281 | start_mark: token.start_mark, 282 | end_mark: token.end_mark, 283 | }; 284 | self.process_directives(None, None)?; 285 | self.inner.states.push(ParserState::DocumentEnd); 286 | self.inner.state = ParserState::BlockNode; 287 | Ok(event) 288 | } else if !matches!(token.data, TokenData::StreamEnd) { 289 | let end_mark: Mark; 290 | let start_mark: Mark = token.start_mark; 291 | self.process_directives(Some(&mut version_directive), Some(&mut tag_directives))?; 292 | token = self.scanner.peek()?; 293 | if let TokenData::DocumentStart = token.data { 294 | end_mark = token.end_mark; 295 | let event = Event { 296 | data: EventData::DocumentStart { 297 | version_directive, 298 | tag_directives: core::mem::take(&mut tag_directives), 299 | implicit: false, 300 | }, 301 | start_mark, 302 | end_mark, 303 | }; 304 | self.inner.states.push(ParserState::DocumentEnd); 305 | self.inner.state = ParserState::DocumentContent; 306 | self.scanner.skip_token(); 307 | Ok(event) 308 | } else { 309 | Err(Error::parser( 310 | "", 311 | Mark::default(), 312 | "did not find expected ", 313 | token.start_mark, 314 | )) 315 | } 316 | } else { 317 | let event = Event { 318 | data: EventData::StreamEnd, 319 | start_mark: token.start_mark, 320 | end_mark: token.end_mark, 321 | }; 322 | self.inner.state = ParserState::End; 323 | self.scanner.skip_token(); 324 | Ok(event) 325 | } 326 | } 327 | 328 | fn parse_document_content(&mut self) -> Result { 329 | let token = self.scanner.peek()?; 330 | if let TokenData::VersionDirective { .. } 331 | | TokenData::TagDirective { .. 
} 332 | | TokenData::DocumentStart 333 | | TokenData::DocumentEnd 334 | | TokenData::StreamEnd = &token.data 335 | { 336 | let mark = token.start_mark; 337 | self.inner.state = self.inner.states.pop().unwrap(); 338 | Self::process_empty_scalar(mark) 339 | } else { 340 | self.parse_node(true, false) 341 | } 342 | } 343 | 344 | fn parse_document_end(&mut self) -> Result { 345 | let mut end_mark: Mark; 346 | let mut implicit = true; 347 | let token = self.scanner.peek()?; 348 | end_mark = token.start_mark; 349 | let start_mark: Mark = end_mark; 350 | if let TokenData::DocumentEnd = &token.data { 351 | end_mark = token.end_mark; 352 | self.scanner.skip_token(); 353 | implicit = false; 354 | } 355 | self.inner.tag_directives.clear(); 356 | self.inner.state = ParserState::DocumentStart; 357 | Ok(Event { 358 | data: EventData::DocumentEnd { implicit }, 359 | start_mark, 360 | end_mark, 361 | }) 362 | } 363 | 364 | fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> Result { 365 | let mut anchor: Option = None; 366 | let mut tag_handle: Option = None; 367 | let mut tag_suffix: Option = None; 368 | let mut tag: Option = None; 369 | let mut start_mark: Mark; 370 | let mut end_mark: Mark; 371 | let mut tag_mark = Mark { 372 | index: 0, 373 | line: 0, 374 | column: 0, 375 | }; 376 | 377 | let mut token = self.scanner.peek_mut()?; 378 | 379 | if let TokenData::Alias { value } = &mut token.data { 380 | let event = Event { 381 | data: EventData::Alias { 382 | anchor: core::mem::take(value), 383 | }, 384 | start_mark: token.start_mark, 385 | end_mark: token.end_mark, 386 | }; 387 | self.inner.state = self.inner.states.pop().unwrap(); 388 | self.scanner.skip_token(); 389 | return Ok(event); 390 | } 391 | 392 | end_mark = token.start_mark; 393 | start_mark = end_mark; 394 | if let TokenData::Anchor { value } = &mut token.data { 395 | anchor = Some(core::mem::take(value)); 396 | start_mark = token.start_mark; 397 | end_mark = token.end_mark; 398 | 
self.scanner.skip_token(); 399 | token = self.scanner.peek_mut()?; 400 | if let TokenData::Tag { handle, suffix } = &mut token.data { 401 | tag_handle = Some(core::mem::take(handle)); 402 | tag_suffix = Some(core::mem::take(suffix)); 403 | tag_mark = token.start_mark; 404 | end_mark = token.end_mark; 405 | self.scanner.skip_token(); 406 | } 407 | } else if let TokenData::Tag { handle, suffix } = &mut token.data { 408 | tag_handle = Some(core::mem::take(handle)); 409 | tag_suffix = Some(core::mem::take(suffix)); 410 | tag_mark = token.start_mark; 411 | start_mark = tag_mark; 412 | end_mark = token.end_mark; 413 | self.scanner.skip_token(); 414 | token = self.scanner.peek_mut()?; 415 | if let TokenData::Anchor { value } = &mut token.data { 416 | anchor = Some(core::mem::take(value)); 417 | end_mark = token.end_mark; 418 | self.scanner.skip_token(); 419 | } 420 | } 421 | 422 | if let Some(ref tag_handle_value) = tag_handle { 423 | if tag_handle_value.is_empty() { 424 | tag = tag_suffix; 425 | } else { 426 | for tag_directive in &self.inner.tag_directives { 427 | if tag_directive.handle == *tag_handle_value { 428 | let suffix = tag_suffix.as_deref().unwrap_or(""); 429 | tag = Some(alloc::format!("{}{}", tag_directive.prefix, suffix)); 430 | break; 431 | } 432 | } 433 | if tag.is_none() { 434 | return Err(Error::parser( 435 | "while parsing a node", 436 | start_mark, 437 | "found undefined tag handle", 438 | tag_mark, 439 | )); 440 | } 441 | } 442 | } 443 | 444 | let token = self.scanner.peek_mut()?; 445 | 446 | let implicit = tag.is_none() || tag.as_deref() == Some(""); 447 | 448 | if indentless_sequence && matches!(token.data, TokenData::BlockEntry) { 449 | end_mark = token.end_mark; 450 | self.inner.state = ParserState::IndentlessSequenceEntry; 451 | let event = Event { 452 | data: EventData::SequenceStart { 453 | anchor, 454 | tag, 455 | implicit, 456 | style: SequenceStyle::Block, 457 | }, 458 | start_mark, 459 | end_mark, 460 | }; 461 | Ok(event) 462 | } else if 
let TokenData::Scalar { value, style } = &mut token.data { 463 | let mut plain_implicit = false; 464 | let mut quoted_implicit = false; 465 | end_mark = token.end_mark; 466 | if *style == ScalarStyle::Plain && tag.is_none() || tag.as_deref() == Some("!") { 467 | plain_implicit = true; 468 | } else if tag.is_none() { 469 | quoted_implicit = true; 470 | } 471 | let event = Event { 472 | data: EventData::Scalar { 473 | anchor, 474 | tag, 475 | value: core::mem::take(value), 476 | plain_implicit, 477 | quoted_implicit, 478 | style: *style, 479 | }, 480 | start_mark, 481 | end_mark, 482 | }; 483 | self.inner.state = self.inner.states.pop().unwrap(); 484 | self.scanner.skip_token(); 485 | Ok(event) 486 | } else if let TokenData::FlowSequenceStart = &token.data { 487 | end_mark = token.end_mark; 488 | self.inner.state = ParserState::FlowSequenceFirstEntry; 489 | let event = Event { 490 | data: EventData::SequenceStart { 491 | anchor, 492 | tag, 493 | implicit, 494 | style: SequenceStyle::Flow, 495 | }, 496 | start_mark, 497 | end_mark, 498 | }; 499 | Ok(event) 500 | } else if let TokenData::FlowMappingStart = &token.data { 501 | end_mark = token.end_mark; 502 | self.inner.state = ParserState::FlowMappingFirstKey; 503 | let event = Event { 504 | data: EventData::MappingStart { 505 | anchor, 506 | tag, 507 | implicit, 508 | style: MappingStyle::Flow, 509 | }, 510 | start_mark, 511 | end_mark, 512 | }; 513 | Ok(event) 514 | } else if block && matches!(token.data, TokenData::BlockSequenceStart) { 515 | end_mark = token.end_mark; 516 | self.inner.state = ParserState::BlockSequenceFirstEntry; 517 | let event = Event { 518 | data: EventData::SequenceStart { 519 | anchor, 520 | tag, 521 | implicit, 522 | style: SequenceStyle::Block, 523 | }, 524 | start_mark, 525 | end_mark, 526 | }; 527 | Ok(event) 528 | } else if block && matches!(token.data, TokenData::BlockMappingStart) { 529 | end_mark = token.end_mark; 530 | self.inner.state = ParserState::BlockMappingFirstKey; 531 | let 
event = Event { 532 | data: EventData::MappingStart { 533 | anchor, 534 | tag, 535 | implicit, 536 | style: MappingStyle::Block, 537 | }, 538 | start_mark, 539 | end_mark, 540 | }; 541 | Ok(event) 542 | } else if anchor.is_some() || tag.is_some() { 543 | self.inner.state = self.inner.states.pop().unwrap(); 544 | let event = Event { 545 | data: EventData::Scalar { 546 | anchor, 547 | tag, 548 | value: String::new(), 549 | plain_implicit: implicit, 550 | quoted_implicit: false, 551 | style: ScalarStyle::Plain, 552 | }, 553 | start_mark, 554 | end_mark, 555 | }; 556 | Ok(event) 557 | } else { 558 | Err(Error::parser( 559 | if block { 560 | "while parsing a block node" 561 | } else { 562 | "while parsing a flow node" 563 | }, 564 | start_mark, 565 | "did not find expected node content", 566 | token.start_mark, 567 | )) 568 | } 569 | } 570 | 571 | fn parse_block_sequence_entry(&mut self, first: bool) -> Result { 572 | if first { 573 | let token = self.scanner.peek()?; 574 | let mark = token.start_mark; 575 | self.inner.marks.push(mark); 576 | self.scanner.skip_token(); 577 | } 578 | 579 | let mut token = self.scanner.peek()?; 580 | 581 | if let TokenData::BlockEntry = &token.data { 582 | let mark: Mark = token.end_mark; 583 | self.scanner.skip_token(); 584 | token = self.scanner.peek()?; 585 | if matches!(token.data, TokenData::BlockEntry | TokenData::BlockEnd) { 586 | self.inner.state = ParserState::BlockSequenceEntry; 587 | Self::process_empty_scalar(mark) 588 | } else { 589 | self.inner.states.push(ParserState::BlockSequenceEntry); 590 | self.parse_node(true, false) 591 | } 592 | } else if let TokenData::BlockEnd = token.data { 593 | let event = Event { 594 | data: EventData::SequenceEnd, 595 | start_mark: token.start_mark, 596 | end_mark: token.end_mark, 597 | }; 598 | self.inner.state = self.inner.states.pop().unwrap(); 599 | let _ = self.inner.marks.pop(); 600 | self.scanner.skip_token(); 601 | Ok(event) 602 | } else { 603 | let token_mark = token.start_mark; 604 
| let mark = self.inner.marks.pop().unwrap(); 605 | Err(Error::parser( 606 | "while parsing a block collection", 607 | mark, 608 | "did not find expected '-' indicator", 609 | token_mark, 610 | )) 611 | } 612 | } 613 | 614 | fn parse_indentless_sequence_entry(&mut self) -> Result { 615 | let mut token = self.scanner.peek()?; 616 | if let TokenData::BlockEntry = token.data { 617 | let mark: Mark = token.end_mark; 618 | self.scanner.skip_token(); 619 | token = self.scanner.peek()?; 620 | 621 | if matches!( 622 | token.data, 623 | TokenData::BlockEntry | TokenData::Key | TokenData::Value | TokenData::BlockEnd 624 | ) { 625 | self.inner.state = ParserState::IndentlessSequenceEntry; 626 | Self::process_empty_scalar(mark) 627 | } else { 628 | self.inner.states.push(ParserState::IndentlessSequenceEntry); 629 | self.parse_node(true, false) 630 | } 631 | } else { 632 | let event = Event { 633 | data: EventData::SequenceEnd, 634 | start_mark: token.start_mark, 635 | end_mark: token.end_mark, 636 | }; 637 | self.inner.state = self.inner.states.pop().unwrap(); 638 | Ok(event) 639 | } 640 | } 641 | 642 | fn parse_block_mapping_key(&mut self, first: bool) -> Result { 643 | if first { 644 | let token = self.scanner.peek()?; 645 | let mark = token.start_mark; 646 | self.inner.marks.push(mark); 647 | self.scanner.skip_token(); 648 | } 649 | 650 | let mut token = self.scanner.peek()?; 651 | if let TokenData::Key = token.data { 652 | let mark: Mark = token.end_mark; 653 | self.scanner.skip_token(); 654 | token = self.scanner.peek()?; 655 | if matches!( 656 | token.data, 657 | TokenData::Key | TokenData::Value | TokenData::BlockEnd 658 | ) { 659 | self.inner.state = ParserState::BlockMappingValue; 660 | Self::process_empty_scalar(mark) 661 | } else { 662 | self.inner.states.push(ParserState::BlockMappingValue); 663 | self.parse_node(true, true) 664 | } 665 | } else if let TokenData::BlockEnd = token.data { 666 | let event = Event { 667 | data: EventData::MappingEnd, 668 | start_mark: 
token.start_mark, 669 | end_mark: token.end_mark, 670 | }; 671 | self.inner.state = self.inner.states.pop().unwrap(); 672 | _ = self.inner.marks.pop(); 673 | self.scanner.skip_token(); 674 | Ok(event) 675 | } else { 676 | let token_mark = token.start_mark; 677 | let mark = self.inner.marks.pop().unwrap(); 678 | Err(Error::parser( 679 | "while parsing a block mapping", 680 | mark, 681 | "did not find expected key", 682 | token_mark, 683 | )) 684 | } 685 | } 686 | 687 | fn parse_block_mapping_value(&mut self) -> Result { 688 | let mut token = self.scanner.peek()?; 689 | if let TokenData::Value = token.data { 690 | let mark: Mark = token.end_mark; 691 | self.scanner.skip_token(); 692 | token = self.scanner.peek()?; 693 | if matches!( 694 | token.data, 695 | TokenData::Key | TokenData::Value | TokenData::BlockEnd 696 | ) { 697 | self.inner.state = ParserState::BlockMappingKey; 698 | Self::process_empty_scalar(mark) 699 | } else { 700 | self.inner.states.push(ParserState::BlockMappingKey); 701 | self.parse_node(true, true) 702 | } 703 | } else { 704 | let mark = token.start_mark; 705 | self.inner.state = ParserState::BlockMappingKey; 706 | Self::process_empty_scalar(mark) 707 | } 708 | } 709 | 710 | fn parse_flow_sequence_entry(&mut self, first: bool) -> Result { 711 | if first { 712 | let token = self.scanner.peek()?; 713 | let mark = token.start_mark; 714 | self.inner.marks.push(mark); 715 | self.scanner.skip_token(); 716 | } 717 | 718 | let mut token = self.scanner.peek()?; 719 | if !matches!(token.data, TokenData::FlowSequenceEnd) { 720 | if !first { 721 | if let TokenData::FlowEntry = token.data { 722 | self.scanner.skip_token(); 723 | token = self.scanner.peek()?; 724 | } else { 725 | let token_mark = token.start_mark; 726 | let mark = self.inner.marks.pop().unwrap(); 727 | return Err(Error::parser( 728 | "while parsing a flow sequence", 729 | mark, 730 | "did not find expected ',' or ']'", 731 | token_mark, 732 | )); 733 | } 734 | } 735 | if let TokenData::Key = 
token.data {
            // A '?' (explicit key) inside a flow sequence introduces a
            // compact single-pair mapping (`[ key: value ]`): emit an
            // implicit flow MappingStart and hand off to the dedicated
            // single-pair mapping states.
            let event = Event {
                data: EventData::MappingStart {
                    anchor: None,
                    tag: None,
                    implicit: true,
                    style: MappingStyle::Flow,
                },
                start_mark: token.start_mark,
                end_mark: token.end_mark,
            };
            self.inner.state = ParserState::FlowSequenceEntryMappingKey;
            self.scanner.skip_token();
            return Ok(event);
        } else if !matches!(token.data, TokenData::FlowSequenceEnd) {
            // Any other token starts an ordinary sequence item node.
            self.inner.states.push(ParserState::FlowSequenceEntry);
            return self.parse_node(false, false);
        }
    }
    // ']' reached: emit SequenceEnd, restore the previous state and
    // drop the mark pushed when the sequence was opened.
    let event = Event {
        data: EventData::SequenceEnd,
        start_mark: token.start_mark,
        end_mark: token.end_mark,
    };
    self.inner.state = self.inner.states.pop().unwrap();
    _ = self.inner.marks.pop();
    self.scanner.skip_token();
    Ok(event)
}

/// Parses the key of a single-pair mapping used as a flow sequence
/// entry.
///
/// If the key position is immediately followed by `:`, `,` or `]`, the
/// key is empty and an empty plain scalar is produced; otherwise the
/// next node is parsed as the key.
fn parse_flow_sequence_entry_mapping_key(&mut self) -> Result<Event> {
    let token = self.scanner.peek()?;
    if matches!(
        token.data,
        TokenData::Value | TokenData::FlowEntry | TokenData::FlowSequenceEnd
    ) {
        let mark: Mark = token.end_mark;
        self.scanner.skip_token();
        self.inner.state = ParserState::FlowSequenceEntryMappingValue;
        Self::process_empty_scalar(mark)
    } else {
        self.inner
            .states
            .push(ParserState::FlowSequenceEntryMappingValue);
        self.parse_node(false, false)
    }
}

/// Parses the value of a single-pair mapping used as a flow sequence
/// entry.
///
/// Without a `:` token (or with a `:` directly followed by `,` or `]`)
/// the value is empty and an empty plain scalar is produced.
fn parse_flow_sequence_entry_mapping_value(&mut self) -> Result<Event> {
    let mut token = self.scanner.peek()?;
    if let TokenData::Value = token.data {
        self.scanner.skip_token();
        token = self.scanner.peek()?;
        if !matches!(
            token.data,
            TokenData::FlowEntry | TokenData::FlowSequenceEnd
        ) {
            self.inner
                .states
                .push(ParserState::FlowSequenceEntryMappingEnd);
            return self.parse_node(false, false);
        }
    }
    let mark = token.start_mark;
    self.inner.state = ParserState::FlowSequenceEntryMappingEnd;
    Self::process_empty_scalar(mark)
}

/// Emits the MappingEnd event closing a single-pair mapping inside a
/// flow sequence, then returns to the regular flow-sequence state.
///
/// The current token is only peeked, not consumed: it belongs to the
/// enclosing sequence and is handled by the next state.
fn parse_flow_sequence_entry_mapping_end(&mut self) -> Result<Event> {
    let token = self.scanner.peek()?;
    let start_mark = token.start_mark;
    let end_mark = token.end_mark;
    self.inner.state = ParserState::FlowSequenceEntry;
    Ok(Event {
        data: EventData::MappingEnd,
        start_mark,
        end_mark,
    })
}

/// Parses one entry of a flow mapping (`{ … }`), producing either the
/// next key node, an empty-key scalar, or the closing MappingEnd event.
///
/// `first` is true for the entry right after `{`; later entries must be
/// preceded by a `,` separator.
fn parse_flow_mapping_key(&mut self, first: bool) -> Result<Event> {
    if first {
        // Consume the '{' token, remembering its position for error
        // reporting.
        let token = self.scanner.peek()?;
        let mark = token.start_mark;
        self.inner.marks.push(mark);
        self.scanner.skip_token();
    }

    let mut token = self.scanner.peek()?;
    if !matches!(token.data, TokenData::FlowMappingEnd) {
        if !first {
            if let TokenData::FlowEntry = token.data {
                self.scanner.skip_token();
                token = self.scanner.peek()?;
            } else {
                // Entries after the first must be separated by ','.
                let token_mark = token.start_mark;
                let mark = self.inner.marks.pop().unwrap();
                return Err(Error::parser(
                    "while parsing a flow mapping",
                    mark,
                    "did not find expected ',' or '}'",
                    token_mark,
                ));
            }
        }
        if let TokenData::Key = token.data {
            self.scanner.skip_token();
            token = self.scanner.peek()?;
            if !matches!(
                token.data,
                TokenData::Value | TokenData::FlowEntry | TokenData::FlowMappingEnd
            ) {
                self.inner.states.push(ParserState::FlowMappingValue);
                return self.parse_node(false, false);
            }
            // Explicit key marker with no key content: the key is empty.
            let mark = token.start_mark;
            self.inner.state = ParserState::FlowMappingValue;
            return Self::process_empty_scalar(mark);
        } else if !matches!(token.data, TokenData::FlowMappingEnd) {
            // A bare node in key position; its value will be empty.
            self.inner.states.push(ParserState::FlowMappingEmptyValue);
            return self.parse_node(false, false);
        }
    }
    // '}' reached: close the mapping, restore the previous state and
    // drop the mark pushed when the mapping was opened.
    let event = Event {
        data: EventData::MappingEnd,
        start_mark: token.start_mark,
        end_mark: token.end_mark,
    };
    self.inner.state = self.inner.states.pop().unwrap();
    _ = self.inner.marks.pop();
    self.scanner.skip_token();
    Ok(event)
}

/// Parses the value of a flow mapping entry.
///
/// With `empty` set — or when no `:` token follows the key, or the `:`
/// is directly followed by `,` or `}` — an empty plain scalar is
/// produced instead of parsing a node.
fn parse_flow_mapping_value(&mut self, empty: bool) -> Result<Event> {
    let mut token = self.scanner.peek()?;
    if empty {
        let mark = token.start_mark;
        self.inner.state = ParserState::FlowMappingKey;
        return Self::process_empty_scalar(mark);
    }
    if let TokenData::Value = token.data {
        self.scanner.skip_token();
        token = self.scanner.peek()?;
        if !matches!(token.data, TokenData::FlowEntry | TokenData::FlowMappingEnd) {
            self.inner.states.push(ParserState::FlowMappingKey);
            return self.parse_node(false, false);
        }
    }
    let mark = token.start_mark;
    self.inner.state = ParserState::FlowMappingKey;
    Self::process_empty_scalar(mark)
}

/// Builds the empty plain scalar event used wherever the grammar allows
/// a key or value to be omitted. Both marks point at `mark`.
fn process_empty_scalar(mark: Mark) -> Result<Event> {
    Ok(Event {
        data: EventData::Scalar {
            anchor: None,
            tag: None,
            value: String::new(),
            plain_implicit: true,
            quoted_implicit: false,
            style: ScalarStyle::Plain,
        },
        start_mark: mark,
        end_mark: mark,
    })
}

/// Consumes the `%YAML` and `%TAG` directive tokens in front of a
/// document and registers them — plus the two standard `!` / `!!` tag
/// directives — on the parser.
///
/// The collected version directive and tag directives are written to
/// `version_directive_ref` / `tag_directives_ref` when those are
/// provided; callers pass `None` when they do not need the collected
/// values.
///
/// # Errors
///
/// Fails on a duplicate `%YAML` directive, an unsupported YAML version
/// (anything other than 1.1 or 1.2), or a duplicate `%TAG` handle.
fn process_directives(
    &mut self,
    version_directive_ref: Option<&mut Option<VersionDirective>>,
    tag_directives_ref: Option<&mut Vec<TagDirective>>,
) -> Result<()> {
    let default_tag_directives: [TagDirective; 2] = [
        // TODO: Get rid of these heap allocations.
        TagDirective {
            handle: String::from("!"),
            prefix: String::from("!"),
        },
        TagDirective {
            handle: String::from("!!"),
            prefix: String::from("tag:yaml.org,2002:"),
        },
    ];
    let mut version_directive: Option<VersionDirective> = None;

    let mut tag_directives = Vec::with_capacity(16);

    let mut token = self.scanner.peek_mut()?;

    loop {
        if !matches!(
            token.data,
            TokenData::VersionDirective { .. } | TokenData::TagDirective { .. }
        ) {
            break;
        }

        if let TokenData::VersionDirective { major, minor } = &token.data {
            let mark = token.start_mark;
            if version_directive.is_some() {
                return Err(Error::parser(
                    "",
                    Mark::default(),
                    "found duplicate %YAML directive",
                    mark,
                ));
            } else if *major != 1 || *minor != 1 && *minor != 2 {
                // Only YAML 1.1 and 1.2 are accepted.
                return Err(Error::parser(
                    "",
                    Mark::default(),
                    "found incompatible YAML document",
                    mark,
                ));
            }
            version_directive = Some(VersionDirective {
                major: *major,
                minor: *minor,
            });
        } else if let TokenData::TagDirective { handle, prefix } = &mut token.data {
            // Move the strings out of the token instead of cloning them;
            // one copy is registered on the parser, the other is kept
            // for the caller.
            let value = TagDirective {
                handle: core::mem::take(handle),
                prefix: core::mem::take(prefix),
            };
            let mark = token.start_mark;
            self.inner
                .append_tag_directive(value.clone(), false, mark)?;

            tag_directives.push(value);
        }

        self.scanner.skip_token();
        token = self.scanner.peek_mut()?;
    }

    let start_mark = token.start_mark;
    for default_tag_directive in default_tag_directives {
        // Defaults may be shadowed by explicit %TAG directives, so
        // duplicates are allowed here.
        self.inner
            .append_tag_directive(default_tag_directive, true, start_mark)?;
    }

    if let Some(version_directive_ref) = version_directive_ref {
        *version_directive_ref = version_directive;
    }
    if let Some(tag_directives_ref) = tag_directives_ref {
        if tag_directives.is_empty() {
            tag_directives_ref.clear();
            tag_directives.clear();
        } else {
            *tag_directives_ref = tag_directives;
        }
    } else {
        tag_directives.clear();
    }

    Ok(())
}
}

impl ParserInner {
    /// Adds `value` to the parser's active tag directives.
    ///
    /// With `allow_duplicates` (used for the built-in `!` / `!!`
    /// defaults) an existing handle is silently kept; otherwise a
    /// duplicate handle is a parse error reported at `mark`.
    fn append_tag_directive(
        &mut self,
        value: TagDirective,
        allow_duplicates: bool,
        mark: Mark,
    ) -> Result<()> {
        for tag_directive in &self.tag_directives {
            if value.handle == tag_directive.handle {
                if allow_duplicates {
                    return Ok(());
                }
                return Err(Error::parser(
                    "",
                    Mark::default(),
                    "found duplicate %TAG directive",
                    mark,
                ));
            }
        }
        self.tag_directives.push(value);
        Ok(())
    }

    /// Clears the alias table. (Presumably invoked when directives or
    /// documents are reset — confirm against the callers, which are not
    /// visible in this chunk.)
    pub(crate) fn delete_aliases(&mut self) {
        self.aliases.clear();
    }
}