├── .gitattributes ├── jsonschema2gbnf ├── README.md ├── Cargo.toml └── src │ └── main.rs ├── .gitignore ├── .github └── workflows │ └── ci.yml ├── gbnf ├── Cargo.toml └── src │ ├── json.rs │ └── lib.rs ├── LICENSE └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /jsonschema2gbnf/README.md: -------------------------------------------------------------------------------- 1 | # JSON Schema to GBNF converter 2 | 3 | Install rust at https://rustup.rs/ 4 | 5 | ``` 6 | cargo install jsonschema2gbnf 7 | jsonschema2gbnf --help 8 | jsonschema2gbnf my_schema.json -o grammar.gbnf 9 | ``` 10 | -------------------------------------------------------------------------------- /jsonschema2gbnf/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jsonschema2gbnf" 3 | version = "0.1.5" 4 | edition = "2021" 5 | description = "Convert JSON Schema to GBNF" 6 | license = "MIT" 7 | repository = "https://github.com/richardanaya/gbnf" 8 | 9 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 10 | 11 | [dependencies] 12 | clap = { version = "4", features = ["derive"] } 13 | gbnf = "0.2.3" 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of 
rustc generate these, which store debugging information 14 | *.pdb 15 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Cargo Build & Test 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | env: 8 | CARGO_TERM_COLOR: always 9 | 10 | jobs: 11 | build_and_test: 12 | name: Rust project - latest 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | toolchain: 17 | - stable 18 | - beta 19 | - nightly 20 | steps: 21 | - uses: actions/checkout@v4 22 | - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }} 23 | - run: cd gbnf && cargo build --verbose 24 | - run: cd gbnf && cargo test --verbose 25 | -------------------------------------------------------------------------------- /gbnf/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "gbnf" 3 | version = "0.2.5" 4 | edition = "2024" 5 | description = "A library for working with GBNF" 6 | license = "MIT" 7 | readme = "../README.md" 8 | repository = "https://github.com/richardanaya/gbnf" 9 | 10 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 11 | 12 | [dependencies] 13 | serde_json = { version = "1.0.111", features = ["preserve_order"] } 14 | serde = { version = "1.0", features = ["derive"] } 15 | thiserror = "1.0.56" 16 | strum = { version = "0.27.2", features = ["strum_macros", "derive"] } 17 | 18 | [dev-dependencies] 19 | chrono = { version = "0.4.41", features = ["serde"] } 20 | schemars = { version = "1.0.4", features = ["derive", "chrono04"] } 21 | pretty_assertions = "1.4.0" 22 | -------------------------------------------------------------------------------- /jsonschema2gbnf/src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use gbnf::Grammar; 3 | 
use std::fs; 4 | use std::path::PathBuf; 5 | 6 | #[derive(Parser, Clone)] 7 | #[command(author, version, about, long_about = None)] 8 | struct Args { 9 | #[arg(value_name = "JSON_SCHEMA_FILE", help = "Path to JSON schema file")] 10 | input_json_schema: PathBuf, 11 | 12 | #[arg( 13 | short, 14 | value_name = "GBNF_FILE", 15 | help = "Path to GBNF file", 16 | default_value = "grammar.gbnf" 17 | )] 18 | output_gbnf: Option, 19 | } 20 | 21 | fn main() -> Result<(), Box> { 22 | let args = Args::parse(); 23 | 24 | let json_schema = fs::read_to_string(args.input_json_schema)?; 25 | let gbnf = Grammar::from_json_schema(&json_schema)?; 26 | 27 | let gbnf_file = args.output_gbnf.expect("GBNF file path is required"); 28 | fs::write(gbnf_file, gbnf.to_string())?; 29 | 30 | Ok(()) 31 | } 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 RICHΛRD ΛNΛYΛ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GBNF-rs 2 | 3 | A library for working with llama.cpp GBNF files. GBNF files represent grammar of AI output. This project is meant to help make it easier to constrain and guide GBNF driven AIs like [llama.cpp](https://github.com/ggerganov/llama.cpp) using [JSON schema](https://json-schema.org/) ( a way to define the shape of JSON data ). The hope is to make outputs more useful when combined with system prompting (that is hopefully also aware of JSON schema to some degree). 4 | 5 | * Data structures for representing GBNF 6 | * Rendering of a GBNF file from data structures 7 | * Conversion of a useful subset of JSON schema to GBNF grammar 8 | * Easily installable CLI converter [jsonschema2gbnf](https://github.com/richardanaya/gbnf/tree/main/jsonschema2gbnf) that uses this library 9 | * **MIT** licensed 10 | 11 | This library was primarily built for its sister project, an LLM API [epistemology](https://github.com/richardanaya/epistemology/). 12 | 13 | Screenshot 2024-01-07 at 12 04 56 AM 14 | 15 | # Installing 16 | 17 | ``` 18 | cargo add gbnf 19 | ``` 20 | 21 | # JSON schema support 22 | 23 | Currently this library can convert a limited but very useful subset of JSON schema: 24 | * boolean, number, string 25 | * object with all required properties 26 | * enum 27 | * oneOf 28 | * property order of objects is preserved 29 | * array support 30 | 31 | Known issues: 32 | * objects with property names with underscores don't translate well right now 33 | * no minimum, maximums, fixed lengths, etc. 
(though putting the schema in system prompt may help depending on model) 34 | 35 | Here's one of the most complex JSON schemas that can be handled right now: 36 | 37 | ```json 38 | { 39 | "$schema": "https://json-schema.org/draft/2019-09/schema", 40 | "type": "object", 41 | "properties": { 42 | "name": { 43 | "description": "name of a computer user", 44 | "type": "string" 45 | }, 46 | "age": { 47 | "description": "age of a computer user", 48 | "type": "number" 49 | }, 50 | "usesAI": { 51 | "description": "do they use AI", 52 | "type": "boolean" 53 | }, 54 | "favoriteAnimal": { 55 | "description": "favorite animal", 56 | "enum": [ 57 | "dog", 58 | "cat", 59 | "none" 60 | ] 61 | }, 62 | "currentAIModel": { 63 | "oneOf": [ 64 | { 65 | "type": "object", 66 | "properties": { 67 | "type": { 68 | "value": "hugging_face" 69 | }, 70 | "name": { 71 | "description": "name of hugging face model", 72 | "type": "string" 73 | } 74 | } 75 | }, 76 | { 77 | "type": "object", 78 | "properties": { 79 | "type": { 80 | "value": "openai" 81 | } 82 | } 83 | } 84 | ] 85 | }, 86 | "favoriteColors": { 87 | "type": "array", 88 | "items": { 89 | "type": "string" 90 | } 91 | } 92 | } 93 | } 94 | ``` 95 | 96 | # JSON-Schema Converting to AI Grammar 97 | 98 | ```rust 99 | fn simple_json_schema_basic_object_example() { 100 | let schema = r#" 101 | { 102 | "$id": "https://example.com/enumerated-values.schema.json", 103 | "$schema": "https://json-schema.org/draft/2020-12/schema", 104 | "title": "Enumerated Values", 105 | "type": "object", 106 | "properties": { 107 | "a": { 108 | "type": "boolean" 109 | }, 110 | "b": { 111 | "type": "number" 112 | }, 113 | "c": { 114 | "type": "string" 115 | } 116 | } 117 | } 118 | "#; 119 | let g = Grammar::from_json_schema(schema).unwrap(); 120 | let s = g.to_string(); 121 | pretty_assertions::assert_eq!( 122 | s, 123 | r#"################################################ 124 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 125 | # $id: 
https://example.com/enumerated-values.schema.json 126 | # $schema: https://json-schema.org/draft/2020-12/schema 127 | # title: Enumerated Values 128 | ################################################ 129 | 130 | symbol1-a-value ::= boolean ws 131 | symbol2-b-value ::= number ws 132 | symbol3-c-value ::= string ws 133 | root ::= "{" ws 134 | "a" ws ":" ws symbol1-a-value 135 | "b" ws ":" ws symbol2-b-value "," ws 136 | "c" ws ":" ws symbol3-c-value "," ws 137 | "}" ws 138 | 139 | ############################### 140 | # Primitive value type symbols 141 | ############################### 142 | null ::= "null" ws 143 | boolean ::= "true" | "false" ws 144 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 145 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws 146 | ws ::= [ ] 147 | "# 148 | ) 149 | } 150 | ``` 151 | 152 | # Future goals of this project: 153 | * offer standard grammars 154 | * handle useful conversion of JSON schema to GBNF (we won't be able to handle it all probably). 155 | * parsing of GBNF files using Nom 7. 156 | 157 | I'm totally down for contributors, please add tests. 158 | 159 | See the [documentation](https://docs.rs/gbnf). 160 | 161 | # Attribution 162 | 163 | Multiple **MIT** licensed examples of GBNF were used from the `llama.cpp` [examples for grammar](https://github.com/ggerganov/llama.cpp/tree/master/grammars) for automated tests for compliance and general inspiration for this project from [python JSON schema converter](https://github.com/ggerganov/llama.cpp/blob/master/examples/json-schema-to-grammar.py). Thank you. 
164 | 165 | 166 | -------------------------------------------------------------------------------- /gbnf/src/json.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | 3 | use crate::{ 4 | CharacterSet, CharacterSetItem, Grammar, GrammarItem, NonTerminalSymbol, Production, 5 | ProductionItem, RepetitionType, Rule, TerminalSymbol, 6 | }; 7 | use thiserror::Error; 8 | 9 | #[derive(Error, Debug)] 10 | pub enum JsonSchemaParseError { 11 | #[error("invalid json string: {0}")] 12 | JsonParseError(#[from] serde_json::Error), 13 | #[error( 14 | "failed find schema information, if you are sure the schema conforms to the json schema spec, please open a bug report!" 15 | )] 16 | UnknownSchemaType, 17 | #[error( 18 | "the value with name `{0}` is expected to be of type array but isn't, can not create grammar" 19 | )] 20 | ExpectedValueWithTypeArray(String), 21 | #[error("array with name `{0}` has no items declared, can not create grammar")] 22 | ArrayTypeWithoutItems(String), 23 | #[error("enum with name `{0}` has no variants declared, can not create grammar")] 24 | EnumTypeWithoutVariants(String), 25 | #[error("object with name `{0}` has no properties declared, can not create grammar")] 26 | ObjectTypeWithoutProperties(String), 27 | #[error("failed to parse constant json value as type `{0}`")] 28 | ConstParseError(String), 29 | #[error( 30 | "failed to find constant json value, if you think this might be a bug please open a bug report!" 
31 | )] 32 | UnknownConstantValueType, 33 | #[error("unknown string formatting, no grammar has been implemented for `{0}` yet!")] 34 | UnknownStringFormat(String), 35 | } 36 | 37 | fn create_boolean_grammar_item(name: String) -> GrammarItem { 38 | GrammarItem::Rule(Rule { 39 | lhs: NonTerminalSymbol { name }, 40 | rhs: Production { 41 | items: vec![ 42 | ProductionItem::NonTerminal( 43 | NonTerminalSymbol { 44 | name: "boolean".to_string(), 45 | }, 46 | RepetitionType::One, 47 | ), 48 | ProductionItem::NonTerminal( 49 | NonTerminalSymbol { 50 | name: "ws".to_string(), 51 | }, 52 | RepetitionType::One, 53 | ), 54 | ], 55 | }, 56 | }) 57 | } 58 | 59 | fn create_number_grammar_item(name: String) -> GrammarItem { 60 | GrammarItem::Rule(Rule { 61 | lhs: NonTerminalSymbol { name }, 62 | rhs: Production { 63 | items: vec![ 64 | ProductionItem::NonTerminal( 65 | NonTerminalSymbol { 66 | name: "number".to_string(), 67 | }, 68 | RepetitionType::One, 69 | ), 70 | ProductionItem::NonTerminal( 71 | NonTerminalSymbol { 72 | name: "ws".to_string(), 73 | }, 74 | RepetitionType::One, 75 | ), 76 | ], 77 | }, 78 | }) 79 | } 80 | 81 | fn create_integer_grammar_item(name: String) -> GrammarItem { 82 | GrammarItem::Rule(Rule { 83 | lhs: NonTerminalSymbol { name }, 84 | rhs: Production { 85 | items: vec![ 86 | ProductionItem::NonTerminal( 87 | NonTerminalSymbol { 88 | name: "integer".to_string(), 89 | }, 90 | RepetitionType::One, 91 | ), 92 | ProductionItem::NonTerminal( 93 | NonTerminalSymbol { 94 | name: "ws".to_string(), 95 | }, 96 | RepetitionType::One, 97 | ), 98 | ], 99 | }, 100 | }) 101 | } 102 | 103 | fn create_simple_string_grammar_item(name: String) -> GrammarItem { 104 | GrammarItem::Rule(Rule { 105 | lhs: NonTerminalSymbol { name }, 106 | rhs: Production { 107 | items: vec![ 108 | ProductionItem::NonTerminal( 109 | NonTerminalSymbol { 110 | name: "string".to_string(), 111 | }, 112 | RepetitionType::One, 113 | ), 114 | ProductionItem::NonTerminal( 115 | NonTerminalSymbol { 116 
| name: "ws".to_string(), 117 | }, 118 | RepetitionType::One, 119 | ), 120 | ], 121 | }, 122 | }) 123 | } 124 | 125 | #[derive(Debug, strum::IntoStaticStr, strum::EnumString)] 126 | #[strum(serialize_all = "kebab-case")] 127 | /// enum to handle json schema string formats 128 | /// 129 | /// [List of Json Schema Formats](https://www.learnjsonschema.com/2020-12/format-assertion/format/) 130 | pub enum JsonSchemaStringFormat { 131 | Date, 132 | DateTime, 133 | } 134 | 135 | impl JsonSchemaStringFormat { 136 | pub fn to_grammar_rule(&self) -> (NonTerminalSymbol, Production) { 137 | match self { 138 | JsonSchemaStringFormat::Date => ( 139 | NonTerminalSymbol { 140 | name: "date".to_string(), 141 | }, 142 | Production { 143 | items: vec![ 144 | ProductionItem::Terminal( 145 | TerminalSymbol { 146 | value: "\\\"".to_string(), 147 | }, 148 | RepetitionType::One, 149 | ), 150 | ProductionItem::CharacterSet( 151 | CharacterSet { 152 | is_complement: false, 153 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 154 | }, 155 | RepetitionType::Exact(4), 156 | ), 157 | ProductionItem::Terminal( 158 | TerminalSymbol { 159 | value: "-".to_string(), 160 | }, 161 | RepetitionType::One, 162 | ), 163 | ProductionItem::CharacterSet( 164 | CharacterSet { 165 | is_complement: false, 166 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 167 | }, 168 | RepetitionType::Exact(2), 169 | ), 170 | ProductionItem::Terminal( 171 | TerminalSymbol { 172 | value: "-".to_string(), 173 | }, 174 | RepetitionType::One, 175 | ), 176 | ProductionItem::CharacterSet( 177 | CharacterSet { 178 | is_complement: false, 179 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 180 | }, 181 | RepetitionType::Exact(2), 182 | ), 183 | ProductionItem::Terminal( 184 | TerminalSymbol { 185 | value: "\\\"".to_string(), 186 | }, 187 | RepetitionType::One, 188 | ), 189 | ], 190 | }, 191 | ), 192 | JsonSchemaStringFormat::DateTime => ( 193 | NonTerminalSymbol { 194 | name: "datetime".to_string(), 
195 | }, 196 | Production { 197 | items: vec![ 198 | // " (Begin String) 199 | ProductionItem::Terminal( 200 | TerminalSymbol { 201 | value: "\\\"".to_string(), 202 | }, 203 | RepetitionType::One, 204 | ), 205 | // Year 206 | ProductionItem::CharacterSet( 207 | CharacterSet { 208 | is_complement: false, 209 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 210 | }, 211 | RepetitionType::Exact(4), 212 | ), 213 | // - 214 | ProductionItem::Terminal( 215 | TerminalSymbol { 216 | value: "-".to_string(), 217 | }, 218 | RepetitionType::One, 219 | ), 220 | // Month 221 | ProductionItem::CharacterSet( 222 | CharacterSet { 223 | is_complement: false, 224 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 225 | }, 226 | RepetitionType::Exact(2), 227 | ), 228 | // - 229 | ProductionItem::Terminal( 230 | TerminalSymbol { 231 | value: "-".to_string(), 232 | }, 233 | RepetitionType::One, 234 | ), 235 | // Day 236 | ProductionItem::CharacterSet( 237 | CharacterSet { 238 | is_complement: false, 239 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 240 | }, 241 | RepetitionType::Exact(2), 242 | ), 243 | // Split Date and Time "T" 244 | ProductionItem::Terminal( 245 | TerminalSymbol { 246 | value: "T".to_string(), 247 | }, 248 | RepetitionType::One, 249 | ), 250 | // Hour 251 | ProductionItem::CharacterSet( 252 | CharacterSet { 253 | is_complement: false, 254 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 255 | }, 256 | RepetitionType::Exact(2), 257 | ), 258 | // : 259 | ProductionItem::Terminal( 260 | TerminalSymbol { 261 | value: ":".to_string(), 262 | }, 263 | RepetitionType::One, 264 | ), 265 | // Minute 266 | ProductionItem::CharacterSet( 267 | CharacterSet { 268 | is_complement: false, 269 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 270 | }, 271 | RepetitionType::Exact(2), 272 | ), 273 | // : 274 | ProductionItem::Terminal( 275 | TerminalSymbol { 276 | value: ":".to_string(), 277 | }, 278 | RepetitionType::One, 279 | ), 280 
| // Seconds 281 | ProductionItem::CharacterSet( 282 | CharacterSet { 283 | is_complement: false, 284 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 285 | }, 286 | RepetitionType::Exact(2), 287 | ), 288 | // TZ Info 289 | ProductionItem::OneOf(vec![ 290 | // UTC shorthand 291 | Production { 292 | items: vec![ProductionItem::Group( 293 | Box::new(Production { 294 | items: vec![ 295 | ProductionItem::Terminal( 296 | TerminalSymbol { 297 | value: "Z".to_string(), 298 | }, 299 | RepetitionType::One, 300 | ), 301 | // " (End String) 302 | ProductionItem::Terminal( 303 | TerminalSymbol { 304 | value: "\\\"".to_string(), 305 | }, 306 | RepetitionType::One, 307 | ), 308 | ], 309 | }), 310 | RepetitionType::One, 311 | )], 312 | }, 313 | // static timezone offset 314 | Production { 315 | items: vec![ProductionItem::Group( 316 | Box::new(Production { 317 | items: vec![ 318 | // positive or negative offset 319 | ProductionItem::OneOf(vec![ 320 | // + (negative offset) 321 | Production { 322 | items: vec![ProductionItem::Terminal( 323 | TerminalSymbol { 324 | value: "+".to_string(), 325 | }, 326 | RepetitionType::One, 327 | )], 328 | }, 329 | // - (negative offset) 330 | Production { 331 | items: vec![ProductionItem::Terminal( 332 | TerminalSymbol { 333 | value: "-".to_string(), 334 | }, 335 | RepetitionType::One, 336 | )], 337 | }, 338 | ]), 339 | // Hour offset 340 | ProductionItem::CharacterSet( 341 | CharacterSet { 342 | is_complement: false, 343 | items: vec![CharacterSetItem::CharacterRange( 344 | '0', '9', 345 | )], 346 | }, 347 | RepetitionType::Exact(2), 348 | ), 349 | // : 350 | ProductionItem::Terminal( 351 | TerminalSymbol { 352 | value: ":".to_string(), 353 | }, 354 | RepetitionType::One, 355 | ), 356 | // Minute offset 357 | ProductionItem::CharacterSet( 358 | CharacterSet { 359 | is_complement: false, 360 | items: vec![CharacterSetItem::CharacterRange( 361 | '0', '9', 362 | )], 363 | }, 364 | RepetitionType::Exact(2), 365 | ), 366 | ], 367 | }), 368 | 
RepetitionType::One, 369 | )], 370 | }, 371 | ]), 372 | // " (End String) 373 | ProductionItem::Terminal( 374 | TerminalSymbol { 375 | value: "\\\"".to_string(), 376 | }, 377 | RepetitionType::One, 378 | ), 379 | ], 380 | }, 381 | ), 382 | } 383 | } 384 | } 385 | 386 | fn dispatch_string_grammar_item( 387 | name: String, 388 | g: &mut Grammar, 389 | value: &serde_json::Value, 390 | ) -> Result { 391 | if let Some(string_format) = value.get("format") { 392 | if let Ok(format_type) = JsonSchemaStringFormat::from_str( 393 | string_format 394 | .as_str() 395 | .ok_or(JsonSchemaParseError::ConstParseError("string".to_string()))?, 396 | ) { 397 | let (term_sym, prod) = format_type.to_grammar_rule(); 398 | if !g.recurring_items.contains_key(&term_sym) { 399 | g.recurring_items.insert(term_sym.clone(), prod); 400 | } 401 | Ok(GrammarItem::Rule(Rule { 402 | lhs: NonTerminalSymbol { name }, 403 | rhs: Production { 404 | items: vec![ 405 | ProductionItem::NonTerminal(term_sym, RepetitionType::One), 406 | ProductionItem::NonTerminal( 407 | NonTerminalSymbol { 408 | name: "ws".to_string(), 409 | }, 410 | RepetitionType::One, 411 | ), 412 | ], 413 | }, 414 | })) 415 | } else { 416 | Err(JsonSchemaParseError::UnknownStringFormat( 417 | string_format.as_str().unwrap().to_string(), 418 | )) 419 | } 420 | } else { 421 | Ok(create_simple_string_grammar_item(name)) 422 | } 423 | } 424 | 425 | fn create_array_grammar_items( 426 | value: &serde_json::Value, 427 | g: &mut Grammar, 428 | name: String, 429 | c: &mut usize, 430 | ) -> Result { 431 | if let Some(items) = value.get("items") { 432 | let item_template_name = format!("symbol{}-item", c); 433 | let new_c = parse_json_schema_to_grammar(items, g, item_template_name.clone(), *c)?; 434 | *c = new_c; 435 | 436 | let rhs_start = vec![ 437 | ProductionItem::Terminal( 438 | TerminalSymbol { 439 | value: "[".to_string(), 440 | }, 441 | RepetitionType::One, 442 | ), 443 | ProductionItem::NonTerminal( 444 | NonTerminalSymbol { 445 | name: 
"ws".to_string(), 446 | }, 447 | RepetitionType::One, 448 | ), 449 | ]; 450 | 451 | let rhs_end = vec![ 452 | ProductionItem::Terminal( 453 | TerminalSymbol { 454 | value: "]".to_string(), 455 | }, 456 | RepetitionType::One, 457 | ), 458 | ProductionItem::NonTerminal( 459 | NonTerminalSymbol { 460 | name: "ws".to_string(), 461 | }, 462 | RepetitionType::One, 463 | ), 464 | ]; 465 | 466 | Ok(GrammarItem::Rule(Rule { 467 | lhs: NonTerminalSymbol { name }, 468 | rhs: Production { 469 | items: rhs_start 470 | .iter() 471 | .chain( 472 | [ 473 | ProductionItem::NonTerminal( 474 | NonTerminalSymbol { 475 | name: item_template_name.clone(), 476 | }, 477 | RepetitionType::ZeroOrMore, 478 | ), 479 | ProductionItem::NonTerminal( 480 | NonTerminalSymbol { 481 | name: "ws".to_string(), 482 | }, 483 | RepetitionType::One, 484 | ), 485 | ] 486 | .iter(), 487 | ) 488 | .chain(rhs_end.iter()) 489 | .cloned() 490 | .collect(), 491 | }, 492 | })) 493 | } else { 494 | Err(JsonSchemaParseError::ArrayTypeWithoutItems(name)) 495 | } 496 | } 497 | 498 | fn create_one_of_grammar_rules( 499 | value: &serde_json::Value, 500 | g: &mut Grammar, 501 | name: String, 502 | c: &mut usize, 503 | ) -> Result { 504 | let one_of_array = value 505 | .as_array() 506 | .ok_or(JsonSchemaParseError::ExpectedValueWithTypeArray( 507 | name.clone(), 508 | ))?; 509 | 510 | if one_of_array.is_empty() { 511 | return Err(JsonSchemaParseError::ArrayTypeWithoutItems(name)); 512 | } 513 | 514 | let mut possible_symbols: Vec = vec![]; 515 | let mut possible_names: Vec = vec![]; 516 | for (value, i) in one_of_array.iter().zip(0..) 
{ 517 | let new_c = 518 | parse_json_schema_to_grammar(value, g, format!("symbol-{}-oneof-{}", c, i), *c)?; 519 | possible_symbols.push(Production { 520 | items: vec![ 521 | ProductionItem::NonTerminal( 522 | NonTerminalSymbol { 523 | name: "ws".to_string(), 524 | }, 525 | RepetitionType::One, 526 | ), 527 | ProductionItem::NonTerminal( 528 | NonTerminalSymbol { 529 | name: format!("symbol-{}-oneof-{}", c, i), 530 | }, 531 | RepetitionType::One, 532 | ), 533 | ProductionItem::NonTerminal( 534 | NonTerminalSymbol { 535 | name: "ws".to_string(), 536 | }, 537 | RepetitionType::One, 538 | ), 539 | ], 540 | }); 541 | possible_names.push(Production { 542 | items: vec![ProductionItem::NonTerminal( 543 | NonTerminalSymbol { 544 | name: format!("symbol-{}-oneof-{}", c, i), 545 | }, 546 | RepetitionType::One, 547 | )], 548 | }); 549 | *c = new_c; 550 | } 551 | // add production for oneof 552 | Ok(GrammarItem::Rule(Rule { 553 | lhs: NonTerminalSymbol { name }, 554 | rhs: Production { 555 | items: vec![ProductionItem::OneOf(possible_names)], 556 | }, 557 | })) 558 | } 559 | 560 | fn create_enum_grammar_items( 561 | value: &serde_json::Value, 562 | name: String, 563 | ) -> Result { 564 | let enum_array = value 565 | .as_array() 566 | .ok_or(JsonSchemaParseError::ExpectedValueWithTypeArray( 567 | name.clone(), 568 | ))?; 569 | 570 | if enum_array.is_empty() { 571 | return Err(JsonSchemaParseError::EnumTypeWithoutVariants(name)); 572 | } 573 | 574 | let mut possible_strings: Vec = vec![]; 575 | for value in enum_array { 576 | if let Some(value_as_string) = value.as_str() { 577 | possible_strings.push(Production { 578 | items: vec![ProductionItem::Terminal( 579 | TerminalSymbol { 580 | value: format!("\\\"{}\\\"", value_as_string), 581 | }, 582 | RepetitionType::One, 583 | )], 584 | }); 585 | } else { 586 | return Err(JsonSchemaParseError::ConstParseError("string".to_string())); 587 | } 588 | } 589 | // add production for enum 590 | Ok(GrammarItem::Rule(Rule { 591 | lhs: 
NonTerminalSymbol { name }, 592 | rhs: Production { 593 | items: vec![ProductionItem::OneOf(possible_strings)], 594 | }, 595 | })) 596 | } 597 | 598 | fn create_object_grammar_items( 599 | value: &serde_json::Value, 600 | g: &mut Grammar, 601 | name: String, 602 | c: &mut usize, 603 | ) -> Result { 604 | if let Some(properties) = value.get("properties") { 605 | let mut prop_rules = vec![]; 606 | let mut is_first = true; 607 | for (key, value) in properties.as_object().unwrap() { 608 | let new_c = parse_json_schema_to_grammar( 609 | value, 610 | g, 611 | format!("symbol{}-{}-value", c, key.replace("_", "-")), 612 | *c, 613 | )?; 614 | if !is_first { 615 | prop_rules.push(ProductionItem::Terminal( 616 | TerminalSymbol { 617 | value: ",".to_string(), 618 | }, 619 | RepetitionType::One, 620 | )); 621 | prop_rules.push(ProductionItem::NonTerminal( 622 | NonTerminalSymbol { 623 | name: "ws".to_string(), 624 | }, 625 | RepetitionType::One, 626 | )); 627 | } else { 628 | is_first = false; 629 | } 630 | prop_rules.push(ProductionItem::Terminal( 631 | TerminalSymbol { 632 | value: format!("\\\"{}\\\"", key), 633 | }, 634 | RepetitionType::One, 635 | )); 636 | prop_rules.push(ProductionItem::NonTerminal( 637 | NonTerminalSymbol { 638 | name: "ws".to_string(), 639 | }, 640 | RepetitionType::One, 641 | )); 642 | prop_rules.push(ProductionItem::Terminal( 643 | TerminalSymbol { 644 | value: ":".to_string(), 645 | }, 646 | RepetitionType::One, 647 | )); 648 | prop_rules.push(ProductionItem::NonTerminal( 649 | NonTerminalSymbol { 650 | name: "ws".to_string(), 651 | }, 652 | RepetitionType::One, 653 | )); 654 | prop_rules.push(ProductionItem::NonTerminal( 655 | NonTerminalSymbol { 656 | name: format!("symbol{}-{}-value", c, key.replace("_", "-")), 657 | }, 658 | RepetitionType::One, 659 | )); 660 | *c = new_c; 661 | } 662 | 663 | let rhs_start = vec![ 664 | ProductionItem::Terminal( 665 | TerminalSymbol { 666 | value: "{".to_string(), 667 | }, 668 | RepetitionType::One, 669 | ), 670 
ProductionItem::NonTerminal(
                NonTerminalSymbol {
                    name: "ws".to_string(),
                },
                RepetitionType::One,
            ),
        ];

        let rhs_end = vec![
            ProductionItem::Terminal(
                TerminalSymbol {
                    value: "}".to_string(),
                },
                RepetitionType::One,
            ),
            ProductionItem::NonTerminal(
                NonTerminalSymbol {
                    name: "ws".to_string(),
                },
                RepetitionType::One,
            ),
        ];

        Ok(GrammarItem::Rule(Rule {
            lhs: NonTerminalSymbol { name: name.clone() },
            rhs: Production {
                items: rhs_start
                    .iter()
                    .chain(prop_rules.iter())
                    .chain(rhs_end.iter())
                    .cloned()
                    .collect(),
            },
        }))
    } else {
        Err(JsonSchemaParseError::ObjectTypeWithoutProperties(name))
    }
}

/// Escapes `"` and `\` so that `text` can be placed between the double quotes
/// of a GBNF string literal without terminating or corrupting it.
fn escape_gbnf_literal(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len() + 2);
    for ch in text.chars() {
        if matches!(ch, '"' | '\\') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}

/// Builds the production items that match the JSON text of the constant `value`.
///
/// * `null`, booleans and numbers become a single terminal holding their JSON
///   spelling.
/// * strings become a single terminal holding the JSON-encoded string (quotes
///   included), escaped for use inside a GBNF literal.
/// * arrays become `"[" ws <element> ("," ws <element>)* "]" ws`; an empty
///   array becomes `"[" ws "]" ws`.
///
/// # Errors
///
/// * `JsonSchemaParseError::UnknownConstantValueType` for objects, including
///   objects nested inside an array (they are no longer silently dropped).
/// * `JsonSchemaParseError::ConstParseError` if a non-integer number cannot be
///   represented as an `f64`.
fn const_production_items(
    value: &serde_json::Value,
) -> Result<Vec<ProductionItem>, JsonSchemaParseError> {
    /// Wraps `text` in a terminal that occurs exactly once.
    fn literal(text: impl Into<String>) -> ProductionItem {
        ProductionItem::Terminal(TerminalSymbol { value: text.into() }, RepetitionType::One)
    }

    match value {
        serde_json::Value::Null => Ok(vec![literal("null")]),
        serde_json::Value::Bool(flag) => Ok(vec![literal(flag.to_string())]),
        serde_json::Value::Number(number) => {
            let text = if number.is_i64() || number.is_u64() {
                // Integers are printed exactly; going through f64 would round
                // values above 2^53.
                number.to_string()
            } else {
                number
                    .as_f64()
                    .ok_or_else(|| JsonSchemaParseError::ConstParseError("number".to_string()))?
                    .to_string()
            };
            Ok(vec![literal(text)])
        }
        // `Value`'s Display yields the JSON encoding (surrounding quotes plus
        // JSON escapes), which is the exact text the grammar has to accept.
        serde_json::Value::String(_) => Ok(vec![literal(escape_gbnf_literal(&value.to_string()))]),
        serde_json::Value::Array(elements) => {
            let ws = ProductionItem::NonTerminal(
                NonTerminalSymbol {
                    name: "ws".to_string(),
                },
                RepetitionType::One,
            );

            let mut items = vec![literal("["), ws.clone()];
            for (index, element) in elements.iter().enumerate() {
                // Separator goes between elements only, never after the last.
                if index > 0 {
                    items.push(literal(","));
                    items.push(ws.clone());
                }
                items.extend(const_production_items(element)?);
            }
            items.push(literal("]"));
            items.push(ws);
            Ok(items)
        }
        serde_json::Value::Object(_) => Err(JsonSchemaParseError::UnknownConstantValueType),
    }
}

/// Creates the rule `name ::= <constant>` for a JSON-Schema `const` value.
///
/// See [`const_production_items`] for the supported value kinds and the
/// errors that can be returned.
fn create_const_grammar_item(
    value: &serde_json::Value,
    name: String,
) -> Result<GrammarItem, JsonSchemaParseError> {
    let items = const_production_items(value)?;
    Ok(GrammarItem::Rule(Rule {
        lhs: NonTerminalSymbol { name },
        rhs: Production { items },
    }))
}

/// Converts the schema node `value` into a rule called `name` and appends it
/// to `g.items`.
///
/// The keywords are tried in this order: `oneOf`, `enum`, `const`, `type`.
/// `symbol_count` is the running counter used to number generated symbols; it
/// is incremented once for this node, handed to the nested builders, and the
/// updated value is returned.
///
/// # Errors
///
/// * `JsonSchemaParseError::UnknownSchemaType` when none of the keywords is
///   present, or when `type` is not one of `boolean`, `number`, `integer`,
///   `string`, `array`, `object`, `null` (previously such a node returned
///   `Ok` without emitting the rule its parent refers to).
/// * any error reported by the nested builders.
pub(crate) fn parse_json_schema_to_grammar(
    value: &serde_json::Value,
    g: &mut Grammar,
    name: String,
    symbol_count: usize,
) -> Result<usize, JsonSchemaParseError> {
    let mut c = symbol_count + 1;

    let rule = if let Some(one_of) = value.get("oneOf") {
        create_one_of_grammar_rules(one_of, g, name, &mut c)?
    } else if let Some(enum_val) = value.get("enum") {
        create_enum_grammar_items(enum_val, name)?
    } else if let Some(const_val) = value.get("const") {
        // not an enum, so probably a constant value
        create_const_grammar_item(const_val, name)?
    } else if let Some(type_name) = value.get("type") {
        match type_name.as_str() {
            Some("boolean") => create_boolean_grammar_item(name),
            Some("number") => create_number_grammar_item(name),
            Some("integer") => create_integer_grammar_item(name),
            Some("string") => dispatch_string_grammar_item(name, g, value)?,
            Some("array") => create_array_grammar_items(value, g, name, &mut c)?,
            Some("object") => create_object_grammar_items(value, g, name, &mut c)?,
            // Same `<primitive> ws` shape as the other primitive types; the
            // `null` primitive itself is always part of the generated grammar.
            Some("null") => GrammarItem::Rule(Rule {
                lhs: NonTerminalSymbol { name },
                rhs: Production {
                    items: vec![
                        ProductionItem::NonTerminal(
                            NonTerminalSymbol {
                                name: "null".to_string(),
                            },
                            RepetitionType::One,
                        ),
                        ProductionItem::NonTerminal(
                            NonTerminalSymbol {
                                name: "ws".to_string(),
                            },
                            RepetitionType::One,
                        ),
                    ],
                },
            }),
            _ => return Err(JsonSchemaParseError::UnknownSchemaType),
        }
    } else {
        return Err(JsonSchemaParseError::UnknownSchemaType);
    };

    g.items.push(rule);
    Ok(c)
}

889 | #[cfg(test)] 890 | mod json_schema_test { 891 | use crate::Grammar; 892 | use chrono::FixedOffset; 893 | use schemars::{JsonSchema, schema_for}; 894 | 895 | #[test] 896 | fn simple_json_schema_boolean() { 897 | #[derive(JsonSchema)] 898 | #[allow(dead_code)] 899 | #[schemars( 900 | title = "Enumerated Values", 901 | extend( 902 | "$id"="https://example.com/enumerated-values.schema.json" 903 | ) 904 | )] 905 | struct TestSchema(bool); 906 | 907 | let g = Grammar::from_json_schema_value(&schema_for!(TestSchema).to_value()).unwrap(); 908 | let s = g.to_string(); 909 | pretty_assertions::assert_eq!( 910 | s, 911 | r#"################################################ 912 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 913 | # $id: https://example.com/enumerated-values.schema.json 914 | # $schema: https://json-schema.org/draft/2020-12/schema 915 | # title: Enumerated Values 916 | ################################################ 917 | 918 | root ::= boolean ws 919 | 920 | ############################### 921 | # Primitive value type symbols 922 | ############################### 923 | boolean ::= "true" | "false" ws 924 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 925 | null ::= "null" ws 926 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 927 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 928 | ws ::= [ \t\n]* 929 | "# 930 | ); 931 | } 932 | 933 | #[test] 934 | fn simple_json_schema_number() { 935 | #[derive(JsonSchema)] 936 | #[allow(dead_code)] 937 | #[schemars( 938 | title = "Enumerated Values", 939 | extend( 940 | "$id"="https://example.com/enumerated-values.schema.json" 941 | ) 942 | )] 943 | struct TestSchema(f32); 944 | let g = Grammar::from_json_schema_value(&schema_for!(TestSchema).to_value()).unwrap(); 945 | let s = g.to_string(); 946 | pretty_assertions::assert_eq!( 947 | s, 948 | r#"################################################ 949 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 950 | # $id: https://example.com/enumerated-values.schema.json 951 | # $schema: https://json-schema.org/draft/2020-12/schema 952 | # title: Enumerated Values 953 | ################################################ 954 | 955 | root ::= number ws 956 | 957 | ############################### 958 | # Primitive value type symbols 959 | ############################### 960 | boolean ::= "true" | "false" ws 961 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 962 | null ::= "null" ws 963 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 964 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 965 | ws ::= [ \t\n]* 966 | "# 967 | ); 968 | } 969 | 970 | #[test] 971 | fn simple_json_schema_string() { 972 | #[derive(JsonSchema)] 973 | #[allow(dead_code)] 974 | #[schemars( 975 | title = "Enumerated Values", 976 | extend( 977 | "$id"="https://example.com/enumerated-values.schema.json" 978 | ) 979 | )] 980 | struct TestSchema(String); 981 | let g = Grammar::from_json_schema_value(&schema_for!(TestSchema).to_value()).unwrap(); 982 | let s = g.to_string(); 983 | pretty_assertions::assert_eq!( 984 | s, 985 | r#"################################################ 986 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 987 | # $id: https://example.com/enumerated-values.schema.json 988 | # $schema: https://json-schema.org/draft/2020-12/schema 989 | # title: Enumerated Values 990 | ################################################ 991 | 992 | root ::= string ws 993 | 994 | ############################### 995 | # Primitive value type symbols 996 | ############################### 997 | boolean ::= "true" | "false" ws 998 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 999 | null ::= "null" ws 1000 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1001 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1002 | ws ::= [ \t\n]* 1003 | "# 1004 | ); 1005 | } 1006 | 1007 | #[test] 1008 | fn simple_json_schema_date() { 1009 | #[derive(JsonSchema)] 1010 | #[allow(dead_code)] 1011 | #[schemars(extend("$id" = "https://example.com/schema.json"))] 1012 | struct DateTest { 1013 | date: chrono::NaiveDate, 1014 | } 1015 | let g = Grammar::from_json_schema_value(&schema_for!(DateTest).to_value()).unwrap(); 1016 | let s = g.to_string(); 1017 | pretty_assertions::assert_eq!( 1018 | s, 1019 | r#"################################################ 1020 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1021 | # $id: https://example.com/schema.json 1022 | # $schema: https://json-schema.org/draft/2020-12/schema 1023 | # title: DateTest 1024 | ################################################ 1025 | 1026 | symbol1-date-value ::= date ws 1027 | root ::= "{" ws "\"date\"" ws ":" ws symbol1-date-value "}" ws 1028 | 1029 | ############################### 1030 | # Primitive value type symbols 1031 | ############################### 1032 | boolean ::= "true" | "false" ws 1033 | date ::= "\"" [0-9]{4} "-" [0-9]{2} "-" [0-9]{2} "\"" 1034 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1035 | null ::= "null" ws 1036 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1037 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1038 | ws ::= [ \t\n]* 1039 | "# 1040 | ); 1041 | } 1042 | 1043 | #[test] 1044 | fn simple_json_schema_date_time() { 1045 | #[derive(JsonSchema)] 1046 | #[allow(dead_code)] 1047 | #[schemars(extend("$id" = "https://example.com/schema.json"))] 1048 | struct DateTest { 1049 | date: chrono::DateTime, 1050 | } 1051 | let g = Grammar::from_json_schema_value(&schema_for!(DateTest).to_value()).unwrap(); 1052 | let s = g.to_string(); 1053 | pretty_assertions::assert_eq!( 1054 | s, 1055 | r#"################################################ 1056 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1057 | # $id: https://example.com/schema.json 1058 | # $schema: https://json-schema.org/draft/2020-12/schema 1059 | # title: DateTest 1060 | ################################################ 1061 | 1062 | symbol1-date-value ::= datetime ws 1063 | root ::= "{" ws "\"date\"" ws ":" ws symbol1-date-value "}" ws 1064 | 1065 | ############################### 1066 | # Primitive value type symbols 1067 | ############################### 1068 | boolean ::= "true" | "false" ws 1069 | datetime ::= "\"" [0-9]{4} "-" [0-9]{2} "-" [0-9]{2} "T" [0-9]{2} ":" [0-9]{2} ":" [0-9]{2} ("Z" "\"") | ("+" | "-" [0-9]{2} ":" [0-9]{2}) "\"" 1070 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1071 | null ::= "null" ws 1072 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1073 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1074 | ws ::= [ \t\n]* 1075 | "# 1076 | ); 1077 | } 1078 | 1079 | #[test] 1080 | fn simple_json_schema_basic_object() { 1081 | #[derive(JsonSchema)] 1082 | #[allow(dead_code)] 1083 | #[schemars( 1084 | title = "Enumerated Values", 1085 | extend( 1086 | "$id"="https://example.com/enumerated-values.schema.json" 1087 | ) 1088 | )] 1089 | struct TestSchema { 1090 | a: bool, 1091 | b: f32, 1092 | c: String, 1093 | } 1094 | let g = Grammar::from_json_schema_value(&schema_for!(TestSchema).to_value()).unwrap(); 1095 | let s = g.to_string(); 1096 | pretty_assertions::assert_eq!( 1097 | s, 1098 | r#"################################################ 1099 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1100 | # $id: https://example.com/enumerated-values.schema.json 1101 | # $schema: https://json-schema.org/draft/2020-12/schema 1102 | # title: Enumerated Values 1103 | ################################################ 1104 | 1105 | symbol1-a-value ::= boolean ws 1106 | symbol2-b-value ::= number ws 1107 | symbol3-c-value ::= string ws 1108 | root ::= "{" ws "\"a\"" ws ":" ws symbol1-a-value "," ws "\"b\"" ws ":" ws symbol2-b-value "," ws "\"c\"" ws ":" ws symbol3-c-value "}" ws 1109 | 1110 | ############################### 1111 | # Primitive value type symbols 1112 | ############################### 1113 | boolean ::= "true" | "false" ws 1114 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1115 | null ::= "null" ws 1116 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1117 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1118 | ws ::= [ \t\n]* 1119 | "# 1120 | ) 1121 | } 1122 | 1123 | #[test] 1124 | fn simple_json_schema_nested_object() { 1125 | #[derive(JsonSchema)] 1126 | #[allow(dead_code)] 1127 | #[schemars(inline)] 1128 | struct Nested { 1129 | x: bool, 1130 | y: f32, 1131 | z: String, 1132 | } 1133 | #[derive(JsonSchema)] 1134 | #[allow(dead_code)] 1135 | #[schemars( 1136 | title = "Enumerated Values", 1137 | extend( 1138 | "$id"="https://example.com/enumerated-values.schema.json" 1139 | ) 1140 | )] 1141 | struct TestSchema { 1142 | a: bool, 1143 | b: f32, 1144 | c: Nested, 1145 | } 1146 | let g = Grammar::from_json_schema_value(&schema_for!(TestSchema).to_value()).unwrap(); 1147 | let s = g.to_string(); 1148 | pretty_assertions::assert_eq!( 1149 | s, 1150 | r#"################################################ 1151 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1152 | # $id: https://example.com/enumerated-values.schema.json 1153 | # $schema: https://json-schema.org/draft/2020-12/schema 1154 | # title: Enumerated Values 1155 | ################################################ 1156 | 1157 | symbol1-a-value ::= boolean ws 1158 | symbol2-b-value ::= number ws 1159 | symbol4-x-value ::= boolean ws 1160 | symbol5-y-value ::= number ws 1161 | symbol6-z-value ::= string ws 1162 | symbol3-c-value ::= "{" ws "\"x\"" ws ":" ws symbol4-x-value "," ws "\"y\"" ws ":" ws symbol5-y-value "," ws "\"z\"" ws ":" ws symbol6-z-value "}" ws 1163 | root ::= "{" ws "\"a\"" ws ":" ws symbol1-a-value "," ws "\"b\"" ws ":" ws symbol2-b-value "," ws "\"c\"" ws ":" ws symbol3-c-value "}" ws 1164 | 1165 | ############################### 1166 | # Primitive value type symbols 1167 | ############################### 1168 | boolean ::= "true" | "false" ws 1169 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1170 | null ::= "null" ws 1171 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? 
([eE] [-+]? [0-9]+)? ws 1172 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1173 | ws ::= [ \t\n]* 1174 | "# 1175 | ) 1176 | } 1177 | 1178 | #[test] 1179 | fn simple_json_schema_oneof() { 1180 | let schema = r#" 1181 | { 1182 | "$schema": "https://json-schema.org/draft/2019-09/schema", 1183 | "oneOf": [ 1184 | { 1185 | "type" : "object", 1186 | "properties" : { 1187 | "firstName" : { 1188 | "type" : "string" 1189 | }, 1190 | "lastName" : { 1191 | "type" : "string" 1192 | }, 1193 | "sport" : { 1194 | "type" : "string" 1195 | } 1196 | } 1197 | }, 1198 | { 1199 | "type" : "number" 1200 | } 1201 | ] 1202 | } 1203 | "#; 1204 | //TODO Get this to work as `onfOf`, currently this results in `anyOf` being generated: 1205 | // https://github.com/GREsau/schemars/pull/108 1206 | // 1207 | //#[derive(JsonSchema)] 1208 | //#[allow(dead_code, non_snake_case)] 1209 | //#[schemars(inline)] 1210 | //struct Kind1 { 1211 | //firstName: String, 1212 | //lastName: String, 1213 | //sport: String 1214 | //} 1215 | //#[derive(JsonSchema)] 1216 | //#[allow(dead_code)] 1217 | //#[schemars(untagged)] 1218 | //enum TestSchema { 1219 | //Complex(Kind1), 1220 | //Simple(f32) 1221 | //} 1222 | let g = Grammar::from_json_schema(schema).unwrap(); 1223 | let s = g.to_string(); 1224 | pretty_assertions::assert_eq!( 1225 | s, 1226 | r#"################################################ 1227 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1228 | # $schema: https://json-schema.org/draft/2019-09/schema 1229 | ################################################ 1230 | 1231 | symbol2-firstName-value ::= string ws 1232 | symbol3-lastName-value ::= string ws 1233 | symbol4-sport-value ::= string ws 1234 | symbol-1-oneof-0 ::= "{" ws "\"firstName\"" ws ":" ws symbol2-firstName-value "," ws "\"lastName\"" ws ":" ws symbol3-lastName-value "," ws "\"sport\"" ws ":" ws symbol4-sport-value "}" ws 1235 | symbol-5-oneof-1 ::= number ws 1236 | root ::= 
symbol-1-oneof-0 | symbol-5-oneof-1 1237 | 1238 | ############################### 1239 | # Primitive value type symbols 1240 | ############################### 1241 | boolean ::= "true" | "false" ws 1242 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1243 | null ::= "null" ws 1244 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws 1245 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1246 | ws ::= [ \t\n]* 1247 | "#, 1248 | ) 1249 | } 1250 | 1251 | #[test] 1252 | fn simple_json_schema_enum() { 1253 | #[derive(JsonSchema)] 1254 | #[allow(dead_code, non_camel_case_types)] 1255 | #[schemars(title = "Enumerated Values")] 1256 | enum TestSchema { 1257 | red, 1258 | amber, 1259 | green, 1260 | } 1261 | println!( 1262 | "{}", 1263 | serde_json::to_string_pretty(&schema_for!(TestSchema).to_value()).unwrap() 1264 | ); 1265 | let g = Grammar::from_json_schema_value(&schema_for!(TestSchema).to_value()).unwrap(); 1266 | let s = g.to_string(); 1267 | 1268 | pretty_assertions::assert_eq!( 1269 | s, 1270 | r#"################################################ 1271 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1272 | # $schema: https://json-schema.org/draft/2020-12/schema 1273 | # title: Enumerated Values 1274 | ################################################ 1275 | 1276 | root ::= "\"red\"" | "\"amber\"" | "\"green\"" 1277 | 1278 | ############################### 1279 | # Primitive value type symbols 1280 | ############################### 1281 | boolean ::= "true" | "false" ws 1282 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1283 | null ::= "null" ws 1284 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1285 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1286 | ws ::= [ \t\n]* 1287 | "# 1288 | ) 1289 | } 1290 | 1291 | #[test] 1292 | fn simple_json_schema_value_string() { 1293 | // can not be created with schemars as far as I can tell 1294 | let schema = r#" 1295 | { 1296 | "$schema": "https://json-schema.org/draft/2019-09/schema", 1297 | "const": "red" 1298 | } 1299 | "#; 1300 | let g = Grammar::from_json_schema(schema).unwrap(); 1301 | let s = g.to_string(); 1302 | 1303 | pretty_assertions::assert_eq!( 1304 | s, 1305 | r#"################################################ 1306 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1307 | # $schema: https://json-schema.org/draft/2019-09/schema 1308 | ################################################ 1309 | 1310 | root ::= "\"red\"" 1311 | 1312 | ############################### 1313 | # Primitive value type symbols 1314 | ############################### 1315 | boolean ::= "true" | "false" ws 1316 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1317 | null ::= "null" ws 1318 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1319 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1320 | ws ::= [ \t\n]* 1321 | "# 1322 | ) 1323 | } 1324 | 1325 | #[test] 1326 | fn simple_json_schema_value_number() { 1327 | // can not be created with schemars as far as I can tell 1328 | let schema = r#" 1329 | { 1330 | "$schema": "https://json-schema.org/draft/2019-09/schema", 1331 | "const": 42 1332 | } 1333 | "#; 1334 | let g = Grammar::from_json_schema(schema).unwrap(); 1335 | let s = g.to_string(); 1336 | 1337 | pretty_assertions::assert_eq!( 1338 | s, 1339 | r#"################################################ 1340 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1341 | # $schema: https://json-schema.org/draft/2019-09/schema 1342 | ################################################ 1343 | 1344 | root ::= "42" 1345 | 1346 | ############################### 1347 | # Primitive value type symbols 1348 | ############################### 1349 | boolean ::= "true" | "false" ws 1350 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1351 | null ::= "null" ws 1352 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1353 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1354 | ws ::= [ \t\n]* 1355 | "# 1356 | ) 1357 | } 1358 | 1359 | #[test] 1360 | fn simple_json_schema_value_boolean() { 1361 | // can not be created with schemars as far as I can tell 1362 | let schema = r#" 1363 | { 1364 | "$schema": "https://json-schema.org/draft/2019-09/schema", 1365 | "const": true 1366 | } 1367 | "#; 1368 | let g = Grammar::from_json_schema(schema).unwrap(); 1369 | let s = g.to_string(); 1370 | 1371 | pretty_assertions::assert_eq!( 1372 | s, 1373 | r#"################################################ 1374 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1375 | # $schema: https://json-schema.org/draft/2019-09/schema 1376 | ################################################ 1377 | 1378 | root ::= "true" 1379 | 1380 | ############################### 1381 | # Primitive value type symbols 1382 | ############################### 1383 | boolean ::= "true" | "false" ws 1384 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1385 | null ::= "null" ws 1386 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1387 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1388 | ws ::= [ \t\n]* 1389 | "# 1390 | ) 1391 | } 1392 | 1393 | #[test] 1394 | fn simple_json_schema_array() { 1395 | let schema = r#" 1396 | { 1397 | "$schema": "https://json-schema.org/draft/2019-09/schema", 1398 | "type": "array", 1399 | "items": { 1400 | "type": "string" 1401 | } 1402 | } 1403 | "#; 1404 | let g = Grammar::from_json_schema(schema).unwrap(); 1405 | let s = g.to_string(); 1406 | 1407 | pretty_assertions::assert_eq!( 1408 | s, 1409 | r#"################################################ 1410 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1411 | # $schema: https://json-schema.org/draft/2019-09/schema 1412 | ################################################ 1413 | 1414 | symbol1-item ::= string ws 1415 | root ::= "[" ws symbol1-item* ws "]" ws 1416 | 1417 | ############################### 1418 | # Primitive value type symbols 1419 | ############################### 1420 | boolean ::= "true" | "false" ws 1421 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1422 | null ::= "null" ws 1423 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1424 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1425 | ws ::= [ \t\n]* 1426 | "# 1427 | ) 1428 | } 1429 | 1430 | #[test] 1431 | fn simple_json_kitchen_sink() { 1432 | // can not be created with schemars as far as I can tell because of `const` usage 1433 | let schema = r#" 1434 | { 1435 | "$schema": "https://json-schema.org/draft/2019-09/schema", 1436 | "type": "object", 1437 | "properties": { 1438 | "name": { 1439 | "type": "string" 1440 | }, 1441 | "age": { 1442 | "type": "number" 1443 | }, 1444 | "usesAI": { 1445 | "type": "boolean" 1446 | }, 1447 | "favoriteAnimal": { 1448 | "enum": [ 1449 | "dog", 1450 | "cat", 1451 | "none" 1452 | ] 1453 | }, 1454 | "currentAIModel": { 1455 | "oneOf": [ 1456 | { 1457 | "type": "object", 1458 | "properties": { 1459 | "type": { 1460 | "const": "hugging_face" 1461 | }, 1462 | "name": { 1463 | "type": "string" 1464 | } 1465 | } 1466 | }, 1467 | { 1468 | "type": "object", 1469 | "properties": { 1470 | "type": { 1471 | "const": "openai" 1472 | } 1473 | } 1474 | } 1475 | ] 1476 | }, 1477 | "favoriteColors": { 1478 | "type": "array", 1479 | "items": { 1480 | "type": "string" 1481 | } 1482 | } 1483 | } 1484 | } 1485 | "#; 1486 | let g = Grammar::from_json_schema(schema).unwrap(); 1487 | let s = g.to_string(); 1488 | 1489 | pretty_assertions::assert_eq!( 1490 | s, 1491 | r#"################################################ 1492 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1493 | # $schema: https://json-schema.org/draft/2019-09/schema 1494 | ################################################ 1495 | 1496 | symbol1-name-value ::= string ws 1497 | symbol2-age-value ::= number ws 1498 | symbol3-usesAI-value ::= boolean ws 1499 | symbol4-favoriteAnimal-value ::= "\"dog\"" | "\"cat\"" | "\"none\"" 1500 | symbol7-type-value ::= "\"hugging_face\"" 1501 | symbol8-name-value ::= string ws 1502 | symbol-6-oneof-0 ::= "{" ws "\"type\"" ws ":" ws symbol7-type-value "," ws "\"name\"" 
ws ":" ws symbol8-name-value "}" ws 1503 | symbol10-type-value ::= "\"openai\"" 1504 | symbol-9-oneof-1 ::= "{" ws "\"type\"" ws ":" ws symbol10-type-value "}" ws 1505 | symbol5-currentAIModel-value ::= symbol-6-oneof-0 | symbol-9-oneof-1 1506 | symbol12-item ::= string ws 1507 | symbol11-favoriteColors-value ::= "[" ws symbol12-item* ws "]" ws 1508 | root ::= "{" ws "\"name\"" ws ":" ws symbol1-name-value "," ws "\"age\"" ws ":" ws symbol2-age-value "," ws "\"usesAI\"" ws ":" ws symbol3-usesAI-value "," ws "\"favoriteAnimal\"" ws ":" ws symbol4-favoriteAnimal-value "," ws "\"currentAIModel\"" ws ":" ws symbol5-currentAIModel-value "," ws "\"favoriteColors\"" ws ":" ws symbol11-favoriteColors-value "}" ws 1509 | 1510 | ############################### 1511 | # Primitive value type symbols 1512 | ############################### 1513 | boolean ::= "true" | "false" ws 1514 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1515 | null ::= "null" ws 1516 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1517 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1518 | ws ::= [ \t\n]* 1519 | "# 1520 | ) 1521 | } 1522 | 1523 | #[test] 1524 | fn json_schema_property_underscores_are_sanitized_to_dashes() { 1525 | // schema with three properties containing underscores 1526 | let schema = r#" 1527 | { 1528 | "$schema": "https://json-schema.org/draft/2019-09/schema", 1529 | "type": "object", 1530 | "properties": { 1531 | "text_1": { "type": "string" }, 1532 | "text__1": { "type": "string" }, 1533 | "__text__": { "type": "string" } 1534 | } 1535 | } 1536 | "#; 1537 | let g = Grammar::from_json_schema(schema).unwrap(); 1538 | let s = g.to_string(); 1539 | 1540 | let expected = r#"################################################ 1541 | # DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR 1542 | # $schema: https://json-schema.org/draft/2019-09/schema 1543 | ################################################ 1544 | 1545 | symbol1-text-1-value ::= string ws 1546 | symbol2-text--1-value ::= string ws 1547 | symbol3---text---value ::= string ws 1548 | root ::= "{" ws "\"text_1\"" ws ":" ws symbol1-text-1-value "," ws "\"text__1\"" ws ":" ws symbol2-text--1-value "," ws "\"__text__\"" ws ":" ws symbol3---text---value "}" ws 1549 | 1550 | ############################### 1551 | # Primitive value type symbols 1552 | ############################### 1553 | boolean ::= "true" | "false" ws 1554 | integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) ws 1555 | null ::= "null" ws 1556 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 1557 | string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 1558 | ws ::= [ \t\n]* 1559 | "#; 1560 | 1561 | pretty_assertions::assert_eq!(s, expected); 1562 | } 1563 | } 1564 | -------------------------------------------------------------------------------- /gbnf/src/lib.rs: -------------------------------------------------------------------------------- 1 | use json::JsonSchemaParseError; 2 | use json::parse_json_schema_to_grammar; 3 | use std::collections::BTreeMap; 4 | use std::fmt; 5 | use std::fmt::Display; 6 | use std::fmt::Formatter; 7 | 8 | pub mod json; 9 | 10 | // Represents a non-terminal symbol in the grammar. 11 | // examples: root, expr, term, ident, ws, num 12 | #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] 13 | pub struct NonTerminalSymbol { 14 | pub name: String, 15 | } 16 | 17 | // Represents a terminal symbol in the grammar. 18 | // examples: "=", "a", "b", "c", "1", "2", "3", " ", "true", "false", "\n" 19 | #[derive(Clone, Debug)] 20 | pub struct TerminalSymbol { 21 | pub value: String, 22 | } 23 | 24 | // Represents a production in the grammar. 25 | #[derive(Clone, Debug)] 26 | pub struct Production { 27 | pub items: Vec, 28 | } 29 | 30 | #[derive(Clone, Debug)] 31 | pub enum RepetitionType { 32 | // [a-z]+ 33 | ZeroOrMore, 34 | // [a-z]* 35 | OneOrMore, 36 | // [a-z]? 
ZeroOrOne,
    /// Exactly one occurrence; no suffix is printed, e.g. `[a-z]`.
    One,
    /// Printed as `{m}`, e.g. `[a-z]{m}`.
    Exact(usize),
    /// Printed as `{m,}`, e.g. `[a-z]{m,}`.
    AtLeast(usize),
    /// Printed as `{m,n}` from the `(m, n)` pair, e.g. `[a-z]{m,n}`.
    Between((usize, usize)),
    /// Printed as `{0,n}`, e.g. `[a-z]{0,n}`.
    AtMost(usize),
}

/// One entry inside a bracketed character set such as `[a-z0-9_\t]`.
#[derive(Clone, Debug)]
pub enum CharacterSetItem {
    /// A single character, printed as-is.
    Character(char),
    /// Printed as the two characters `\t`.
    Tab,
    /// Printed as the two characters `\n`.
    NewLine,
    /// An inclusive range, printed as `start-end`.
    CharacterRange(char, char),
    /// Printed as `\x` followed by the stored string.
    Hex(String),
    /// Printed as `\u` followed by the stored string.
    Unicode(String),
    /// Printed as the two characters `\r`.
    Return,
    /// Printed as an escaped backslash (`\\`).
    Backslash,
}

/// A bracketed character set, e.g. `[a-z]` or `[^"\\]`.
#[derive(Clone, Debug)]
pub struct CharacterSet {
    /// When `true` the set is printed with a leading `^` (`[^...]`).
    pub is_complement: bool,
    /// The entries printed between the brackets, in order.
    pub items: Vec<CharacterSetItem>,
}

/// A list of character set items.
// NOTE(review): nothing in the visible rendering code uses this type;
// `CharacterSet::is_complement` appears to cover complemented sets. Confirm
// whether it is still needed.
#[derive(Clone, Debug)]
pub struct ComplementCharacterSet {
    pub items: Vec<CharacterSetItem>,
}

/// Represents different types of items that can be part of a production.
#[derive(Clone, Debug)]
pub enum ProductionItem {
    /// Printed as a newline.
    LineBreak,
    /// Printed as `#` followed by the text and a newline.
    // example: # This is a comment
    Comment(String),
    /// A quoted literal followed by its repetition suffix. The value is
    /// printed between double quotes exactly as stored, so it must already be
    /// escaped.
    Terminal(TerminalSymbol, RepetitionType),
    /// A reference to another rule followed by its repetition suffix.
    NonTerminal(NonTerminalSymbol, RepetitionType),
    /// A parenthesised sub-production followed by its repetition suffix.
    // example: ( expr "=" ws term "\n" )
    Group(Box<Production>, RepetitionType),
    /// Alternatives, printed separated by ` | `.
    // example: ident | num | "(" ws expr ")" ws
    OneOf(Vec<Production>),
    /// A character set followed by its repetition suffix.
    // examples: [a-z], [a-z0-9_]*, [0-9]+
    CharacterSet(CharacterSet, RepetitionType),
    // Additional items as necessary
}

/// Represents a grammar rule, printed as `lhs ::= rhs`.
// example: root ::= "yes" | "no"
#[derive(Clone, Debug)]
pub struct Rule {
    /// The symbol being defined.
    pub lhs: NonTerminalSymbol,
    /// The production the symbol expands to.
    pub rhs: Production,
}

// Represents an item in the grammar.
99 | #[derive(Clone, Debug)] 100 | pub enum GrammarItem { 101 | LineBreak, 102 | // example: # This is a comment 103 | Comment(String), 104 | Rule(Rule), 105 | } 106 | 107 | impl Display for GrammarItem { 108 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 109 | match self { 110 | GrammarItem::LineBreak => { 111 | write!(f, "\n") 112 | } 113 | GrammarItem::Comment(comment) => { 114 | write!(f, "#{}\n", comment) 115 | } 116 | GrammarItem::Rule(rule) => { 117 | write!(f, "{} ::= {}\n", rule.lhs, rule.rhs) 118 | } 119 | } 120 | } 121 | } 122 | 123 | // Represents the entire grammar. 124 | #[derive(Clone, Debug)] 125 | pub struct Grammar { 126 | pub items: Vec, 127 | // this container will later be linearized as rules, its purpose is 128 | // to hold grammar rules that occur multiple times to avoid duplicating 129 | // rules. This is very relevant for `primitive` type definitions 130 | pub recurring_items: BTreeMap, 131 | } 132 | 133 | impl Default for Grammar { 134 | fn default() -> Self { 135 | Self { 136 | items: Default::default(), 137 | recurring_items: Default::default(), 138 | } 139 | } 140 | } 141 | 142 | impl Display for NonTerminalSymbol { 143 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 144 | write!(f, "{}", self.name) 145 | } 146 | } 147 | 148 | impl Display for CharacterSet { 149 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 150 | let mut s = String::new(); 151 | s.push('['); 152 | if self.is_complement { 153 | s.push('^'); 154 | } 155 | for item in &self.items { 156 | match item { 157 | CharacterSetItem::Character(c) => { 158 | s.push(*c); 159 | } 160 | CharacterSetItem::CharacterRange(start, end) => { 161 | s.push(*start); 162 | s.push('-'); 163 | s.push(*end); 164 | } 165 | CharacterSetItem::Tab => { 166 | s.push_str("\\t"); 167 | } 168 | CharacterSetItem::NewLine => { 169 | s.push_str("\\n"); 170 | } 171 | CharacterSetItem::Hex(hex) => { 172 | s.push_str(&format!("\\x{}", hex)); 173 | } 174 | 
CharacterSetItem::Unicode(unicode) => { 175 | s.push_str(&format!("\\u{}", unicode)); 176 | } 177 | CharacterSetItem::Return => { 178 | s.push_str("\\r"); 179 | } 180 | CharacterSetItem::Backslash => { 181 | s.push_str("\\\\"); 182 | } 183 | } 184 | } 185 | s.push(']'); 186 | write!(f, "{}", s) 187 | } 188 | } 189 | 190 | impl Display for RepetitionType { 191 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 192 | match self { 193 | RepetitionType::ZeroOrMore => write!(f, "*"), 194 | RepetitionType::OneOrMore => write!(f, "+"), 195 | RepetitionType::ZeroOrOne => write!(f, "?"), 196 | RepetitionType::One => write!(f, ""), 197 | RepetitionType::Exact(num) => write!(f, "{{{num}}}"), 198 | RepetitionType::AtLeast(num) => write!(f, "{{{num},}}"), 199 | RepetitionType::Between((a, b)) => write!(f, "{{{a},{b}}}"), 200 | RepetitionType::AtMost(num) => write!(f, "{{0,{num}}}"), 201 | } 202 | } 203 | } 204 | 205 | impl Display for Production { 206 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 207 | let mut is_first_production = true; 208 | let mut s = String::new(); 209 | for item in &self.items { 210 | if is_first_production { 211 | is_first_production = false; 212 | } else { 213 | s.push(' '); 214 | } 215 | match item { 216 | ProductionItem::LineBreak => { 217 | s.push('\n'); 218 | } 219 | ProductionItem::Comment(comment) => { 220 | s.push_str(&format!("#{}\n", comment)); 221 | } 222 | ProductionItem::Terminal(terminal, rep) => { 223 | s.push_str(&format!("\"{}\"{}", terminal.value, rep)); 224 | } 225 | ProductionItem::NonTerminal(non_terminal, rep) => { 226 | s.push_str(format!("{}{}", &non_terminal.name.to_string(), rep).as_str()); 227 | } 228 | ProductionItem::Group(group, rep) => { 229 | s.push_str(&format!("({}){}", group, rep)); 230 | } 231 | ProductionItem::OneOf(one_of) => { 232 | let mut first = true; 233 | for production in one_of { 234 | if first { 235 | first = false; 236 | } else { 237 | s.push_str(" | "); 238 | } 239 | 
s.push_str(&format!("{}", production)); 240 | } 241 | } 242 | ProductionItem::CharacterSet(character_set, rep) => { 243 | s.push_str(&format!("{}{}", character_set, rep)); 244 | } 245 | } 246 | } 247 | write!(f, "{}", s) 248 | } 249 | } 250 | 251 | impl Display for Grammar { 252 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 253 | let mut s = String::new(); 254 | for item in &self.items { 255 | s.push_str(&item.to_string()) 256 | } 257 | for item in self.recurring_items.clone().into_iter().map(|(nts, prod)| { 258 | GrammarItem::Rule(Rule { 259 | lhs: nts, 260 | rhs: prod, 261 | }) 262 | }) { 263 | s.push_str(&item.to_string()); 264 | } 265 | write!(f, "{}", s) 266 | } 267 | } 268 | 269 | impl Grammar { 270 | pub fn from_json_schema(schema: &str) -> Result { 271 | let json = serde_json::from_str::(schema)?; 272 | Grammar::from_json_schema_value(&json) 273 | } 274 | pub fn from_json_schema_value(schema: &serde_json::Value) -> Result { 275 | let mut g = Grammar::default(); 276 | 277 | // add $id, $schema, title as commments at top of file 278 | g.items.push(GrammarItem::Comment( 279 | "###############################################".to_string(), 280 | )); 281 | g.items.push(GrammarItem::Comment( 282 | " DYNAMICALLY GENERATED JSON-SCHEMA GRAMMAR".to_string(), 283 | )); 284 | if let Some(id) = schema.get("$id") { 285 | g.items.push(GrammarItem::Comment(format!( 286 | " $id: {}", 287 | id.as_str().unwrap_or("") 288 | ))); 289 | } 290 | if let Some(schema) = schema.get("$schema") { 291 | g.items.push(GrammarItem::Comment(format!( 292 | " $schema: {}", 293 | schema.as_str().unwrap_or("") 294 | ))); 295 | } 296 | if let Some(title) = schema.get("title") { 297 | g.items.push(GrammarItem::Comment(format!( 298 | " title: {}", 299 | title.as_str().unwrap_or("") 300 | ))); 301 | } 302 | g.items.push(GrammarItem::Comment( 303 | "###############################################".to_string(), 304 | )); 305 | g.items.push(GrammarItem::LineBreak); 306 | 307 | 
parse_json_schema_to_grammar(schema, &mut g, "root".to_string(), 0)?; 308 | 309 | // add comment for primitives 310 | g.items.push(GrammarItem::LineBreak); 311 | g.items.push(GrammarItem::Comment( 312 | "##############################".to_string(), 313 | )); 314 | g.items.push(GrammarItem::Comment( 315 | " Primitive value type symbols".to_string(), 316 | )); 317 | g.items.push(GrammarItem::Comment( 318 | "##############################".to_string(), 319 | )); 320 | g.recurring_items.insert( 321 | NonTerminalSymbol { 322 | name: "null".to_string(), 323 | }, 324 | Production { 325 | items: vec![ 326 | ProductionItem::Terminal( 327 | TerminalSymbol { 328 | value: "null".to_string(), 329 | }, 330 | RepetitionType::One, 331 | ), 332 | ProductionItem::NonTerminal( 333 | NonTerminalSymbol { 334 | name: "ws".to_string(), 335 | }, 336 | RepetitionType::One, 337 | ), 338 | ], 339 | }, 340 | ); 341 | g.recurring_items.insert( 342 | NonTerminalSymbol { 343 | name: "boolean".to_string(), 344 | }, 345 | Production { 346 | items: vec![ 347 | ProductionItem::OneOf(vec![ 348 | Production { 349 | items: vec![ProductionItem::Terminal( 350 | TerminalSymbol { 351 | value: "true".to_string(), 352 | }, 353 | RepetitionType::One, 354 | )], 355 | }, 356 | Production { 357 | items: vec![ProductionItem::Terminal( 358 | TerminalSymbol { 359 | value: "false".to_string(), 360 | }, 361 | RepetitionType::One, 362 | )], 363 | }, 364 | ]), 365 | ProductionItem::NonTerminal( 366 | NonTerminalSymbol { 367 | name: "ws".to_string(), 368 | }, 369 | RepetitionType::One, 370 | ), 371 | ], 372 | }, 373 | ); 374 | g.recurring_items.insert( 375 | NonTerminalSymbol { 376 | name: "string".to_string(), 377 | }, 378 | Production { 379 | items: vec![ 380 | ProductionItem::Terminal( 381 | TerminalSymbol { 382 | value: r#"\""#.to_string(), 383 | }, 384 | RepetitionType::One, 385 | ), 386 | ProductionItem::Group( 387 | Box::new(Production { 388 | items: vec![ 389 | ProductionItem::OneOf(vec![ 390 | Production { 391 
| items: vec![ProductionItem::CharacterSet( 392 | CharacterSet { 393 | is_complement: true, 394 | items: vec![ 395 | CharacterSetItem::Character('"'), 396 | CharacterSetItem::Backslash, 397 | ], 398 | }, 399 | RepetitionType::One, 400 | )], 401 | }, 402 | Production { 403 | items: vec![ProductionItem::Terminal( 404 | TerminalSymbol { 405 | value: r#"\\"#.to_string(), 406 | }, 407 | RepetitionType::One, 408 | )], 409 | }, 410 | ]), 411 | ProductionItem::Group( 412 | Box::new(Production { 413 | items: vec![ 414 | ProductionItem::OneOf(vec![ 415 | Production { 416 | items: vec![ProductionItem::CharacterSet( 417 | CharacterSet { 418 | is_complement: false, 419 | items: vec![ 420 | CharacterSetItem::Character('"'), 421 | CharacterSetItem::Backslash, 422 | CharacterSetItem::Character('/'), 423 | CharacterSetItem::Character('b'), 424 | CharacterSetItem::Character('f'), 425 | CharacterSetItem::Character('n'), 426 | CharacterSetItem::Character('r'), 427 | CharacterSetItem::Character('t'), 428 | ], 429 | }, 430 | RepetitionType::One, 431 | )], 432 | }, 433 | Production { 434 | items: vec![ProductionItem::Terminal( 435 | TerminalSymbol { 436 | value: "u".to_string(), 437 | }, 438 | RepetitionType::One, 439 | )], 440 | }, 441 | ]), 442 | ProductionItem::CharacterSet( 443 | CharacterSet { 444 | is_complement: false, 445 | items: vec![ 446 | CharacterSetItem::CharacterRange('0', '9'), 447 | CharacterSetItem::CharacterRange('a', 'f'), 448 | CharacterSetItem::CharacterRange('A', 'F'), 449 | ], 450 | }, 451 | RepetitionType::One, 452 | ), 453 | ProductionItem::CharacterSet( 454 | CharacterSet { 455 | is_complement: false, 456 | items: vec![ 457 | CharacterSetItem::CharacterRange('0', '9'), 458 | CharacterSetItem::CharacterRange('a', 'f'), 459 | CharacterSetItem::CharacterRange('A', 'F'), 460 | ], 461 | }, 462 | RepetitionType::One, 463 | ), 464 | ProductionItem::CharacterSet( 465 | CharacterSet { 466 | is_complement: false, 467 | items: vec![ 468 | 
CharacterSetItem::CharacterRange('0', '9'), 469 | CharacterSetItem::CharacterRange('a', 'f'), 470 | CharacterSetItem::CharacterRange('A', 'F'), 471 | ], 472 | }, 473 | RepetitionType::One, 474 | ), 475 | ProductionItem::CharacterSet( 476 | CharacterSet { 477 | is_complement: false, 478 | items: vec![ 479 | CharacterSetItem::CharacterRange('0', '9'), 480 | CharacterSetItem::CharacterRange('a', 'f'), 481 | CharacterSetItem::CharacterRange('A', 'F'), 482 | ], 483 | }, 484 | RepetitionType::One, 485 | ), 486 | ], 487 | }), 488 | RepetitionType::One, 489 | ), 490 | ], 491 | }), 492 | RepetitionType::ZeroOrMore, 493 | ), 494 | ProductionItem::Terminal( 495 | TerminalSymbol { 496 | value: r#"\""#.to_string(), 497 | }, 498 | RepetitionType::One, 499 | ), 500 | ProductionItem::NonTerminal( 501 | NonTerminalSymbol { 502 | name: "ws".to_string(), 503 | }, 504 | RepetitionType::One, 505 | ), 506 | ], 507 | }, 508 | ); 509 | g.recurring_items.insert( 510 | NonTerminalSymbol { 511 | name: "number".to_string(), 512 | }, 513 | Production { 514 | items: vec![ 515 | ProductionItem::Group( 516 | Box::new(Production { 517 | items: vec![ 518 | ProductionItem::Terminal( 519 | TerminalSymbol { 520 | value: "-".to_string(), 521 | }, 522 | RepetitionType::ZeroOrOne, 523 | ), 524 | ProductionItem::Group( 525 | Box::new(Production { 526 | items: vec![ProductionItem::OneOf(vec![ 527 | Production { 528 | items: vec![ProductionItem::CharacterSet( 529 | CharacterSet { 530 | is_complement: false, 531 | items: vec![ 532 | CharacterSetItem::CharacterRange( 533 | '0', '9', 534 | ), 535 | ], 536 | }, 537 | RepetitionType::One, 538 | )], 539 | }, 540 | Production { 541 | items: vec![ 542 | ProductionItem::CharacterSet( 543 | CharacterSet { 544 | is_complement: false, 545 | items: vec![ 546 | CharacterSetItem::CharacterRange( 547 | '1', '9', 548 | ), 549 | ], 550 | }, 551 | RepetitionType::One, 552 | ), 553 | ProductionItem::CharacterSet( 554 | CharacterSet { 555 | is_complement: false, 556 | items: 
vec![ 557 | CharacterSetItem::CharacterRange( 558 | '0', '9', 559 | ), 560 | ], 561 | }, 562 | RepetitionType::ZeroOrMore, 563 | ), 564 | ], 565 | }, 566 | ])], 567 | }), 568 | RepetitionType::One, 569 | ), 570 | ], 571 | }), 572 | RepetitionType::One, 573 | ), 574 | ProductionItem::Group( 575 | Box::new(Production { 576 | items: vec![ 577 | ProductionItem::Terminal( 578 | TerminalSymbol { 579 | value: ".".to_string(), 580 | }, 581 | RepetitionType::One, 582 | ), 583 | ProductionItem::CharacterSet( 584 | CharacterSet { 585 | is_complement: false, 586 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 587 | }, 588 | RepetitionType::OneOrMore, 589 | ), 590 | ], 591 | }), 592 | RepetitionType::ZeroOrOne, 593 | ), 594 | ProductionItem::Group( 595 | Box::new(Production { 596 | items: vec![ 597 | ProductionItem::CharacterSet( 598 | CharacterSet { 599 | is_complement: false, 600 | items: vec![ 601 | CharacterSetItem::Character('e'), 602 | CharacterSetItem::Character('E'), 603 | ], 604 | }, 605 | RepetitionType::One, 606 | ), 607 | ProductionItem::CharacterSet( 608 | CharacterSet { 609 | is_complement: false, 610 | items: vec![ 611 | CharacterSetItem::Character('-'), 612 | CharacterSetItem::Character('+'), 613 | ], 614 | }, 615 | RepetitionType::ZeroOrOne, 616 | ), 617 | ProductionItem::CharacterSet( 618 | CharacterSet { 619 | is_complement: false, 620 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 621 | }, 622 | RepetitionType::OneOrMore, 623 | ), 624 | ], 625 | }), 626 | RepetitionType::ZeroOrOne, 627 | ), 628 | ProductionItem::NonTerminal( 629 | NonTerminalSymbol { 630 | name: "ws".to_string(), 631 | }, 632 | RepetitionType::One, 633 | ), 634 | ], 635 | }, 636 | ); 637 | g.recurring_items.insert( 638 | NonTerminalSymbol { 639 | name: "integer".to_string(), 640 | }, 641 | Production { 642 | items: vec![ 643 | ProductionItem::Group( 644 | Box::new(Production { 645 | items: vec![ 646 | ProductionItem::Terminal( 647 | TerminalSymbol { 648 | value: 
"-".to_string(), 649 | }, 650 | RepetitionType::ZeroOrOne, 651 | ), 652 | ProductionItem::Group( 653 | Box::new(Production { 654 | items: vec![ProductionItem::OneOf(vec![ 655 | Production { 656 | items: vec![ProductionItem::CharacterSet( 657 | CharacterSet { 658 | is_complement: false, 659 | items: vec![ 660 | CharacterSetItem::CharacterRange( 661 | '0', '9', 662 | ), 663 | ], 664 | }, 665 | RepetitionType::One, 666 | )], 667 | }, 668 | Production { 669 | items: vec![ 670 | ProductionItem::CharacterSet( 671 | CharacterSet { 672 | is_complement: false, 673 | items: vec![ 674 | CharacterSetItem::CharacterRange( 675 | '1', '9', 676 | ), 677 | ], 678 | }, 679 | RepetitionType::One, 680 | ), 681 | ProductionItem::CharacterSet( 682 | CharacterSet { 683 | is_complement: false, 684 | items: vec![ 685 | CharacterSetItem::CharacterRange( 686 | '0', '9', 687 | ), 688 | ], 689 | }, 690 | RepetitionType::ZeroOrMore, 691 | ), 692 | ], 693 | }, 694 | ])], 695 | }), 696 | RepetitionType::One, 697 | ), 698 | ], 699 | }), 700 | RepetitionType::One, 701 | ), 702 | ProductionItem::NonTerminal( 703 | NonTerminalSymbol { 704 | name: "ws".to_string(), 705 | }, 706 | RepetitionType::One, 707 | ), 708 | ], 709 | }, 710 | ); 711 | g.recurring_items.insert( 712 | NonTerminalSymbol { 713 | name: "ws".to_string(), 714 | }, 715 | Production { 716 | items: vec![ProductionItem::CharacterSet( 717 | CharacterSet { 718 | is_complement: false, 719 | items: vec![ 720 | CharacterSetItem::Character(' '), 721 | CharacterSetItem::Tab, 722 | CharacterSetItem::NewLine, 723 | ], 724 | }, 725 | RepetitionType::ZeroOrMore, 726 | )], 727 | }, 728 | ); 729 | Ok(g) 730 | } 731 | } 732 | 733 | #[cfg(test)] 734 | mod tests { 735 | use super::*; 736 | 737 | #[test] 738 | fn simple_0() { 739 | // root ::= "yes" | "no" 740 | let g = Grammar { 741 | items: vec![GrammarItem::Rule(Rule { 742 | lhs: NonTerminalSymbol { 743 | name: "root".to_string(), 744 | }, 745 | rhs: Production { 746 | items: 
vec![ProductionItem::OneOf(vec![ 747 | Production { 748 | items: vec![ProductionItem::Terminal( 749 | TerminalSymbol { 750 | value: "yes".to_string(), 751 | }, 752 | RepetitionType::One, 753 | )], 754 | }, 755 | Production { 756 | items: vec![ProductionItem::Terminal( 757 | TerminalSymbol { 758 | value: "no".to_string(), 759 | }, 760 | RepetitionType::One, 761 | )], 762 | }, 763 | ])], 764 | }, 765 | })], 766 | ..Default::default() 767 | }; 768 | let s = g.to_string(); 769 | pretty_assertions::assert_eq!(s, "root ::= \"yes\" | \"no\"\n"); 770 | } 771 | 772 | #[test] 773 | fn simple_1() { 774 | //# This is a comment 775 | // root ::= answers 776 | // answers := "yes" | "no" 777 | 778 | let g = Grammar { 779 | items: vec![ 780 | GrammarItem::Comment(" This is a comment".to_string()), 781 | GrammarItem::Rule(Rule { 782 | lhs: NonTerminalSymbol { 783 | name: "root".to_string(), 784 | }, 785 | rhs: Production { 786 | items: vec![ProductionItem::NonTerminal( 787 | NonTerminalSymbol { 788 | name: "answers".to_string(), 789 | }, 790 | RepetitionType::One, 791 | )], 792 | }, 793 | }), 794 | GrammarItem::Rule(Rule { 795 | lhs: NonTerminalSymbol { 796 | name: "answers".to_string(), 797 | }, 798 | rhs: Production { 799 | items: vec![ProductionItem::OneOf(vec![ 800 | Production { 801 | items: vec![ProductionItem::Terminal( 802 | TerminalSymbol { 803 | value: "yes".to_string(), 804 | }, 805 | RepetitionType::One, 806 | )], 807 | }, 808 | Production { 809 | items: vec![ProductionItem::Terminal( 810 | TerminalSymbol { 811 | value: "no".to_string(), 812 | }, 813 | RepetitionType::One, 814 | )], 815 | }, 816 | ])], 817 | }, 818 | }), 819 | ], 820 | ..Default::default() 821 | }; 822 | let s = g.to_string(); 823 | pretty_assertions::assert_eq!( 824 | s, 825 | "# This is a comment\nroot ::= answers\nanswers ::= \"yes\" | \"no\"\n" 826 | ); 827 | } 828 | 829 | #[test] 830 | fn simple_2() { 831 | // # A probably incorrect grammar for japanese word 832 | // root ::= jp-char+ 833 | // 
jp-char ::= hiragana | katakana 834 | // hiragana ::= [ぁ-ゟ] 835 | // katakana ::= [ァ-ヿ] 836 | 837 | let g = Grammar { 838 | items: vec![ 839 | GrammarItem::Comment(" A probably incorrect grammar for japanese word".to_string()), 840 | GrammarItem::Rule(Rule { 841 | lhs: NonTerminalSymbol { 842 | name: "root".to_string(), 843 | }, 844 | rhs: Production { 845 | items: vec![ProductionItem::NonTerminal( 846 | NonTerminalSymbol { 847 | name: "jp-char".to_string(), 848 | }, 849 | RepetitionType::OneOrMore, 850 | )], 851 | }, 852 | }), 853 | GrammarItem::Rule(Rule { 854 | lhs: NonTerminalSymbol { 855 | name: "jp-char".to_string(), 856 | }, 857 | rhs: Production { 858 | items: vec![ProductionItem::OneOf(vec![ 859 | Production { 860 | items: vec![ProductionItem::NonTerminal( 861 | NonTerminalSymbol { 862 | name: "hiragana".to_string(), 863 | }, 864 | RepetitionType::One, 865 | )], 866 | }, 867 | Production { 868 | items: vec![ProductionItem::NonTerminal( 869 | NonTerminalSymbol { 870 | name: "katakana".to_string(), 871 | }, 872 | RepetitionType::One, 873 | )], 874 | }, 875 | ])], 876 | }, 877 | }), 878 | GrammarItem::Rule(Rule { 879 | lhs: NonTerminalSymbol { 880 | name: "hiragana".to_string(), 881 | }, 882 | rhs: Production { 883 | items: vec![ProductionItem::CharacterSet( 884 | CharacterSet { 885 | is_complement: false, 886 | items: vec![CharacterSetItem::CharacterRange('ぁ', 'ゟ')], 887 | }, 888 | RepetitionType::One, 889 | )], 890 | }, 891 | }), 892 | GrammarItem::Rule(Rule { 893 | lhs: NonTerminalSymbol { 894 | name: "katakana".to_string(), 895 | }, 896 | rhs: Production { 897 | items: vec![ProductionItem::CharacterSet( 898 | CharacterSet { 899 | is_complement: false, 900 | items: vec![CharacterSetItem::CharacterRange('ァ', 'ヿ')], 901 | }, 902 | RepetitionType::One, 903 | )], 904 | }, 905 | }), 906 | ], 907 | ..Default::default() 908 | }; 909 | let s = g.to_string(); 910 | pretty_assertions::assert_eq!( 911 | s, 912 | "# A probably incorrect grammar for japanese word\nroot 
::= jp-char+\njp-char ::= hiragana | katakana\nhiragana ::= [ぁ-ゟ]\nkatakana ::= [ァ-ヿ]\n" 913 | ); 914 | } 915 | 916 | #[test] 917 | fn japanese() { 918 | // # A probably incorrect grammar for Japanese 919 | // root ::= jp-char+ ([ \t\n] jp-char+)* 920 | // jp-char ::= hiragana | katakana | punctuation | cjk 921 | // hiragana ::= [ぁ-ゟ] 922 | // katakana ::= [ァ-ヿ] 923 | // punctuation ::= [、-〾] 924 | // cjk ::= [一-鿿] 925 | 926 | let g = Grammar { 927 | items: vec![ 928 | GrammarItem::Comment(" A probably incorrect grammar for Japanese".to_string()), 929 | GrammarItem::Rule(Rule { 930 | lhs: NonTerminalSymbol { 931 | name: "root".to_string(), 932 | }, 933 | rhs: Production { 934 | items: vec![ 935 | ProductionItem::NonTerminal( 936 | NonTerminalSymbol { 937 | name: "jp-char".to_string(), 938 | }, 939 | RepetitionType::OneOrMore, 940 | ), 941 | ProductionItem::Group( 942 | Box::new(Production { 943 | items: vec![ 944 | ProductionItem::CharacterSet( 945 | CharacterSet { 946 | is_complement: false, 947 | items: vec![ 948 | CharacterSetItem::Character(' '), 949 | CharacterSetItem::Character('\t'), 950 | CharacterSetItem::Character('\n'), 951 | ], 952 | }, 953 | RepetitionType::One, 954 | ), 955 | ProductionItem::NonTerminal( 956 | NonTerminalSymbol { 957 | name: "jp-char".to_string(), 958 | }, 959 | RepetitionType::OneOrMore, 960 | ), 961 | ], 962 | }), 963 | RepetitionType::ZeroOrMore, 964 | ), 965 | ], 966 | }, 967 | }), 968 | GrammarItem::Rule(Rule { 969 | lhs: NonTerminalSymbol { 970 | name: "jp-char".to_string(), 971 | }, 972 | rhs: Production { 973 | items: vec![ProductionItem::OneOf(vec![ 974 | Production { 975 | items: vec![ProductionItem::NonTerminal( 976 | NonTerminalSymbol { 977 | name: "hiragana".to_string(), 978 | }, 979 | RepetitionType::One, 980 | )], 981 | }, 982 | Production { 983 | items: vec![ProductionItem::NonTerminal( 984 | NonTerminalSymbol { 985 | name: "katakana".to_string(), 986 | }, 987 | RepetitionType::One, 988 | )], 989 | }, 990 | Production 
{ 991 | items: vec![ProductionItem::NonTerminal( 992 | NonTerminalSymbol { 993 | name: "punctuation".to_string(), 994 | }, 995 | RepetitionType::One, 996 | )], 997 | }, 998 | Production { 999 | items: vec![ProductionItem::NonTerminal( 1000 | NonTerminalSymbol { 1001 | name: "cjk".to_string(), 1002 | }, 1003 | RepetitionType::One, 1004 | )], 1005 | }, 1006 | ])], 1007 | }, 1008 | }), 1009 | GrammarItem::Rule(Rule { 1010 | lhs: NonTerminalSymbol { 1011 | name: "hiragana".to_string(), 1012 | }, 1013 | rhs: Production { 1014 | items: vec![ProductionItem::CharacterSet( 1015 | CharacterSet { 1016 | is_complement: false, 1017 | items: vec![CharacterSetItem::CharacterRange('ぁ', 'ゟ')], 1018 | }, 1019 | RepetitionType::One, 1020 | )], 1021 | }, 1022 | }), 1023 | GrammarItem::Rule(Rule { 1024 | lhs: NonTerminalSymbol { 1025 | name: "katakana".to_string(), 1026 | }, 1027 | rhs: Production { 1028 | items: vec![ProductionItem::CharacterSet( 1029 | CharacterSet { 1030 | is_complement: false, 1031 | items: vec![CharacterSetItem::CharacterRange('ァ', 'ヿ')], 1032 | }, 1033 | RepetitionType::One, 1034 | )], 1035 | }, 1036 | }), 1037 | GrammarItem::Rule(Rule { 1038 | lhs: NonTerminalSymbol { 1039 | name: "punctuation".to_string(), 1040 | }, 1041 | rhs: Production { 1042 | items: vec![ProductionItem::CharacterSet( 1043 | CharacterSet { 1044 | is_complement: false, 1045 | items: vec![CharacterSetItem::CharacterRange('、', '〾')], 1046 | }, 1047 | RepetitionType::One, 1048 | )], 1049 | }, 1050 | }), 1051 | GrammarItem::Rule(Rule { 1052 | lhs: NonTerminalSymbol { 1053 | name: "cjk".to_string(), 1054 | }, 1055 | rhs: Production { 1056 | items: vec![ProductionItem::CharacterSet( 1057 | CharacterSet { 1058 | is_complement: false, 1059 | items: vec![CharacterSetItem::CharacterRange('一', '鿿')], 1060 | }, 1061 | RepetitionType::One, 1062 | )], 1063 | }, 1064 | }), 1065 | ], 1066 | ..Default::default() 1067 | }; 1068 | let s = g.to_string(); 1069 | pretty_assertions::assert_eq!( 1070 | s, 1071 | "# 
A probably incorrect grammar for Japanese\nroot ::= jp-char+ ([ \t\n] jp-char+)*\njp-char ::= hiragana | katakana | punctuation | cjk\nhiragana ::= [ぁ-ゟ]\nkatakana ::= [ァ-ヿ]\npunctuation ::= [、-〾]\ncjk ::= [一-鿿]\n" 1072 | ); 1073 | } 1074 | 1075 | #[test] 1076 | fn arithmatic() { 1077 | // root ::= (expr "=" ws term "\n")+ 1078 | // expr ::= term ([-+*/] term)* 1079 | // term ::= ident | num | "(" ws expr ")" ws 1080 | // ident ::= [a-z] [a-z0-9_]* ws 1081 | // num ::= [0-9]+ ws 1082 | // ws ::= [ \t\n]* 1083 | 1084 | let g = Grammar { 1085 | items: vec![ 1086 | GrammarItem::Rule(Rule { 1087 | lhs: NonTerminalSymbol { 1088 | name: "root".to_string(), 1089 | }, 1090 | rhs: Production { 1091 | items: vec![ProductionItem::Group( 1092 | Box::new(Production { 1093 | items: vec![ 1094 | ProductionItem::NonTerminal( 1095 | NonTerminalSymbol { 1096 | name: "expr".to_string(), 1097 | }, 1098 | RepetitionType::One, 1099 | ), 1100 | ProductionItem::Terminal( 1101 | TerminalSymbol { 1102 | value: "=".to_string(), 1103 | }, 1104 | RepetitionType::One, 1105 | ), 1106 | ProductionItem::NonTerminal( 1107 | NonTerminalSymbol { 1108 | name: "ws".to_string(), 1109 | }, 1110 | RepetitionType::One, 1111 | ), 1112 | ProductionItem::NonTerminal( 1113 | NonTerminalSymbol { 1114 | name: "term".to_string(), 1115 | }, 1116 | RepetitionType::One, 1117 | ), 1118 | ProductionItem::Terminal( 1119 | TerminalSymbol { 1120 | value: "\\n".to_string(), 1121 | }, 1122 | RepetitionType::One, 1123 | ), 1124 | ], 1125 | }), 1126 | RepetitionType::OneOrMore, 1127 | )], 1128 | }, 1129 | }), 1130 | GrammarItem::Rule(Rule { 1131 | lhs: NonTerminalSymbol { 1132 | name: "expr".to_string(), 1133 | }, 1134 | rhs: Production { 1135 | items: vec![ 1136 | ProductionItem::NonTerminal( 1137 | NonTerminalSymbol { 1138 | name: "term".to_string(), 1139 | }, 1140 | RepetitionType::One, 1141 | ), 1142 | ProductionItem::Group( 1143 | Box::new(Production { 1144 | items: vec![ 1145 | ProductionItem::CharacterSet( 1146 | 
CharacterSet { 1147 | is_complement: false, 1148 | items: vec![ 1149 | CharacterSetItem::Character('-'), 1150 | CharacterSetItem::Character('+'), 1151 | CharacterSetItem::Character('*'), 1152 | CharacterSetItem::Character('/'), 1153 | ], 1154 | }, 1155 | RepetitionType::One, 1156 | ), 1157 | ProductionItem::NonTerminal( 1158 | NonTerminalSymbol { 1159 | name: "term".to_string(), 1160 | }, 1161 | RepetitionType::One, 1162 | ), 1163 | ], 1164 | }), 1165 | RepetitionType::ZeroOrMore, 1166 | ), 1167 | ], 1168 | }, 1169 | }), 1170 | GrammarItem::Rule(Rule { 1171 | lhs: NonTerminalSymbol { 1172 | name: "term".to_string(), 1173 | }, 1174 | rhs: Production { 1175 | items: vec![ProductionItem::OneOf(vec![ 1176 | Production { 1177 | items: vec![ProductionItem::NonTerminal( 1178 | NonTerminalSymbol { 1179 | name: "ident".to_string(), 1180 | }, 1181 | RepetitionType::One, 1182 | )], 1183 | }, 1184 | Production { 1185 | items: vec![ProductionItem::NonTerminal( 1186 | NonTerminalSymbol { 1187 | name: "num".to_string(), 1188 | }, 1189 | RepetitionType::One, 1190 | )], 1191 | }, 1192 | Production { 1193 | items: vec![ 1194 | ProductionItem::Terminal( 1195 | TerminalSymbol { 1196 | value: "(".to_string(), 1197 | }, 1198 | RepetitionType::One, 1199 | ), 1200 | ProductionItem::NonTerminal( 1201 | NonTerminalSymbol { 1202 | name: "ws".to_string(), 1203 | }, 1204 | RepetitionType::One, 1205 | ), 1206 | ProductionItem::NonTerminal( 1207 | NonTerminalSymbol { 1208 | name: "expr".to_string(), 1209 | }, 1210 | RepetitionType::One, 1211 | ), 1212 | ProductionItem::Terminal( 1213 | TerminalSymbol { 1214 | value: ")".to_string(), 1215 | }, 1216 | RepetitionType::One, 1217 | ), 1218 | ProductionItem::NonTerminal( 1219 | NonTerminalSymbol { 1220 | name: "ws".to_string(), 1221 | }, 1222 | RepetitionType::One, 1223 | ), 1224 | ], 1225 | }, 1226 | ])], 1227 | }, 1228 | }), 1229 | GrammarItem::Rule(Rule { 1230 | lhs: NonTerminalSymbol { 1231 | name: "ident".to_string(), 1232 | }, 1233 | rhs: 
Production { 1234 | items: vec![ 1235 | ProductionItem::CharacterSet( 1236 | CharacterSet { 1237 | is_complement: false, 1238 | items: vec![CharacterSetItem::CharacterRange('a', 'z')], 1239 | }, 1240 | RepetitionType::One, 1241 | ), 1242 | ProductionItem::CharacterSet( 1243 | CharacterSet { 1244 | is_complement: false, 1245 | items: vec![ 1246 | CharacterSetItem::CharacterRange('a', 'z'), 1247 | CharacterSetItem::CharacterRange('0', '9'), 1248 | CharacterSetItem::Character('_'), 1249 | ], 1250 | }, 1251 | RepetitionType::ZeroOrMore, 1252 | ), 1253 | ProductionItem::NonTerminal( 1254 | NonTerminalSymbol { 1255 | name: "ws".to_string(), 1256 | }, 1257 | RepetitionType::One, 1258 | ), 1259 | ], 1260 | }, 1261 | }), 1262 | GrammarItem::Rule(Rule { 1263 | lhs: NonTerminalSymbol { 1264 | name: "num".to_string(), 1265 | }, 1266 | rhs: Production { 1267 | items: vec![ 1268 | ProductionItem::CharacterSet( 1269 | CharacterSet { 1270 | is_complement: false, 1271 | items: vec![CharacterSetItem::CharacterRange('0', '9')], 1272 | }, 1273 | RepetitionType::OneOrMore, 1274 | ), 1275 | ProductionItem::NonTerminal( 1276 | NonTerminalSymbol { 1277 | name: "ws".to_string(), 1278 | }, 1279 | RepetitionType::One, 1280 | ), 1281 | ], 1282 | }, 1283 | }), 1284 | GrammarItem::Rule(Rule { 1285 | lhs: NonTerminalSymbol { 1286 | name: "ws".to_string(), 1287 | }, 1288 | rhs: Production { 1289 | items: vec![ProductionItem::CharacterSet( 1290 | CharacterSet { 1291 | is_complement: false, 1292 | items: vec![ 1293 | CharacterSetItem::Character(' '), 1294 | CharacterSetItem::Tab, 1295 | CharacterSetItem::NewLine, 1296 | ], 1297 | }, 1298 | RepetitionType::ZeroOrMore, 1299 | )], 1300 | }, 1301 | }), 1302 | ], 1303 | ..Default::default() 1304 | }; 1305 | let s = g.to_string(); 1306 | pretty_assertions::assert_eq!( 1307 | s, 1308 | "root ::= (expr \"=\" ws term \"\\n\")+\nexpr ::= term ([-+*/] term)*\nterm ::= ident | num | \"(\" ws expr \")\" ws\nident ::= [a-z] [a-z0-9_]* ws\nnum ::= [0-9]+ ws\nws 
::= [ \\t\\n]*\n" 1309 | ); 1310 | } 1311 | 1312 | #[test] 1313 | fn chess() { 1314 | // # Specifies chess moves as a list in algebraic notation, using PGN conventions 1315 | // # Force first move to "1. ", then any 1-2 digit number after, relying on model to follow the pattern 1316 | // root ::= "1. " move " " move "\n" ([1-9] [0-9]? ". " move " " move "\n")+ 1317 | // move ::= (pawn | nonpawn | castle) [+#]? 1318 | // # piece type, optional file/rank, optional capture, dest file & rank 1319 | // nonpawn ::= [NBKQR] [a-h]? [1-8]? "x"? [a-h] [1-8] 1320 | // # optional file & capture, dest file & rank, optional promotion 1321 | // pawn ::= ([a-h] "x")? [a-h] [1-8] ("=" [NBKQR])? 1322 | // castle ::= "O-O" "-O"? 1323 | 1324 | let g = Grammar{ 1325 | items: vec![ 1326 | GrammarItem::Comment(" Specifies chess moves as a list in algebraic notation, using PGN conventions".to_string()), 1327 | GrammarItem::Comment(" Force first move to \"1. \", then any 1-2 digit number after, relying on model to follow the pattern".to_string()), 1328 | GrammarItem::Rule(Rule{ 1329 | lhs: NonTerminalSymbol{ 1330 | name: "root".to_string(), 1331 | }, 1332 | rhs: Production{ 1333 | items: vec![ 1334 | ProductionItem::Terminal(TerminalSymbol{ 1335 | value: "1. 
".to_string(), 1336 | }, RepetitionType::One), 1337 | ProductionItem::NonTerminal(NonTerminalSymbol{ 1338 | name: "move".to_string(), 1339 | }, RepetitionType::One), 1340 | ProductionItem::Terminal(TerminalSymbol{ 1341 | value: " ".to_string(), 1342 | }, RepetitionType::One), 1343 | ProductionItem::NonTerminal(NonTerminalSymbol{ 1344 | name: "move".to_string(), 1345 | }, RepetitionType::One), 1346 | ProductionItem::Terminal(TerminalSymbol{ 1347 | value: "\\n".to_string(), 1348 | }, RepetitionType::One), 1349 | ProductionItem::Group(Box::new(Production{ 1350 | items: vec![ 1351 | ProductionItem::CharacterSet(CharacterSet{ 1352 | is_complement: false, 1353 | items: vec![ 1354 | CharacterSetItem::CharacterRange('1', '9'), 1355 | ], 1356 | }, RepetitionType::One), 1357 | ProductionItem::CharacterSet(CharacterSet{ 1358 | is_complement: false, 1359 | items: vec![ 1360 | CharacterSetItem::CharacterRange('0', '9'), 1361 | ], 1362 | }, RepetitionType::ZeroOrOne), 1363 | ProductionItem::Terminal(TerminalSymbol{ 1364 | value: ". 
".to_string(), 1365 | }, RepetitionType::One), 1366 | ProductionItem::NonTerminal(NonTerminalSymbol{ 1367 | name: "move".to_string(), 1368 | }, RepetitionType::One), 1369 | ProductionItem::Terminal(TerminalSymbol{ 1370 | value: " ".to_string(), 1371 | }, RepetitionType::One), 1372 | ProductionItem::NonTerminal(NonTerminalSymbol{ 1373 | name: "move".to_string(), 1374 | }, RepetitionType::One), 1375 | ProductionItem::Terminal(TerminalSymbol{ 1376 | value: "\\n".to_string(), 1377 | }, RepetitionType::One), 1378 | ], 1379 | }), RepetitionType::OneOrMore), 1380 | ], 1381 | }, 1382 | }), 1383 | GrammarItem::Rule(Rule{ 1384 | lhs: NonTerminalSymbol{ 1385 | name: "move".to_string(), 1386 | }, 1387 | rhs: Production{ 1388 | items: vec![ 1389 | ProductionItem::Group(Box::new(Production{ 1390 | items: vec![ 1391 | ProductionItem::OneOf(vec![ 1392 | Production{ 1393 | items: vec![ 1394 | ProductionItem::NonTerminal(NonTerminalSymbol{ 1395 | name: "pawn".to_string(), 1396 | }, RepetitionType::One), 1397 | ], 1398 | }, 1399 | Production{ 1400 | items: vec![ 1401 | ProductionItem::NonTerminal(NonTerminalSymbol{ 1402 | name: "nonpawn".to_string(), 1403 | }, RepetitionType::One), 1404 | ], 1405 | }, 1406 | Production{ 1407 | items: vec![ 1408 | ProductionItem::NonTerminal(NonTerminalSymbol{ 1409 | name: "castle".to_string(), 1410 | }, RepetitionType::One), 1411 | ], 1412 | }, 1413 | ]), 1414 | ], 1415 | }), RepetitionType::One), 1416 | ProductionItem::CharacterSet(CharacterSet{ 1417 | is_complement: false, 1418 | items: vec![ 1419 | CharacterSetItem::Character('+'), 1420 | CharacterSetItem::Character('#'), 1421 | ], 1422 | }, RepetitionType::ZeroOrOne), 1423 | ], 1424 | }, 1425 | }), 1426 | GrammarItem::Comment(" piece type, optional file/rank, optional capture, dest file & rank".to_string()), 1427 | GrammarItem::Rule(Rule{ 1428 | lhs: NonTerminalSymbol{ 1429 | name: "nonpawn".to_string(), 1430 | }, 1431 | rhs: Production{ 1432 | items: vec![ 1433 | 
ProductionItem::CharacterSet(CharacterSet{ 1434 | is_complement: false, 1435 | items: vec![ 1436 | CharacterSetItem::Character('N'), 1437 | CharacterSetItem::Character('B'), 1438 | CharacterSetItem::Character('K'), 1439 | CharacterSetItem::Character('Q'), 1440 | CharacterSetItem::Character('R'), 1441 | ], 1442 | }, RepetitionType::One), 1443 | ProductionItem::CharacterSet(CharacterSet{ 1444 | is_complement: false, 1445 | items: vec![ 1446 | CharacterSetItem::CharacterRange('a', 'h'), 1447 | ], 1448 | }, RepetitionType::ZeroOrOne), 1449 | ProductionItem::CharacterSet(CharacterSet{ 1450 | is_complement: false, 1451 | items: vec![ 1452 | CharacterSetItem::CharacterRange('1', '8'), 1453 | ], 1454 | }, RepetitionType::ZeroOrOne), 1455 | ProductionItem::Terminal(TerminalSymbol{ 1456 | value: "x".to_string(), 1457 | }, RepetitionType::ZeroOrOne), 1458 | ProductionItem::CharacterSet(CharacterSet{ 1459 | is_complement: false, 1460 | items: vec![ 1461 | CharacterSetItem::CharacterRange('a', 'h'), 1462 | ], 1463 | }, RepetitionType::One), 1464 | ProductionItem::CharacterSet(CharacterSet{ 1465 | is_complement: false, 1466 | items: vec![ 1467 | CharacterSetItem::CharacterRange('1', '8'), 1468 | ], 1469 | }, RepetitionType::One), 1470 | ], 1471 | }, 1472 | }), 1473 | GrammarItem::Comment(" optional file & capture, dest file & rank, optional promotion".to_string()), 1474 | GrammarItem::Rule(Rule{ 1475 | lhs: NonTerminalSymbol{ 1476 | name: "pawn".to_string(), 1477 | }, 1478 | rhs: Production{ 1479 | items: vec![ 1480 | ProductionItem::Group(Box::new(Production{ 1481 | items: vec![ 1482 | ProductionItem::CharacterSet(CharacterSet{ 1483 | is_complement: false, 1484 | items: vec![ 1485 | CharacterSetItem::CharacterRange('a', 'h'), 1486 | ], 1487 | }, RepetitionType::One), 1488 | ProductionItem::Terminal(TerminalSymbol{ 1489 | value: "x".to_string(), 1490 | }, RepetitionType::One), 1491 | ], 1492 | }), RepetitionType::ZeroOrOne), 1493 | ProductionItem::CharacterSet(CharacterSet{ 1494 
| is_complement: false, 1495 | items: vec![ 1496 | CharacterSetItem::CharacterRange('a', 'h'), 1497 | ], 1498 | }, RepetitionType::One), 1499 | ProductionItem::CharacterSet(CharacterSet{ 1500 | is_complement: false, 1501 | items: vec![ 1502 | CharacterSetItem::CharacterRange('1', '8'), 1503 | ], 1504 | }, RepetitionType::One), 1505 | ProductionItem::Group(Box::new(Production{ 1506 | items: vec![ 1507 | ProductionItem::Terminal(TerminalSymbol{ 1508 | value: "=".to_string(), 1509 | }, RepetitionType::One), 1510 | ProductionItem::CharacterSet(CharacterSet{ 1511 | is_complement: false, 1512 | items: vec![ 1513 | CharacterSetItem::Character('N'), 1514 | CharacterSetItem::Character('B'), 1515 | CharacterSetItem::Character('K'), 1516 | CharacterSetItem::Character('Q'), 1517 | CharacterSetItem::Character('R'), 1518 | ], 1519 | }, RepetitionType::One), 1520 | ], 1521 | }), RepetitionType::ZeroOrOne), 1522 | ], 1523 | }, 1524 | }), 1525 | GrammarItem::Rule(Rule{ 1526 | lhs: NonTerminalSymbol{ 1527 | name: "castle".to_string(), 1528 | }, 1529 | rhs: Production{ 1530 | items: vec![ 1531 | ProductionItem::Terminal(TerminalSymbol{ 1532 | value: "O-O".to_string(), 1533 | }, RepetitionType::One), 1534 | ProductionItem::Terminal(TerminalSymbol{ 1535 | value: "-O".to_string(), 1536 | }, RepetitionType::ZeroOrOne), 1537 | ], 1538 | }, 1539 | }), 1540 | ], 1541 | ..Default::default() 1542 | }; 1543 | let s = g.to_string(); 1544 | pretty_assertions::assert_eq!( 1545 | s, 1546 | "# Specifies chess moves as a list in algebraic notation, using PGN conventions\n# Force first move to \"1. \", then any 1-2 digit number after, relying on model to follow the pattern\nroot ::= \"1. \" move \" \" move \"\\n\" ([1-9] [0-9]? \". \" move \" \" move \"\\n\")+\nmove ::= (pawn | nonpawn | castle) [+#]?\n# piece type, optional file/rank, optional capture, dest file & rank\nnonpawn ::= [NBKQR] [a-h]? [1-8]? \"x\"? 
[a-h] [1-8]\n# optional file & capture, dest file & rank, optional promotion\npawn ::= ([a-h] \"x\")? [a-h] [1-8] (\"=\" [NBKQR])?\ncastle ::= \"O-O\" \"-O\"?\n"
        );
    }

    #[test]
    fn list() {
        // Grammar under test:
        //
        //   root ::= item+
        //   # Excludes various line break characters
        //   item ::= "- " [^\r\n\x0b\x0c\x85\u2028\u2029]+ "\n"

        // Local constructors so each rule below reads close to its GBNF source.
        let symbol = |name: &str| NonTerminalSymbol {
            name: name.to_string(),
        };
        let literal = |value: &str| {
            ProductionItem::Terminal(
                TerminalSymbol {
                    value: value.to_string(),
                },
                RepetitionType::One,
            )
        };

        // root ::= item+
        let root_rule = Rule {
            lhs: symbol("root"),
            rhs: Production {
                items: vec![ProductionItem::NonTerminal(
                    symbol("item"),
                    RepetitionType::OneOrMore,
                )],
            },
        };

        // [^\r\n\x0b\x0c\x85\u2028\u2029]+
        let not_line_break = ProductionItem::CharacterSet(
            CharacterSet {
                is_complement: true,
                items: vec![
                    CharacterSetItem::Return,
                    CharacterSetItem::NewLine,
                    CharacterSetItem::Hex("0b".to_string()),
                    CharacterSetItem::Hex("0c".to_string()),
                    CharacterSetItem::Hex("85".to_string()),
                    CharacterSetItem::Unicode("2028".to_string()),
                    CharacterSetItem::Unicode("2029".to_string()),
                ],
            },
            RepetitionType::OneOrMore,
        );

        // item ::= "- " [^...]+ "\n"
        // The closing terminal's value is a backslash followed by `n`, not a newline character.
        let item_rule = Rule {
            lhs: symbol("item"),
            rhs: Production {
                items: vec![literal("- "), not_line_break, literal("\\n")],
            },
        };

        let g = Grammar {
            items: vec![
                GrammarItem::Rule(root_rule),
                GrammarItem::Comment(" Excludes various line break characters".to_string()),
                GrammarItem::Rule(item_rule),
            ],
            ..Default::default()
        };

        let expected = "root ::= item+\n# Excludes various line break characters\nitem ::= \"- \" [^\\r\\n\\x0b\\x0c\\x85\\u2028\\u2029]+ \"\\n\"\n";
        pretty_assertions::assert_eq!(g.to_string(), expected);
    }

    #[test]
    fn json() {
        // Grammar under test. The expected text at the bottom puts every rule on a
        // single line and has no `# escapes` comment; the layout here is for reading only.
        //
        //   root ::= object
        //   value ::= object | array | string | number | ("true" | "false" | "null") ws
        //   object ::=
        //     "{" ws (
        //       string ":" ws value
        //       ("," ws string ":" ws value)*
        //     )? "}" ws
        //   array ::=
        //     "[" ws (
        //       value
        //       ("," ws value)*
        //     )? "]" ws
        //   string ::=
        //     "\"" (
        //       [^"\\] |
        //       "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
        //     )* "\"" ws
        //   number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
        //   # Optional space: by convention, applied in this grammar after literal chars when allowed
        //   ws ::= ([ \t\n] ws)?

        // --- local constructors -------------------------------------------------

        // A non-terminal reference that occurs exactly once.
        let one = |name: &str| {
            ProductionItem::NonTerminal(
                NonTerminalSymbol {
                    name: name.to_string(),
                },
                RepetitionType::One,
            )
        };
        // A quoted terminal with the given repetition.
        let lit = |value: &str, repetition: RepetitionType| {
            ProductionItem::Terminal(
                TerminalSymbol {
                    value: value.to_string(),
                },
                repetition,
            )
        };
        // A bracketed character set.
        let set = |is_complement: bool,
                   items: Vec<CharacterSetItem>,
                   repetition: RepetitionType| {
            ProductionItem::CharacterSet(
                CharacterSet {
                    is_complement,
                    items,
                },
                repetition,
            )
        };
        // A parenthesised sub-production.
        let group = |items: Vec<ProductionItem>, repetition: RepetitionType| {
            ProductionItem::Group(Box::new(Production { items }), repetition)
        };
        // One alternative inside a `OneOf`.
        let seq = |items: Vec<ProductionItem>| Production { items };
        // A complete `name ::= items` rule.
        let rule = |name: &str, items: Vec<ProductionItem>| {
            GrammarItem::Rule(Rule {
                lhs: NonTerminalSymbol {
                    name: name.to_string(),
                },
                rhs: Production { items },
            })
        };
        // [0-9] with the given repetition.
        let digits = |repetition: RepetitionType| {
            set(
                false,
                vec![CharacterSetItem::CharacterRange('0', '9')],
                repetition,
            )
        };
        // [0-9a-fA-F]
        let hex_digit = || {
            set(
                false,
                vec![
                    CharacterSetItem::CharacterRange('0', '9'),
                    CharacterSetItem::CharacterRange('a', 'f'),
                    CharacterSetItem::CharacterRange('A', 'F'),
                ],
                RepetitionType::One,
            )
        };

        // --- rules --------------------------------------------------------------

        // root ::= object
        let root = rule("root", vec![one("object")]);

        // value ::= object | array | string | number | ("true" | "false" | "null") ws
        let value = rule(
            "value",
            vec![
                ProductionItem::OneOf(vec![
                    seq(vec![one("object")]),
                    seq(vec![one("array")]),
                    seq(vec![one("string")]),
                    seq(vec![one("number")]),
                    seq(vec![group(
                        vec![ProductionItem::OneOf(vec![
                            seq(vec![lit("true", RepetitionType::One)]),
                            seq(vec![lit("false", RepetitionType::One)]),
                            seq(vec![lit("null", RepetitionType::One)]),
                        ])],
                        RepetitionType::One,
                    )]),
                ]),
                one("ws"),
            ],
        );

        // object ::= "{" ws (string ":" ws value ("," ws string ":" ws value)*)? "}" ws
        let object = rule(
            "object",
            vec![
                lit("{", RepetitionType::One),
                one("ws"),
                group(
                    vec![
                        one("string"),
                        lit(":", RepetitionType::One),
                        one("ws"),
                        one("value"),
                        group(
                            vec![
                                lit(",", RepetitionType::One),
                                one("ws"),
                                one("string"),
                                lit(":", RepetitionType::One),
                                one("ws"),
                                one("value"),
                            ],
                            RepetitionType::ZeroOrMore,
                        ),
                    ],
                    RepetitionType::ZeroOrOne,
                ),
                lit("}", RepetitionType::One),
                one("ws"),
            ],
        );

        // array ::= "[" ws (value ("," ws value)*)? "]" ws
        let array = rule(
            "array",
            vec![
                lit("[", RepetitionType::One),
                one("ws"),
                group(
                    vec![
                        one("value"),
                        group(
                            vec![
                                lit(",", RepetitionType::One),
                                one("ws"),
                                one("value"),
                            ],
                            RepetitionType::ZeroOrMore,
                        ),
                    ],
                    RepetitionType::ZeroOrOne,
                ),
                lit("]", RepetitionType::One),
                one("ws"),
            ],
        );

        // string ::= "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" hex hex hex hex))* "\"" ws
        //
        // The raw strings `r#"\""#` and `r#"\\"#` are each two characters long
        // (backslash + quote, backslash + backslash). Each alternation is a `OneOf`
        // item followed by sibling items; the expected text below shows how that
        // structure is rendered.
        let string = rule(
            "string",
            vec![
                lit(r#"\""#, RepetitionType::One),
                group(
                    vec![
                        ProductionItem::OneOf(vec![
                            seq(vec![set(
                                true,
                                vec![
                                    CharacterSetItem::Character('"'),
                                    CharacterSetItem::Backslash,
                                ],
                                RepetitionType::One,
                            )]),
                            seq(vec![lit(r#"\\"#, RepetitionType::One)]),
                        ]),
                        group(
                            vec![
                                ProductionItem::OneOf(vec![
                                    seq(vec![set(
                                        false,
                                        vec![
                                            CharacterSetItem::Character('"'),
                                            CharacterSetItem::Backslash,
                                            CharacterSetItem::Character('/'),
                                            CharacterSetItem::Character('b'),
                                            CharacterSetItem::Character('f'),
                                            CharacterSetItem::Character('n'),
                                            CharacterSetItem::Character('r'),
                                            CharacterSetItem::Character('t'),
                                        ],
                                        RepetitionType::One,
                                    )]),
                                    seq(vec![lit("u", RepetitionType::One)]),
                                ]),
                                hex_digit(),
                                hex_digit(),
                                hex_digit(),
                                hex_digit(),
                            ],
                            RepetitionType::One,
                        ),
                    ],
                    RepetitionType::ZeroOrMore,
                ),
                lit(r#"\""#, RepetitionType::One),
                one("ws"),
            ],
        );

        // number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
        let number = rule(
            "number",
            vec![
                group(
                    vec![
                        lit("-", RepetitionType::ZeroOrOne),
                        group(
                            vec![ProductionItem::OneOf(vec![
                                seq(vec![digits(RepetitionType::One)]),
                                seq(vec![
                                    set(
                                        false,
                                        vec![CharacterSetItem::CharacterRange('1', '9')],
                                        RepetitionType::One,
                                    ),
                                    digits(RepetitionType::ZeroOrMore),
                                ]),
                            ])],
                            RepetitionType::One,
                        ),
                    ],
                    RepetitionType::One,
                ),
                group(
                    vec![
                        lit(".", RepetitionType::One),
                        digits(RepetitionType::OneOrMore),
                    ],
                    RepetitionType::ZeroOrOne,
                ),
                group(
                    vec![
                        set(
                            false,
                            vec![
                                CharacterSetItem::Character('e'),
                                CharacterSetItem::Character('E'),
                            ],
                            RepetitionType::One,
                        ),
                        set(
                            false,
                            vec![
                                CharacterSetItem::Character('-'),
                                CharacterSetItem::Character('+'),
                            ],
                            RepetitionType::ZeroOrOne,
                        ),
                        digits(RepetitionType::OneOrMore),
                    ],
                    RepetitionType::ZeroOrOne,
                ),
                one("ws"),
            ],
        );

        // ws ::= ([ \t\n] ws)?
        let ws = rule(
            "ws",
            vec![group(
                vec![
                    set(
                        false,
                        vec![
                            CharacterSetItem::Character(' '),
                            CharacterSetItem::Tab,
                            CharacterSetItem::NewLine,
                        ],
                        RepetitionType::One,
                    ),
                    one("ws"),
                ],
                RepetitionType::ZeroOrOne,
            )],
        );

        let g = Grammar {
            items: vec![
                root,
                value,
                object,
                array,
                string,
                number,
                GrammarItem::Comment(" Optional space: by convention, applied in this grammar after literal chars when allowed".to_string()),
                ws,
            ],
            ..Default::default()
        };

        let expected = "root ::= object\nvalue ::= object | array | string | number | (\"true\" | \"false\" | \"null\") ws\nobject ::= \"{\" ws (string \":\" ws value (\",\" ws string \":\" ws value)*)? \"}\" ws\narray ::= \"[\" ws (value (\",\" ws value)*)? \"]\" ws\nstring ::= \"\\\"\" ([^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* \"\\\"\" ws\nnumber ::= (\"-\"? ([0-9] | [1-9] [0-9]*)) (\".\" [0-9]+)? ([eE] [-+]? [0-9]+)? ws\n# Optional space: by convention, applied in this grammar after literal chars when allowed\nws ::= ([ \\t\\n] ws)?\n";
        pretty_assertions::assert_eq!(g.to_string(), expected);
    }

}
--------------------------------------------------------------------------------