├── .github
│   ├── ci.rs
│   └── workflows
│       └── ci.yaml
├── .gitignore
├── Cargo.toml
├── README.md
├── bors.toml
├── rust.ungram
├── src
│   ├── error.rs
│   ├── lexer.rs
│   ├── lib.rs
│   └── parser.rs
├── ungrammar.ungram
└── ungrammar2json
    ├── Cargo.toml
    └── src
        └── main.rs

/.github/ci.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 |     env, fs,
3 |     process::{self, Command, ExitStatus, Stdio},
4 |     time::Instant,
5 | };
6 |
7 | type Error = Box<dyn std::error::Error>;
8 | type Result<T, E = Error> = std::result::Result<T, E>;
9 |
10 | fn main() {
11 |     if let Err(err) = try_main() {
12 |         eprintln!("{}", err);
13 |         process::exit(1);
14 |     }
15 | }
16 |
17 | fn try_main() -> Result<()> {
18 |     let cwd = env::current_dir()?;
19 |     let cargo_toml = cwd.join("Cargo.toml");
20 |     assert!(
21 |         cargo_toml.exists(),
22 |         "Cargo.toml not found, cwd: {}",
23 |         cwd.display()
24 |     );
25 |
26 |     {
27 |         let _s = Section::new("BUILD");
28 |         shell("cargo test --workspace --no-run")?;
29 |     }
30 |
31 |     {
32 |         let _s = Section::new("TEST");
33 |         shell("cargo test --workspace")?;
34 |     }
35 |
36 |     let current_branch = shell_output("git branch --show-current")?;
37 |     if &current_branch == "master" {
38 |         let _s = Section::new("PUBLISH");
39 |         let manifest = fs::read_to_string(&cargo_toml)?;
40 |         let version = get_field(&manifest, "version")?;
41 |         let tag = format!("v{}", version);
42 |         let tags = shell_output("git tag --list")?;
43 |
44 |         if !tags.contains(&tag) {
45 |             let token = env::var("CRATES_IO_TOKEN").unwrap();
46 |             shell(&format!("git tag v{}", version))?;
47 |             shell(&format!("cargo publish --token {}", token))?;
48 |             shell("git push --tags")?;
49 |         }
50 |     }
51 |     Ok(())
52 | }
53 |
54 | fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> {
55 |     for line in text.lines() {
56 |         let words = line.split_ascii_whitespace().collect::<Vec<_>>();
57 |         match words.as_slice() {
58 |             [n, "=", v, ..] if n.trim() == name => {
59 |                 assert!(v.starts_with('"') && v.ends_with('"'));
60 |                 return Ok(&v[1..v.len() - 1]);
61 |             }
62 |             _ => (),
63 |         }
64 |     }
65 |     Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))?
66 | }
67 |
68 | fn shell(cmd: &str) -> Result<()> {
69 |     let status = command(cmd).status()?;
70 |     check_status(status)
71 | }
72 |
73 | fn shell_output(cmd: &str) -> Result<String> {
74 |     let output = command(cmd).stderr(Stdio::inherit()).output()?;
75 |     check_status(output.status)?;
76 |     let res = String::from_utf8(output.stdout)?;
77 |     Ok(res.trim().to_string())
78 | }
79 |
80 | fn command(cmd: &str) -> Command {
81 |     eprintln!("> {}", cmd);
82 |     let words = cmd.split_ascii_whitespace().collect::<Vec<_>>();
83 |     let (cmd, args) = words.split_first().unwrap();
84 |     let mut res = Command::new(cmd);
85 |     res.args(args);
86 |     res
87 | }
88 |
89 | fn check_status(status: ExitStatus) -> Result<()> {
90 |     if !status.success() {
91 |         Err(format!("$status: {}", status))?;
92 |     }
93 |     Ok(())
94 | }
95 |
96 | struct Section {
97 |     name: &'static str,
98 |     start: Instant,
99 | }
100 |
101 | impl Section {
102 |     fn new(name: &'static str) -> Section {
103 |         println!("::group::{}", name);
104 |         let start = Instant::now();
105 |         Section { name, start }
106 |     }
107 | }
108 |
109 | impl Drop for Section {
110 |     fn drop(&mut self) {
111 |         eprintln!("{}: {:.2?}", self.name, self.start.elapsed());
112 |         println!("::endgroup::");
113 |     }
114 | }
115 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on:
3 |   pull_request:
4 |   push:
5 |     branches:
6 |       - master
7 |       - staging
8 |       - trying
9 |
10 | env:
11 |   CARGO_INCREMENTAL: 0
12 |   CARGO_NET_RETRY: 10
13 |   CI: 1
14 |   RUST_BACKTRACE: short
15 |   RUSTFLAGS: -D warnings
16 |   RUSTUP_MAX_RETRIES: 10
17 |
18 | jobs:
19 |   rust:
20 |     name: Rust
21 |     runs-on: ubuntu-latest
22 |
23 |     steps:
24 |       - name: Checkout repository
25 |         uses: actions/checkout@v2
26 |
27 |       - name: Install Rust toolchain
28 |         uses: actions-rs/toolchain@v1
29 |         with:
30 |           toolchain: stable
31 |           profile: minimal
32 |           override: true
33 |
34 |       - run: rustc ./.github/ci.rs && ./ci
35 |         env:
36 |           CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /ci
2 | /Cargo.lock
3 | /target
4 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "ungrammar"
3 | description = "A DSL for describing concrete syntax trees"
4 | version = "1.16.1"
5 | license = "MIT OR Apache-2.0"
6 | repository = "https://github.com/rust-analyzer/ungrammar"
7 | edition = "2018"
8 |
9 | exclude = ["/bors.toml", "/.github"]
10 |
11 | [workspace]
12 | members = ["ungrammar2json"]
13 |
14 | [dependencies]
15 | # nope
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ungrammar
2 |
3 | A DSL for specifying concrete syntax trees.
4 |
5 | See the [blog post][post] for an introduction.
6 |
7 | See [./rust.ungram](./rust.ungram) for an example.
8 | 9 | ## Editor support 10 | 11 | - Vim 12 | - [vim-ungrammar][] 13 | - [ungrammar.vim][] 14 | - VSCode 15 | - [ungrammar-tools][] 16 | 17 | [post]: 18 | https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html 19 | [vim-ungrammar]: https://github.com/Iron-E/vim-ungrammar 20 | [ungrammar.vim]: https://github.com/drtychai/ungrammar.vim 21 | [ungrammar-tools]: https://github.com/azdavis/ungrammar-tools 22 | -------------------------------------------------------------------------------- /bors.toml: -------------------------------------------------------------------------------- 1 | status = [ "Rust" ] 2 | delete_merged_branches = true 3 | -------------------------------------------------------------------------------- /rust.ungram: -------------------------------------------------------------------------------- 1 | // Note this grammar file does not reflect the current language as this file is no longer maintained. 2 | 3 | // Rust Un-Grammar. 4 | // 5 | // This grammar specifies the structure of Rust's concrete syntax tree. 6 | // It does not specify parsing rules (ambiguities, precedence, etc are out of scope). 7 | // Tokens are processed -- contextual keywords are recognised, compound operators glued. 8 | // 9 | // Legend: 10 | // 11 | // // -- comment 12 | // Name = -- non-terminal definition 13 | // 'ident' -- token (terminal) 14 | // A B -- sequence 15 | // A | B -- alternation 16 | // A* -- zero or more repetition 17 | // A? -- zero or one repetition 18 | // (A) -- same as A 19 | // label:A -- suggested name for field of AST node 20 | 21 | //*************************// 22 | // Names, Paths and Macros // 23 | //*************************// 24 | 25 | Name = 26 | 'ident' | 'self' 27 | 28 | NameRef = 29 | 'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self' 30 | 31 | Lifetime = 32 | 'lifetime_ident' 33 | 34 | Path = 35 | (qualifier:Path '::')? segment:PathSegment 36 | 37 | PathSegment = 38 | '::'? NameRef 39 | | NameRef GenericArgList? 40 | | NameRef ParamList RetType? 41 | | '<' PathType ('as' PathType)? '>' 42 | 43 | GenericArgList = 44 | '::'? '<' (GenericArg (',' GenericArg)* ','?)? '>' 45 | 46 | GenericArg = 47 | TypeArg 48 | | AssocTypeArg 49 | | LifetimeArg 50 | | ConstArg 51 | 52 | TypeArg = 53 | Type 54 | 55 | AssocTypeArg = 56 | NameRef GenericParamList? (':' TypeBoundList | '=' Type) 57 | 58 | LifetimeArg = 59 | Lifetime 60 | 61 | ConstArg = 62 | Expr 63 | 64 | MacroCall = 65 | Attr* Path '!' TokenTree ';'? 66 | 67 | TokenTree = 68 | '(' ')' 69 | | '{' '}' 70 | | '[' ']' 71 | 72 | MacroItems = 73 | Item* 74 | 75 | MacroStmts = 76 | statements:Stmt* 77 | Expr? 78 | 79 | //*************************// 80 | // Items // 81 | //*************************// 82 | 83 | SourceFile = 84 | 'shebang'? 85 | Attr* 86 | Item* 87 | 88 | Item = 89 | Const 90 | | Enum 91 | | ExternBlock 92 | | ExternCrate 93 | | Fn 94 | | Impl 95 | | MacroCall 96 | | MacroRules 97 | | MacroDef 98 | | Module 99 | | Static 100 | | Struct 101 | | Trait 102 | | TypeAlias 103 | | Union 104 | | Use 105 | 106 | MacroRules = 107 | Attr* Visibility? 108 | 'macro_rules' '!' Name 109 | TokenTree 110 | 111 | MacroDef = 112 | Attr* Visibility? 113 | 'macro' Name args:TokenTree? 114 | body:TokenTree 115 | 116 | Module = 117 | Attr* Visibility? 118 | 'mod' Name 119 | (ItemList | ';') 120 | 121 | ItemList = 122 | '{' Attr* Item* '}' 123 | 124 | ExternCrate = 125 | Attr* Visibility? 126 | 'extern' 'crate' NameRef Rename? 
';' 127 | 128 | Rename = 129 | 'as' (Name | '_') 130 | 131 | Use = 132 | Attr* Visibility? 133 | 'use' UseTree ';' 134 | 135 | UseTree = 136 | (Path? '::')? ('*' | UseTreeList) 137 | | Path Rename? 138 | 139 | UseTreeList = 140 | '{' (UseTree (',' UseTree)* ','?)? '}' 141 | 142 | Fn = 143 | Attr* Visibility? 144 | 'default'? 'const'? 'async'? 'unsafe'? Abi? 145 | 'fn' Name GenericParamList? ParamList RetType? WhereClause? 146 | (body:BlockExpr | ';') 147 | 148 | Abi = 149 | 'extern' 'string'? 150 | 151 | ParamList = 152 | '('( 153 | SelfParam 154 | | (SelfParam ',')? (Param (',' Param)* ','?)? 155 | )')' 156 | | '|' (Param (',' Param)* ','?)? '|' 157 | 158 | SelfParam = 159 | Attr* ( 160 | ('&' Lifetime?)? 'mut'? Name 161 | | 'mut'? Name ':' Type 162 | ) 163 | 164 | Param = 165 | Attr* ( 166 | Pat (':' Type)? 167 | | Type 168 | | '...' 169 | ) 170 | 171 | RetType = 172 | '->' Type 173 | 174 | TypeAlias = 175 | Attr* Visibility? 176 | 'default'? 177 | 'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause? 178 | ('=' Type)? ';' 179 | 180 | Struct = 181 | Attr* Visibility? 182 | 'struct' Name GenericParamList? ( 183 | WhereClause? (RecordFieldList | ';') 184 | | TupleFieldList WhereClause? ';' 185 | ) 186 | 187 | RecordFieldList = 188 | '{' fields:(RecordField (',' RecordField)* ','?)? '}' 189 | 190 | RecordField = 191 | Attr* Visibility? 192 | Name ':' Type 193 | 194 | TupleFieldList = 195 | '(' fields:(TupleField (',' TupleField)* ','?)? ')' 196 | 197 | TupleField = 198 | Attr* Visibility? 199 | Type 200 | 201 | FieldList = 202 | RecordFieldList 203 | | TupleFieldList 204 | 205 | Enum = 206 | Attr* Visibility? 207 | 'enum' Name GenericParamList? WhereClause? 208 | VariantList 209 | 210 | VariantList = 211 | '{' (Variant (',' Variant)* ','?)? '}' 212 | 213 | Variant = 214 | Attr* Visibility? 215 | Name FieldList? ('=' Expr)? 216 | 217 | Union = 218 | Attr* Visibility? 219 | 'union' Name GenericParamList? WhereClause? 220 | RecordFieldList 221 | 222 | // A Data Type. 223 | // 224 | // Not used directly in the grammar, but handy to have anyway. 225 | Adt = 226 | Enum 227 | | Struct 228 | | Union 229 | 230 | Const = 231 | Attr* Visibility? 232 | 'default'? 233 | 'const' (Name | '_') ':' Type 234 | ('=' body:Expr)? ';' 235 | 236 | Static = 237 | Attr* Visibility? 238 | 'static' 'mut'? Name ':' Type 239 | ('=' body:Expr)? ';' 240 | 241 | Trait = 242 | Attr* Visibility? 243 | 'unsafe'? 'auto'? 244 | 'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause? 245 | AssocItemList 246 | 247 | AssocItemList = 248 | '{' Attr* AssocItem* '}' 249 | 250 | AssocItem = 251 | Const 252 | | Fn 253 | | MacroCall 254 | | TypeAlias 255 | 256 | Impl = 257 | Attr* Visibility? 258 | 'default'? 'unsafe'? 259 | 'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause? 260 | AssocItemList 261 | 262 | ExternBlock = 263 | Attr* 'unsafe'? Abi ExternItemList 264 | 265 | ExternItemList = 266 | '{' Attr* ExternItem* '}' 267 | 268 | ExternItem = 269 | Fn 270 | | MacroCall 271 | | Static 272 | | TypeAlias 273 | 274 | GenericParamList = 275 | '<' (GenericParam (',' GenericParam)* ','?)? '>' 276 | 277 | GenericParam = 278 | ConstParam 279 | | LifetimeParam 280 | | TypeParam 281 | 282 | TypeParam = 283 | Attr* Name (':' TypeBoundList?)? 284 | ('=' default_type:Type)? 285 | 286 | ConstParam = 287 | Attr* 'const' Name ':' Type 288 | ('=' default_val:Expr)? 289 | 290 | LifetimeParam = 291 | Attr* Lifetime (':' TypeBoundList?)? 
292 | 293 | WhereClause = 294 | 'where' predicates:(WherePred (',' WherePred)* ','?) 295 | 296 | WherePred = 297 | ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList? 298 | 299 | Visibility = 300 | 'pub' ('(' 'in'? Path ')')? 301 | 302 | Attr = 303 | '#' '!'? '[' Meta ']' 304 | 305 | Meta = 306 | Path ('=' Expr | TokenTree)? 307 | 308 | //****************************// 309 | // Statements and Expressions // 310 | //****************************// 311 | 312 | Stmt = 313 | ';' 314 | | ExprStmt 315 | | Item 316 | | LetStmt 317 | 318 | LetStmt = 319 | Attr* 'let' Pat (':' Type)? 320 | '=' initializer:Expr 321 | LetElse? 322 | ';' 323 | 324 | LetElse = 325 | 'else' BlockExpr 326 | 327 | ExprStmt = 328 | Expr ';'? 329 | 330 | Expr = 331 | ArrayExpr 332 | | AwaitExpr 333 | | BinExpr 334 | | BlockExpr 335 | | BoxExpr 336 | | BreakExpr 337 | | CallExpr 338 | | CastExpr 339 | | ClosureExpr 340 | | ContinueExpr 341 | | FieldExpr 342 | | ForExpr 343 | | IfExpr 344 | | IndexExpr 345 | | Literal 346 | | LoopExpr 347 | | MacroCall 348 | | MacroStmts 349 | | MatchExpr 350 | | MethodCallExpr 351 | | ParenExpr 352 | | PathExpr 353 | | PrefixExpr 354 | | RangeExpr 355 | | RecordExpr 356 | | RefExpr 357 | | ReturnExpr 358 | | TryExpr 359 | | TupleExpr 360 | | WhileExpr 361 | | YieldExpr 362 | | LetExpr 363 | | UnderscoreExpr 364 | 365 | Literal = 366 | Attr* value:( 367 | 'int_number' | 'float_number' 368 | | 'string' | 'raw_string' 369 | | 'byte_string' | 'raw_byte_string' 370 | | 'true' | 'false' 371 | | 'char' | 'byte' 372 | ) 373 | 374 | PathExpr = 375 | Attr* Path 376 | 377 | StmtList = 378 | '{' 379 | Attr* 380 | statements:Stmt* 381 | tail_expr:Expr? 382 | '}' 383 | 384 | RefExpr = 385 | Attr* '&' ('raw' | 'mut' | 'const') Expr 386 | 387 | TryExpr = 388 | Attr* Expr '?' 389 | 390 | BlockExpr = 391 | Attr* Label? ('try' | 'unsafe' | 'async' | 'const') StmtList 392 | 393 | PrefixExpr = 394 | Attr* op:('-' | '!' | '*') Expr 395 | 396 | BinExpr = 397 | Attr* 398 | lhs:Expr 399 | op:( 400 | '||' | '&&' 401 | | '==' | '!=' | '<=' | '>=' | '<' | '>' 402 | | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&' 403 | | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^=' 404 | ) 405 | rhs:Expr 406 | 407 | CastExpr = 408 | Attr* Expr 'as' Type 409 | 410 | ParenExpr = 411 | Attr* '(' Attr* Expr ')' 412 | 413 | ArrayExpr = 414 | Attr* '[' Attr* ( 415 | (Expr (',' Expr)* ','?)? 416 | | Expr ';' Expr 417 | ) ']' 418 | 419 | IndexExpr = 420 | Attr* base:Expr '[' index:Expr ']' 421 | 422 | TupleExpr = 423 | Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')' 424 | 425 | RecordExpr = 426 | Path RecordExprFieldList 427 | 428 | RecordExprFieldList = 429 | '{' 430 | Attr* 431 | fields:(RecordExprField (',' RecordExprField)* ','?)? 432 | ('..' spread:Expr?)? 433 | '}' 434 | 435 | RecordExprField = 436 | Attr* (NameRef ':')? Expr 437 | 438 | CallExpr = 439 | Attr* Expr ArgList 440 | 441 | ArgList = 442 | '(' args:(Expr (',' Expr)* ','?)? ')' 443 | 444 | MethodCallExpr = 445 | Attr* receiver:Expr '.' NameRef GenericArgList? ArgList 446 | 447 | FieldExpr = 448 | Attr* Expr '.' NameRef 449 | 450 | ClosureExpr = 451 | Attr* 'static'? 'async'? 'move'? ParamList RetType? 452 | body:Expr 453 | 454 | IfExpr = 455 | Attr* 'if' condition:Expr then_branch:BlockExpr 456 | ('else' else_branch:(IfExpr | BlockExpr))? 457 | 458 | LoopExpr = 459 | Attr* Label? 'loop' 460 | loop_body:BlockExpr 461 | 462 | ForExpr = 463 | Attr* Label? 
'for' Pat 'in' iterable:Expr 464 | loop_body:BlockExpr 465 | 466 | WhileExpr = 467 | Attr* Label? 'while' condition:Expr 468 | loop_body:BlockExpr 469 | 470 | Label = 471 | Lifetime ':' 472 | 473 | BreakExpr = 474 | Attr* 'break' Lifetime? Expr? 475 | 476 | ContinueExpr = 477 | Attr* 'continue' Lifetime? 478 | 479 | RangeExpr = 480 | Attr* start:Expr? op:('..' | '..=') end:Expr? 481 | 482 | MatchExpr = 483 | Attr* 'match' Expr MatchArmList 484 | 485 | MatchArmList = 486 | '{' 487 | Attr* 488 | arms:MatchArm* 489 | '}' 490 | 491 | MatchArm = 492 | Attr* Pat guard:MatchGuard? '=>' Expr ','? 493 | 494 | MatchGuard = 495 | 'if' condition:Expr 496 | 497 | ReturnExpr = 498 | Attr* 'return' Expr? 499 | 500 | YieldExpr = 501 | Attr* 'yield' Expr? 502 | 503 | LetExpr = 504 | Attr* 'let' Pat '=' Expr 505 | 506 | UnderscoreExpr = 507 | Attr* '_' 508 | 509 | AwaitExpr = 510 | Attr* Expr '.' 'await' 511 | 512 | BoxExpr = 513 | Attr* 'box' Expr 514 | 515 | //*************************// 516 | // Types // 517 | //*************************// 518 | 519 | Type = 520 | ArrayType 521 | | DynTraitType 522 | | FnPtrType 523 | | ForType 524 | | ImplTraitType 525 | | InferType 526 | | MacroType 527 | | NeverType 528 | | ParenType 529 | | PathType 530 | | PtrType 531 | | RefType 532 | | SliceType 533 | | TupleType 534 | 535 | ParenType = 536 | '(' Type ')' 537 | 538 | NeverType = 539 | '!' 540 | 541 | MacroType = 542 | MacroCall 543 | 544 | PathType = 545 | Path 546 | 547 | TupleType = 548 | '(' fields:(Type (',' Type)* ','?)? ')' 549 | 550 | PtrType = 551 | '*' ('const' | 'mut') Type 552 | 553 | RefType = 554 | '&' Lifetime? 'mut'? Type 555 | 556 | ArrayType = 557 | '[' Type ';' Expr ']' 558 | 559 | SliceType = 560 | '[' Type ']' 561 | 562 | InferType = 563 | '_' 564 | 565 | FnPtrType = 566 | 'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType? 567 | 568 | ForType = 569 | 'for' GenericParamList Type 570 | 571 | ImplTraitType = 572 | 'impl' TypeBoundList 573 | 574 | DynTraitType = 575 | 'dyn' TypeBoundList 576 | 577 | TypeBoundList = 578 | bounds:(TypeBound ('+' TypeBound)* '+'?) 579 | 580 | TypeBound = 581 | Lifetime 582 | | ('?' | '~' 'const')? Type 583 | 584 | //************************// 585 | // Patterns // 586 | //************************// 587 | 588 | Pat = 589 | IdentPat 590 | | BoxPat 591 | | RestPat 592 | | LiteralPat 593 | | MacroPat 594 | | OrPat 595 | | ParenPat 596 | | PathPat 597 | | WildcardPat 598 | | RangePat 599 | | RecordPat 600 | | RefPat 601 | | SlicePat 602 | | TuplePat 603 | | TupleStructPat 604 | | ConstBlockPat 605 | 606 | LiteralPat = 607 | Literal 608 | 609 | IdentPat = 610 | Attr* 'ref'? 'mut'? Name ('@' Pat)? 611 | 612 | WildcardPat = 613 | '_' 614 | 615 | RangePat = 616 | // 1.. 617 | start:Pat op:('..' | '..=') 618 | // 1..2 619 | | start:Pat op:('..' | '..=') end:Pat 620 | // ..2 621 | | op:('..' | '..=') end:Pat 622 | 623 | RefPat = 624 | '&' 'mut'? Pat 625 | 626 | RecordPat = 627 | Path RecordPatFieldList 628 | 629 | RecordPatFieldList = 630 | '{' 631 | fields:(RecordPatField (',' RecordPatField)* ','?)? 632 | RestPat? 633 | '}' 634 | 635 | RecordPatField = 636 | Attr* (NameRef ':')? Pat 637 | 638 | TupleStructPat = 639 | Path '(' fields:(Pat (',' Pat)* ','?)? ')' 640 | 641 | TuplePat = 642 | '(' fields:(Pat (',' Pat)* ','?)? ')' 643 | 644 | ParenPat = 645 | '(' Pat ')' 646 | 647 | SlicePat = 648 | '[' (Pat (',' Pat)* ','?)? ']' 649 | 650 | PathPat = 651 | Path 652 | 653 | OrPat = 654 | (Pat ('|' Pat)* '|'?) 
655 |
656 | BoxPat =
657 |   'box' Pat
658 |
659 | RestPat =
660 |   Attr* '..'
661 |
662 | MacroPat =
663 |   MacroCall
664 |
665 | ConstBlockPat =
666 |   'const' BlockExpr
667 |
--------------------------------------------------------------------------------
/src/error.rs:
--------------------------------------------------------------------------------
1 | //! Boilerplate error definitions.
2 | use std::fmt;
3 |
4 | use crate::lexer::Location;
5 |
6 | /// A type alias for std's Result with the Error as our error type.
7 | pub type Result<T, E = Error> = std::result::Result<T, E>;
8 |
9 | /// An error encountered when parsing a Grammar.
10 | #[derive(Debug)]
11 | pub struct Error {
12 |     pub(crate) message: String,
13 |     pub(crate) location: Option<Location>,
14 | }
15 |
16 | impl fmt::Display for Error {
17 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
18 |         if let Some(loc) = self.location {
19 |             // Report 1-based indices, to match text editors
20 |             write!(f, "{}:{}: ", loc.line + 1, loc.column + 1)?
21 |         }
22 |         write!(f, "{}", self.message)
23 |     }
24 | }
25 |
26 | impl std::error::Error for Error {}
27 |
28 | impl Error {
29 |     pub(crate) fn with_location(self, location: Location) -> Error {
30 |         Error {
31 |             location: Some(location),
32 |             ..self
33 |         }
34 |     }
35 | }
36 |
37 | macro_rules! _format_err {
38 |     ($($tt:tt)*) => {
39 |         $crate::error::Error {
40 |             message: format!($($tt)*),
41 |             location: None,
42 |         }
43 |     };
44 | }
45 | pub(crate) use _format_err as format_err;
46 |
47 | macro_rules! _bail {
48 |     ($($tt:tt)*) => { return Err($crate::error::format_err!($($tt)*)) };
49 | }
50 | pub(crate) use _bail as bail;
51 |
--------------------------------------------------------------------------------
/src/lexer.rs:
--------------------------------------------------------------------------------
1 | //! Simple hand-written ungrammar lexer
2 | use crate::error::{bail, Result};
3 |
4 | #[derive(Debug, Eq, PartialEq)]
5 | pub(crate) enum TokenKind {
6 |     Node(String),
7 |     Token(String),
8 |     Eq,
9 |     Star,
10 |     Pipe,
11 |     QMark,
12 |     Colon,
13 |     LParen,
14 |     RParen,
15 | }
16 |
17 | #[derive(Debug)]
18 | pub(crate) struct Token {
19 |     pub(crate) kind: TokenKind,
20 |     pub(crate) loc: Location,
21 | }
22 |
23 | #[derive(Copy, Clone, Default, Debug)]
24 | pub(crate) struct Location {
25 |     pub(crate) line: usize,
26 |     pub(crate) column: usize,
27 | }
28 |
29 | impl Location {
30 |     fn advance(&mut self, text: &str) {
31 |         match text.rfind('\n') {
32 |             Some(idx) => {
33 |                 self.line += text.chars().filter(|&it| it == '\n').count();
34 |                 self.column = text[idx + 1..].chars().count();
35 |             }
36 |             None => self.column += text.chars().count(),
37 |         }
38 |     }
39 | }
40 |
41 | pub(crate) fn tokenize(mut input: &str) -> Result<Vec<Token>> {
42 |     let mut res = Vec::new();
43 |     let mut loc = Location::default();
44 |     while !input.is_empty() {
45 |         let old_input = input;
46 |         skip_ws(&mut input);
47 |         skip_comment(&mut input);
48 |         if old_input.len() == input.len() {
49 |             match advance(&mut input) {
50 |                 Ok(kind) => {
51 |                     res.push(Token { kind, loc });
52 |                 }
53 |                 Err(err) => return Err(err.with_location(loc)),
54 |             }
55 |         }
56 |         let consumed = old_input.len() - input.len();
57 |         loc.advance(&old_input[..consumed]);
58 |     }
59 |
60 |     Ok(res)
61 | }
62 |
63 | fn skip_ws(input: &mut &str) {
64 |     *input = input.trim_start_matches(is_whitespace)
65 | }
66 | fn skip_comment(input: &mut &str) {
67 |     if input.starts_with("//") {
68 |         let idx = input.find('\n').map_or(input.len(), |it| it + 1);
69 |         *input = &input[idx..]
70 |     }
71 | }
72 |
73 | fn advance(input: &mut &str) -> Result<TokenKind> {
74 |     let mut chars = input.chars();
75 |     let c = chars.next().unwrap();
76 |     let res = match c {
77 |         '=' => TokenKind::Eq,
78 |         '*' => TokenKind::Star,
79 |         '?' => TokenKind::QMark,
80 |         '(' => TokenKind::LParen,
81 |         ')' => TokenKind::RParen,
82 |         '|' => TokenKind::Pipe,
83 |         ':' => TokenKind::Colon,
84 |         '\'' => {
85 |             let mut buf = String::new();
86 |             loop {
87 |                 match chars.next() {
88 |                     None => bail!("unclosed token literal"),
89 |                     Some('\\') => match chars.next() {
90 |                         Some(c) if is_escapable(c) => buf.push(c),
91 |                         _ => bail!("invalid escape in token literal"),
92 |                     },
93 |                     Some('\'') => break,
94 |                     Some(c) => buf.push(c),
95 |                 }
96 |             }
97 |             TokenKind::Token(buf)
98 |         }
99 |         c if is_ident_char(c) => {
100 |             let mut buf = String::new();
101 |             buf.push(c);
102 |             loop {
103 |                 match chars.clone().next() {
104 |                     Some(c) if is_ident_char(c) => {
105 |                         chars.next();
106 |                         buf.push(c);
107 |                     }
108 |                     _ => break,
109 |                 }
110 |             }
111 |             TokenKind::Node(buf)
112 |         }
113 |         '\r' => bail!("unexpected `\\r`, only Unix-style line endings allowed"),
114 |         c => bail!("unexpected character: `{}`", c),
115 |     };
116 |
117 |     *input = chars.as_str();
118 |     Ok(res)
119 | }
120 |
121 | fn is_escapable(c: char) -> bool {
122 |     matches!(c, '\\' | '\'')
123 | }
124 | fn is_whitespace(c: char) -> bool {
125 |     matches!(c, ' ' | '\t' | '\n')
126 | }
127 | fn is_ident_char(c: char) -> bool {
128 |     matches!(c, 'a'..='z' | 'A'..='Z' | '_')
129 | }
130 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | //! Ungrammar -- a DSL for specifying concrete syntax tree grammar.
2 | //!
3 | //! Producing a parser is an explicit non-goal -- it's ok for this grammar to be
4 | //! ambiguous, non LL, non LR, etc.
5 | //!
6 | //! See this
7 | //! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html)
8 | //! for details.
9 |
10 | #![deny(missing_debug_implementations)]
11 | #![deny(missing_docs)]
12 | #![deny(rust_2018_idioms)]
13 |
14 | mod error;
15 | mod lexer;
16 | mod parser;
17 |
18 | use std::{ops, str::FromStr};
19 |
20 | pub use error::{Error, Result};
21 |
22 | /// Returns a Rust grammar.
23 | pub fn rust_grammar() -> Grammar {
24 |     let src = include_str!("../rust.ungram");
25 |     src.parse().unwrap()
26 | }
27 |
28 | /// A node, like `A = 'b' | 'c'`.
29 | ///
30 | /// Indexing into a [`Grammar`] with a [`Node`] returns a reference to a
31 | /// [`NodeData`].
32 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
33 | pub struct Node(usize);
34 |
35 | /// A token, denoted with single quotes, like `'+'` or `'struct'`.
36 | ///
37 | /// Indexing into a [`Grammar`] with a [`Token`] returns a reference to a
38 | /// [`TokenData`].
39 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
40 | pub struct Token(usize);
41 |
42 | /// An Ungrammar grammar.
43 | #[derive(Default, Debug)]
44 | pub struct Grammar {
45 |     nodes: Vec<NodeData>,
46 |     tokens: Vec<TokenData>,
47 | }
48 |
49 | impl FromStr for Grammar {
50 |     type Err = Error;
51 |     fn from_str(s: &str) -> Result<Grammar> {
52 |         let tokens = lexer::tokenize(s)?;
53 |         parser::parse(tokens)
54 |     }
55 | }
56 |
57 | impl Grammar {
58 |     /// Returns an iterator over all nodes in the grammar.
59 |     pub fn iter(&self) -> impl Iterator<Item = Node> + '_ {
60 |         (0..self.nodes.len()).map(Node)
61 |     }
62 |
63 |     /// Returns an iterator over all tokens in the grammar.
64 |     pub fn tokens(&self) -> impl Iterator<Item = Token> + '_ {
65 |         (0..self.tokens.len()).map(Token)
66 |     }
67 | }
68 |
69 | impl ops::Index<Node> for Grammar {
70 |     type Output = NodeData;
71 |     fn index(&self, Node(index): Node) -> &NodeData {
72 |         &self.nodes[index]
73 |     }
74 | }
75 |
76 | impl ops::Index<Token> for Grammar {
77 |     type Output = TokenData;
78 |     fn index(&self, Token(index): Token) -> &TokenData {
79 |         &self.tokens[index]
80 |     }
81 | }
82 |
83 | /// Data about a node.
84 | #[derive(Debug)]
85 | pub struct NodeData {
86 |     /// The name of the node.
87 |     ///
88 |     /// In the rule `A = 'b' | 'c'`, this is `"A"`.
89 |     pub name: String,
90 |     /// The rule for this node.
91 |     ///
92 |     /// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`.
93 |     pub rule: Rule,
94 | }
95 |
96 | /// Data about a token.
97 | #[derive(Debug)]
98 | pub struct TokenData {
99 |     /// The name of the token.
100 |     pub name: String,
101 | }
102 |
103 | /// A production rule.
104 | #[derive(Debug, Clone, Eq, PartialEq)]
105 | pub enum Rule {
106 |     /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule).
107 |     Labeled {
108 |         /// The label.
109 |         label: String,
110 |         /// The rule.
111 |         rule: Box<Rule>,
112 |     },
113 |     /// A node, like `A`.
114 |     Node(Node),
115 |     /// A token, like `'struct'`.
116 |     Token(Token),
117 |     /// A sequence of rules, like `'while' '(' Expr ')' Stmt`.
118 |     Seq(Vec<Rule>),
119 |     /// An alternative between many rules, like `'+' | '-' | '*' | '/'`.
120 |     Alt(Vec<Rule>),
121 |     /// An optional rule, like `A?`.
122 |     Opt(Box<Rule>),
123 |     /// A repeated rule, like `A*`.
124 |     Rep(Box<Rule>),
125 | }
126 |
127 | #[test]
128 | fn smoke() {
129 |     let grammar = include_str!("../ungrammar.ungram");
130 |     let grammar = grammar.parse::<Grammar>().unwrap();
131 |     drop(grammar)
132 | }
133 |
134 | #[test]
135 | fn test_rust_grammar() {
136 |     let _ = rust_grammar();
137 | }
138 |
--------------------------------------------------------------------------------
/src/parser.rs:
--------------------------------------------------------------------------------
1 | //! Simple hand-written ungrammar parser.
2 | use std::collections::HashMap;
3 |
4 | use crate::{
5 |     error::{bail, format_err, Result},
6 |     lexer::{self, TokenKind},
7 |     Grammar, Node, NodeData, Rule, Token, TokenData,
8 | };
9 |
10 | macro_rules! bail {
11 |     ($loc:expr, $($tt:tt)*) => {{
12 |         let err = $crate::error::format_err!($($tt)*)
13 |             .with_location($loc);
14 |         return Err(err);
15 |     }};
16 | }
17 |
18 | pub(crate) fn parse(tokens: Vec<lexer::Token>) -> Result<Grammar> {
19 |     let mut p = Parser::new(tokens);
20 |     while !p.is_eof() {
21 |         node(&mut p)?;
22 |     }
23 |     p.finish()
24 | }
25 |
26 | #[derive(Default)]
27 | struct Parser {
28 |     grammar: Grammar,
29 |     tokens: Vec<lexer::Token>,
30 |     node_table: HashMap<String, Node>,
31 |     token_table: HashMap<String, Token>,
32 | }
33 |
34 | const DUMMY_RULE: Rule = Rule::Node(Node(!0));
35 |
36 | impl Parser {
37 |     fn new(mut tokens: Vec<lexer::Token>) -> Parser {
38 |         tokens.reverse();
39 |         Parser {
40 |             tokens,
41 |             ..Parser::default()
42 |         }
43 |     }
44 |
45 |     fn peek(&self) -> Option<&lexer::Token> {
46 |         self.peek_n(0)
47 |     }
48 |     fn peek_n(&self, n: usize) -> Option<&lexer::Token> {
49 |         self.tokens.iter().nth_back(n)
50 |     }
51 |     fn bump(&mut self) -> Result<lexer::Token> {
52 |         self.tokens
53 |             .pop()
54 |             .ok_or_else(|| format_err!("unexpected EOF"))
55 |     }
56 |     fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> {
57 |         let token = self.bump()?;
58 |         if token.kind != kind {
59 |             bail!(token.loc, "unexpected token, expected `{}`", what);
60 |         }
61 |         Ok(())
62 |     }
63 |     fn is_eof(&self) -> bool {
64 |         self.tokens.is_empty()
65 |     }
66 |     fn finish(self) -> Result<Grammar> {
67 |         for node_data in &self.grammar.nodes {
68 |             if matches!(node_data.rule, DUMMY_RULE) {
69 |                 crate::error::bail!("Undefined node: {}", node_data.name)
70 |             }
71 |         }
72 |         Ok(self.grammar)
73 |     }
74 |     fn intern_node(&mut self, name: String) -> Node {
75 |         let len = self.node_table.len();
76 |         let grammar = &mut self.grammar;
77 |         *self.node_table.entry(name.clone()).or_insert_with(|| {
78 |             grammar.nodes.push(NodeData {
79 |                 name,
80 |                 rule: DUMMY_RULE,
81 |             });
82 |             Node(len)
83 |         })
84 |     }
85 |     fn intern_token(&mut self, name: String) -> Token {
86 |         let len = self.token_table.len();
87 |         let grammar = &mut self.grammar;
88 |         *self.token_table.entry(name.clone()).or_insert_with(|| {
89 |             grammar.tokens.push(TokenData { name });
90 |             Token(len)
91 |         })
92 |     }
93 | }
94 |
95 | fn node(p: &mut Parser) -> Result<()> {
96 |     let token = p.bump()?;
97 |     let node = match token.kind {
98 |         TokenKind::Node(it) => p.intern_node(it),
99 |         _ => bail!(token.loc, "expected ident"),
100 |     };
101 |     p.expect(TokenKind::Eq, "=")?;
102 |     if !matches!(p.grammar[node].rule, DUMMY_RULE) {
103 |         bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name)
104 |     }
105 |
106 |     let rule = rule(p)?;
107 |     p.grammar.nodes[node.0].rule = rule;
108 |     Ok(())
109 | }
110 |
111 | fn rule(p: &mut Parser) -> Result<Rule> {
112 |     if let Some(lexer::Token { kind: TokenKind::Pipe, loc }) = p.peek() {
113 |         bail!(
114 |             *loc,
115 |             "The first element in a sequence of productions or alternatives \
116 |              must not have a leading pipe (`|`)"
117 |         );
118 |     }
119 |
120 |     let lhs = seq_rule(p)?;
121 |     let mut alt = vec![lhs];
122 |     while let Some(token) = p.peek() {
123 |         if token.kind != TokenKind::Pipe {
124 |             break;
125 |         }
126 |         p.bump()?;
127 |         let rule = seq_rule(p)?;
128 |         alt.push(rule)
129 |     }
130 |     let res = if alt.len() == 1 {
131 |         alt.pop().unwrap()
132 |     } else {
133 |         Rule::Alt(alt)
134 |     };
135 |     Ok(res)
136 | }
137 |
138 | fn seq_rule(p: &mut Parser) -> Result<Rule> {
139 |     let lhs = atom_rule(p)?;
140 |
141 |     let mut seq = vec![lhs];
142 |     while let Some(rule) = opt_atom_rule(p)? {
143 |         seq.push(rule)
144 |     }
145 |     let res = if seq.len() == 1 {
146 |         seq.pop().unwrap()
147 |     } else {
148 |         Rule::Seq(seq)
149 |     };
150 |     Ok(res)
151 | }
152 |
153 | fn atom_rule(p: &mut Parser) -> Result<Rule> {
154 |     match opt_atom_rule(p)? {
155 |         Some(it) => Ok(it),
156 |         None => {
157 |             let token = p.bump()?;
158 |             bail!(token.loc, "unexpected token")
159 |         }
160 |     }
161 | }
162 |
163 | fn opt_atom_rule(p: &mut Parser) -> Result<Option<Rule>> {
164 |     let token = match p.peek() {
165 |         Some(it) => it,
166 |         None => return Ok(None),
167 |     };
168 |     let mut res = match &token.kind {
169 |         TokenKind::Node(name) => {
170 |             if let Some(lookahead) = p.peek_n(1) {
171 |                 match lookahead.kind {
172 |                     TokenKind::Eq => return Ok(None),
173 |                     TokenKind::Colon => {
174 |                         let label = name.clone();
175 |                         p.bump()?;
176 |                         p.bump()?;
177 |                         let rule = atom_rule(p)?;
178 |                         let res = Rule::Labeled {
179 |                             label,
180 |                             rule: Box::new(rule),
181 |                         };
182 |                         return Ok(Some(res));
183 |                     }
184 |                     _ => (),
185 |                 }
186 |             }
187 |             match p.peek_n(1) {
188 |                 Some(token) if token.kind == TokenKind::Eq => return Ok(None),
189 |                 _ => (),
190 |             }
191 |             let name = name.clone();
192 |             p.bump()?;
193 |             let node = p.intern_node(name);
194 |             Rule::Node(node)
195 |         }
196 |         TokenKind::Token(name) => {
197 |             let name = name.clone();
198 |             p.bump()?;
199 |             let token = p.intern_token(name);
200 |             Rule::Token(token)
201 |         }
202 |         TokenKind::LParen => {
203 |             p.bump()?;
204 |             let rule = rule(p)?;
205 |             p.expect(TokenKind::RParen, ")")?;
206 |             rule
207 |         }
208 |         _ => return Ok(None),
209 |     };
210 |
211 |     if let Some(token) = p.peek() {
212 |         match &token.kind {
213 |             TokenKind::QMark => {
214 |                 p.bump()?;
215 |                 res = Rule::Opt(Box::new(res));
216 |             }
217 |             TokenKind::Star => {
218 |                 p.bump()?;
219 |                 res = Rule::Rep(Box::new(res));
220 |             }
221 |             _ => (),
222 |         }
223 |     }
224 |     Ok(Some(res))
225 | }
226 |
--------------------------------------------------------------------------------
/ungrammar.ungram:
--------------------------------------------------------------------------------
1 | /// ungrammar for ungrammar
2 | Grammar =
3 |   Node *
4 |
5 | Node =
6 |   name:'ident' '=' Rule
7 |
8 | Rule =
9 |   'ident'
10 | | 'token_ident'
11 | | Rule *
12 | | Rule ( '|' Rule) *
13 | | Rule '?'
14 | | Rule '*'
15 | | '(' Rule ')'
16 | | label:'ident' ':' Rule
17 |
--------------------------------------------------------------------------------
/ungrammar2json/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "ungrammar2json"
3 | description = "Convert ungrammar files to JSON"
4 | version = "1.0.0"
5 | license = "MIT OR Apache-2.0"
6 | repository = "https://github.com/matklad/ungrammar"
7 | authors = ["Aleksey Kladov <aleksey.kladov@gmail.com>"]
8 | edition = "2018"
9 |
10 | [dependencies]
11 | write-json = "0.1.1"
12 | ungrammar = { path = "../", version = "1.1.0" }
13 |
--------------------------------------------------------------------------------
/ungrammar2json/src/main.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 |     env,
3 |     io::{self, Read},
4 |     process,
5 | };
6 |
7 | use ungrammar::{Grammar, Rule};
8 |
9 | fn main() {
10 |     if let Err(err) = try_main() {
11 |         eprintln!("{}", err);
12 |         process::exit(101);
13 |     }
14 | }
15 |
16 | fn try_main() -> io::Result<()> {
17 |     if env::args().count() != 1 {
18 |         eprintln!("Usage: ungrammar2json < grammar.ungram > grammar.json");
19 |         return Ok(());
20 |     }
21 |     let grammar = read_stdin()?;
22 |     let grammar = grammar
23 |         .parse::<Grammar>()
24 |         .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
25 |
26 |     let mut buf = String::new();
27 |     grammar_to_json(&grammar, write_json::object(&mut buf));
28 |     println!("{}", buf);
29 |     Ok(())
30 | }
31 |
32 | fn read_stdin() -> io::Result<String> {
33 |     let mut buf = String::new();
34 |     io::stdin().lock().read_to_string(&mut buf)?;
35 |     Ok(buf)
36 | }
37 |
38 | fn grammar_to_json(grammar: &Grammar, mut obj: write_json::Object<'_>) {
39 |     for node in grammar.iter() {
40 |         let node = &grammar[node];
41 |         rule_to_json(grammar, &node.rule, obj.object(&node.name));
42 |     }
43 | }
44 |
45 | fn rule_to_json(grammar: &Grammar, rule: &Rule, mut obj: write_json::Object) {
46 |     match rule {
47 |         Rule::Labeled { label, rule } => {
48 |             obj.string("label", label);
49 |             rule_to_json(grammar, rule, obj.object("rule"))
50 |         }
51 |         Rule::Node(node) => {
52 |             obj.string("node", &grammar[*node].name);
53 |         }
54 |         Rule::Token(token) => {
55 |             obj.string("token", &grammar[*token].name);
56 |         }
57 |         Rule::Seq(rules) | Rule::Alt(rules) => {
58 |             let tag = match rule {
59 |                 Rule::Seq(_) => "seq",
60 |                 Rule::Alt(_) => "alt",
61 |                 _ => unreachable!(),
62 |             };
63 |             let mut array = obj.array(tag);
64 |             for rule in rules {
65 |                 rule_to_json(grammar, rule, array.object());
66 |             }
67 |         }
68 |         Rule::Opt(arg) | Rule::Rep(arg) => {
69 |             let tag = match rule {
70 |                 Rule::Opt(_) => "opt",
71 |                 Rule::Rep(_) => "rep",
72 |                 _ => unreachable!(),
73 |             };
74 |             rule_to_json(grammar, arg, obj.object(tag));
75 |         }
76 |     }
77 | }
78 |
--------------------------------------------------------------------------------
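
The files above are the whole crate. For orientation, here is a minimal sketch (not part of the repository) of how the public API from src/lib.rs fits together: a `Grammar` is parsed via `FromStr`, its nodes are walked with `iter()`, and each rule is handled by matching on `Rule`. The grammar literal and the `summarize` helper below are illustrative only, not code from the repo.

--------------------------------------------------------------------------------
example: using the ungrammar API (not part of the repository)
--------------------------------------------------------------------------------
// Minimal usage sketch; assumes the `ungrammar` crate above as a dependency.
use ungrammar::{Grammar, Rule};

fn main() {
    // `Grammar` implements `FromStr`, so any &str can be parsed directly.
    let grammar: Grammar = "A = 'b' | 'c'".parse().unwrap();

    // Walk every node; indexing a `Grammar` with a `Node` yields its `NodeData`.
    for node in grammar.iter() {
        let data = &grammar[node];
        println!("{} = {}", data.name, summarize(&grammar, &data.rule));
    }
}

// Render a rule back into ungrammar-like notation by matching on `Rule`.
fn summarize(grammar: &Grammar, rule: &Rule) -> String {
    match rule {
        Rule::Labeled { label, rule } => format!("{}:{}", label, summarize(grammar, rule)),
        Rule::Node(node) => grammar[*node].name.clone(),
        Rule::Token(token) => format!("'{}'", grammar[*token].name),
        Rule::Seq(rules) => {
            let parts: Vec<String> = rules.iter().map(|r| summarize(grammar, r)).collect();
            parts.join(" ")
        }
        Rule::Alt(rules) => {
            let parts: Vec<String> = rules.iter().map(|r| summarize(grammar, r)).collect();
            parts.join(" | ")
        }
        Rule::Opt(rule) => format!("({})?", summarize(grammar, rule)),
        Rule::Rep(rule) => format!("({})*", summarize(grammar, rule)),
    }
}
--------------------------------------------------------------------------------

This is the same API that ungrammar2json drives; per its usage string above, that binary is invoked from the shell as `ungrammar2json < grammar.ungram > grammar.json`.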