├── .gitignore ├── Cargo.toml ├── README.md ├── src └── main.rs └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "example-fault-tolerant-parser" 3 | version = "0.1.0" 4 | authors = ["Eyal Kalderon "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | nom = "5.1" 9 | nom_locate = "2.0" 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Example fault-tolerant parser using nom 2 | 3 | This Rust crate was based on the work of "Syntax error recovery in parsing 4 | expression grammars" (Medeiros, S. and Fabio Mascarenhas, 2018), adapted to work 5 | with `nom`, an open source parser combinator library. 6 | 7 | From blog article: [Error recovery with parser combinators (using nom)][blog] 8 | 9 | [blog]: https://www.eyalkalderon.com/nom-error-recovery 10 | 11 | ## License 12 | 13 | ```text 14 | This is free and unencumbered software released into the public domain. 15 | 16 | Anyone is free to copy, modify, publish, use, compile, sell, or 17 | distribute this software, either in source code form or as a compiled 18 | binary, for any purpose, commercial or non-commercial, and by any 19 | means. 20 | 21 | In jurisdictions that recognize copyright laws, the author or authors 22 | of this software dedicate any and all copyright interest in the 23 | software to the public domain. We make this dedication for the benefit 24 | of the public at large and to the detriment of our heirs and 25 | successors. We intend this dedication to be an overt act of 26 | relinquishment in perpetuity of all present and future rights to this 27 | software under copyright law. 
28 | 29 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 32 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 33 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 34 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 35 | OTHER DEALINGS IN THE SOFTWARE. 36 | 37 | For more information, please refer to <https://unlicense.org> 38 | ``` 39 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::cell::RefCell; 2 | use std::ops::Range; 3 | 4 | use nom::branch::alt; 5 | use nom::bytes::complete::{take, take_till1, take_while}; 6 | use nom::character::complete::{anychar, char}; 7 | use nom::combinator::{all_consuming, map, not, recognize, rest, verify}; 8 | use nom::sequence::{delimited, preceded, terminated}; 9 | 10 | type LocatedSpan<'a> = nom_locate::LocatedSpan<&'a str, State<'a>>; 11 | type IResult<'a, T> = nom::IResult<LocatedSpan<'a>, T>; 12 | 13 | trait ToRange { 14 | fn to_range(&self) -> Range<usize>; 15 | } 16 | 17 | impl<'a> ToRange for LocatedSpan<'a> { 18 | fn to_range(&self) -> Range<usize> { 19 | let start = self.location_offset(); 20 | let end = start + self.fragment().len(); 21 | start..end 22 | } 23 | } 24 | 25 | #[derive(Debug)] 26 | struct Error(Range<usize>, String); 27 | 28 | #[derive(Clone, Debug)] 29 | struct State<'a>(&'a RefCell<Vec<Error>>); 30 | 31 | impl<'a> State<'a> { 32 | pub fn report_error(&self, error: Error) { 33 | self.0.borrow_mut().push(error); 34 | } 35 | } 36 | 37 | fn expect<'a, F, E, T>(parser: F, error_msg: E) -> impl Fn(LocatedSpan<'a>) -> IResult<Option<T>> 38 | where 39 | F: Fn(LocatedSpan<'a>) -> IResult<T>, 40 | E: ToString, 41 | { 42 | move |input| match parser(input) { 43 | Ok((remaining, out)) => Ok((remaining, Some(out))), 44 |
Err(nom::Err::Error((input, _))) | Err(nom::Err::Failure((input, _))) => { 45 | let err = Error(input.to_range(), error_msg.to_string()); 46 | input.extra.report_error(err); 47 | Ok((input, None)) 48 | } 49 | Err(err) => Err(err), 50 | } 51 | } 52 | 53 | #[derive(Debug)] 54 | struct Ident(String); 55 | 56 | #[derive(Debug)] 57 | enum Expr { 58 | Ident(Ident), 59 | Paren(Box<Expr>), 60 | Error, 61 | } 62 | 63 | fn ident(input: LocatedSpan) -> IResult<Expr> { 64 | let first = verify(anychar, |c| c.is_ascii_alphabetic() || *c == '_'); 65 | let rest = take_while(|c: char| c.is_ascii_alphanumeric() || "_-'".contains(c)); 66 | let ident = recognize(preceded(first, rest)); 67 | map(ident, |span: LocatedSpan| { 68 | Expr::Ident(Ident(span.fragment().to_string())) 69 | })(input) 70 | } 71 | 72 | fn paren(input: LocatedSpan) -> IResult<Expr> { 73 | let paren = delimited( 74 | char('('), 75 | expect(expr, "expected expression after `(`"), 76 | expect(char(')'), "missing `)`"), 77 | ); 78 | 79 | map(paren, |inner| { 80 | Expr::Paren(Box::new(inner.unwrap_or(Expr::Error))) 81 | })(input) 82 | } 83 | 84 | fn error(input: LocatedSpan) -> IResult<Expr> { 85 | map(take_till1(|c| c == ')'), |span: LocatedSpan| { 86 | let err = Error(span.to_range(), format!("unexpected `{}`", span.fragment())); 87 | span.extra.report_error(err); 88 | Expr::Error 89 | })(input) 90 | } 91 | 92 | fn expr(input: LocatedSpan) -> IResult<Expr> { 93 | alt((paren, ident, error))(input) 94 | } 95 | 96 | fn source_file(input: LocatedSpan) -> IResult<Expr> { 97 | let expr = alt((expr, map(take(0usize), |_| Expr::Error))); 98 | terminated(expr, preceded(expect(not(anychar), "expected EOF"), rest))(input) 99 | } 100 | 101 | fn parse(source: &str) -> (Expr, Vec<Error>) { 102 | let errors = RefCell::new(Vec::new()); 103 | let input = LocatedSpan::new_extra(source, State(&errors)); 104 | let (_, expr) = all_consuming(source_file)(input).expect("parser cannot fail"); 105 | (expr, errors.into_inner()) 106 | } 107 | 108 | fn main() { 109 | for input in
&["foo", "(foo)", "(foo))", "(%", "(", "%", "()", ""] { 110 | println!("{:7} {:?}", input, parse(input)); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "arrayvec" 5 | version = "0.4.12" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" 8 | dependencies = [ 9 | "nodrop", 10 | ] 11 | 12 | [[package]] 13 | name = "bitflags" 14 | version = "1.2.1" 15 | source = "registry+https://github.com/rust-lang/crates.io-index" 16 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 17 | 18 | [[package]] 19 | name = "bytecount" 20 | version = "0.6.0" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | checksum = "b0017894339f586ccb943b01b9555de56770c11cda818e7e3d8bd93f4ed7f46e" 23 | 24 | [[package]] 25 | name = "cfg-if" 26 | version = "0.1.9" 27 | source = "registry+https://github.com/rust-lang/crates.io-index" 28 | checksum = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33" 29 | 30 | [[package]] 31 | name = "example-fault-tolerant-parser" 32 | version = "0.1.0" 33 | dependencies = [ 34 | "nom", 35 | "nom_locate", 36 | ] 37 | 38 | [[package]] 39 | name = "lexical-core" 40 | version = "0.6.7" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "f86d66d380c9c5a685aaac7a11818bdfa1f733198dfd9ec09c70b762cd12ad6f" 43 | dependencies = [ 44 | "arrayvec", 45 | "bitflags", 46 | "cfg-if", 47 | "rustc_version", 48 | "ryu", 49 | "static_assertions", 50 | ] 51 | 52 | [[package]] 53 | name = "memchr" 54 | version = "2.3.3" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = 
"3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" 57 | 58 | [[package]] 59 | name = "nodrop" 60 | version = "0.1.14" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" 63 | 64 | [[package]] 65 | name = "nom" 66 | version = "5.1.1" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | checksum = "0b471253da97532da4b61552249c521e01e736071f71c1a4f7ebbfbf0a06aad6" 69 | dependencies = [ 70 | "lexical-core", 71 | "memchr", 72 | "version_check", 73 | ] 74 | 75 | [[package]] 76 | name = "nom_locate" 77 | version = "2.0.0" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = "1e4726500a3d0297dd38edc169d919ad997a9931b4645b59ce0231e88536e213" 80 | dependencies = [ 81 | "bytecount", 82 | "memchr", 83 | "nom", 84 | ] 85 | 86 | [[package]] 87 | name = "rustc_version" 88 | version = "0.2.3" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" 91 | dependencies = [ 92 | "semver", 93 | ] 94 | 95 | [[package]] 96 | name = "ryu" 97 | version = "1.0.3" 98 | source = "registry+https://github.com/rust-lang/crates.io-index" 99 | checksum = "535622e6be132bccd223f4bb2b8ac8d53cda3c7a6394944d3b2b33fb974f9d76" 100 | 101 | [[package]] 102 | name = "semver" 103 | version = "0.9.0" 104 | source = "registry+https://github.com/rust-lang/crates.io-index" 105 | checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" 106 | dependencies = [ 107 | "semver-parser", 108 | ] 109 | 110 | [[package]] 111 | name = "semver-parser" 112 | version = "0.7.0" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" 115 | 116 | [[package]] 117 | name = "static_assertions" 118 | version = "0.3.4" 119 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" 121 | 122 | [[package]] 123 | name = "version_check" 124 | version = "0.9.1" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce" 127 | --------------------------------------------------------------------------------