├── README.md ├── faster-pest ├── examples │ ├── lightgrep │ │ ├── input.txt │ │ ├── Sujet.pdf │ │ ├── grammar.pest │ │ └── main.rs │ ├── csv │ │ ├── grammar.pest │ │ ├── README.md │ │ ├── input.csv │ │ └── main.rs │ ├── ini │ │ ├── input.ini │ │ ├── grammar.pest │ │ ├── README.md │ │ └── main.rs │ ├── po │ │ ├── grammar.pest │ │ ├── input.po │ │ └── main.rs │ └── json │ │ ├── grammar.pest │ │ ├── input.json │ │ └── main.rs ├── src │ ├── lib.rs │ ├── error.rs │ ├── pairs.rs │ └── ident.rs ├── Cargo.toml ├── benches │ ├── lightgrep.rs │ ├── csv.rs │ └── json.rs └── README.md ├── .gitignore ├── Cargo.toml ├── faster-pest-generator ├── Cargo.toml └── src │ ├── ids.rs │ ├── tree_inspection.rs │ ├── expr_codegen.rs │ ├── lib.rs │ └── optimizer.rs └── faster-pest-derive ├── src ├── pattern_rule_silent.rs ├── pattern_rule_method.rs ├── pattern_expr_str.rs ├── pattern_expr_seq.rs ├── pattern_expr_rep_character.rs ├── pattern_expr_opt.rs ├── pattern_expr_rep.rs ├── pattern_expr_character.rs ├── pattern_expr_insens.rs ├── pattern_expr_neg.rs ├── pattern_expr_choice.rs ├── pattern_outer.rs ├── pattern_rule.rs └── lib.rs ├── Cargo.toml └── Cargo.lock /README.md: -------------------------------------------------------------------------------- 1 | faster-pest/README.md -------------------------------------------------------------------------------- /faster-pest/examples/lightgrep/input.txt: -------------------------------------------------------------------------------- 1 | (([a-b]|c)|[abc]|v|[cde]a[fdg]|.a|.|[e-p]) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | tests/test2.rs 4 | .vscode 5 | fp_code.rs 6 | -------------------------------------------------------------------------------- /faster-pest/examples/lightgrep/Sujet.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mubelotix/faster-pest/HEAD/faster-pest/examples/lightgrep/Sujet.pdf -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = [ 4 | "faster-pest", 5 | "faster-pest-generator", 6 | "faster-pest-derive", 7 | ] -------------------------------------------------------------------------------- /faster-pest/examples/csv/grammar.pest: -------------------------------------------------------------------------------- 1 | field = { (ASCII_DIGIT | "." | "-")+ } 2 | record = { field ~ ("," ~ field)* } 3 | file = { SOI ~ (record ~ ("\r\n" | "\n"))* ~ EOI } 4 | -------------------------------------------------------------------------------- /faster-pest/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub use faster_pest_derive::Parser; 2 | 3 | mod error; 4 | pub use error::*; 5 | mod pairs; 6 | pub use pairs::*; 7 | mod ident; 8 | pub use ident::*; 9 | -------------------------------------------------------------------------------- /faster-pest/examples/ini/input.ini: -------------------------------------------------------------------------------- 1 | username = noha 2 | password = plain_text 3 | salt = NaCl 4 | 5 | [server_1] 6 | interface=eth0 7 | ip=127.0.0.1 8 | document_root=/var/www/example.org 9 | 10 | [empty_section] 11 | 12 | [second_server] 13 | document_root=/var/www/example.com 14 | ip= 15 | interface=eth1 16 | -------------------------------------------------------------------------------- /faster-pest/examples/ini/grammar.pest: -------------------------------------------------------------------------------- 1 | WHITESPACE = _{ " " } 2 | 3 | char = _{ ASCII_ALPHANUMERIC | "." 
| "_" | "/" } 4 | name = { char+ } 5 | value = { char* } 6 | section = { "[" ~ name ~ "]" } 7 | property = { name ~ "=" ~ value } 8 | file = { 9 | SOI ~ 10 | ((section | property)? ~ NEWLINE)* ~ 11 | EOI 12 | } 13 | -------------------------------------------------------------------------------- /faster-pest-generator/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "faster-pest-generator" 3 | version = "0.2.0-alpha.1" 4 | edition = "2021" 5 | description = "proc-macro for faster-pest" 6 | license = "GPL-3.0-or-later" 7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | 10 | [dependencies] 11 | pest_meta = "2.7" 12 | -------------------------------------------------------------------------------- /faster-pest/examples/csv/README.md: -------------------------------------------------------------------------------- 1 | ## CSV Parser Example 2 | 3 | This example comes from the [pest book](https://pest.rs/book/examples/csv.html). 4 | It uses the exact same code, showing that `faster-pest` is a drop-in replacement for `pest`. 5 | 6 | Running the example: 7 | ```bash 8 | cargo run --example csv 9 | ``` 10 | 11 | If outdated, feel free to send a PR. 12 | -------------------------------------------------------------------------------- /faster-pest/examples/ini/README.md: -------------------------------------------------------------------------------- 1 | ## INI Parser Example 2 | 3 | This example comes from the [pest book](https://pest.rs/book/examples/ini.html). 4 | It uses the exact same code, showing that `faster-pest` is a drop-in replacement for `pest`. 5 | 6 | Running the example: 7 | ```bash 8 | cargo run --example ini 9 | ``` 10 | 11 | If outdated, feel free to send a PR. 
12 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_rule_silent.rs: -------------------------------------------------------------------------------- 1 | pub fn parse_RuleVariant<'i, 'b>(input: &'i [u8], idents: &'b mut Vec<(Ident<'i>, usize)>) -> Result<&'i [u8], Error> { 2 | parse_top_expr_id(input, formatted_idents) 3 | } 4 | 5 | pub fn quick_parse_RuleVariant<'i, 'b>(input: &'i [u8], idents: &'b mut Vec<(Ident<'i>, usize)>) -> Option<&'i [u8]> { 6 | quick_parse_top_expr_id(input, formatted_idents) 7 | } 8 | -------------------------------------------------------------------------------- /faster-pest/examples/po/grammar.pest: -------------------------------------------------------------------------------- 1 | WSP = _{ " "* } 2 | 3 | linecontent = {( 4 | (!"\"" ~ !"\\" ~ ANY)+ | "\\\"" | "\\" 5 | )*} 6 | line = _{WSP ~ "\"" ~ linecontent ~ "\"" ~ WSP ~ NEWLINE} 7 | 8 | msgctx = {"msgctx" ~ (WSP ~ NEWLINE | " " ~ (line)*)} 9 | msgid = {"msgid " ~ (line)+} 10 | msgstr = {"msgstr " ~ (line)+} 11 | 12 | entry = {msgctx? 
~ msgid ~ msgstr} 13 | 14 | file = {(SOI ~ (entry ~ NEWLINE*)+ ~ EOI)} 15 | -------------------------------------------------------------------------------- /faster-pest-derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "faster-pest-derive" 3 | authors = ["Mubelotix "] 4 | description = "proc-macro for faster-pest" 5 | license = "GPL-3.0-or-later" 6 | repository = "https://github.com/Mubelotix/faster-pest" 7 | version = "0.2.0-alpha.1" 8 | edition = "2021" 9 | 10 | [lib] 11 | proc-macro = true 12 | 13 | [dependencies] 14 | syn = "1.0" 15 | proc-macro2 = "1.0" 16 | faster-pest-generator = { path="../faster-pest-generator" } 17 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_rule_method.rs: -------------------------------------------------------------------------------- 1 | impl StructIdent { 2 | pub fn parse_RuleVariant(input: &str) -> Result, Error> { 3 | let mut idents = Vec::with_capacity(500); 4 | if quick_parse_RuleVariant(input.as_bytes(), &mut idents).is_some() { 5 | return Ok(unsafe { IdentList::from_idents(idents) }); 6 | } 7 | idents.clear(); 8 | parse_RuleVariant(input.as_bytes(), &mut idents)?; 9 | Ok(unsafe { IdentList::from_idents(idents) }) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /faster-pest/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "faster-pest" 3 | authors = ["Mubelotix "] 4 | readme = "README.md" 5 | license = "GPL-3.0-or-later" 6 | repository = "https://github.com/Mubelotix/faster-pest" 7 | description = "A generator for high-performance Pest parsers, bringing your grammar to the next level" 8 | version = "0.2.0-alpha.1" 9 | edition = "2021" 10 | 11 | [dependencies] 12 | pest = "2.7" 13 | faster-pest-derive = { path="../faster-pest-derive" } 14 | 15 | [dev-dependencies] 16 
| pest_derive = "2.7" 17 | serde = "1.0" 18 | serde_json = "1.0" 19 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_expr_str.rs: -------------------------------------------------------------------------------- 1 | // expr_pest 2 | pub fn parse_expr_id<'i>(input: &'i [u8]) -> Result<&'i [u8], Error> { 3 | if input.starts_with(bexpr_str) { 4 | Ok(unsafe { input.get_unchecked(expr_str.len()..) }) 5 | } else { 6 | Err(Error::new(ErrorKind::ExpectedValue(expr_str), unsafe{std::str::from_utf8_unchecked(input)}, r#"expr_id expr_pest"#)) 7 | } 8 | } 9 | pub fn quick_parse_expr_id<'i>(input: &'i [u8]) -> Option<&'i [u8]> { 10 | if input.starts_with(bexpr_str) { 11 | Some(unsafe { input.get_unchecked(expr_str.len()..) }) 12 | } else { 13 | None 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /faster-pest/examples/json/grammar.pest: -------------------------------------------------------------------------------- 1 | WSP = _{ (" " | "\n" | "\t" | "\r")* } 2 | 3 | escaped_char = { "\\"~("\""|"\\"|"/"|"b"|"f"|"n"|"r"|"t" /* todo u */) } 4 | string = { ((!"\"" ~ !"\\" ~ ANY)+ | escaped_char)* } 5 | outer_string = _{ "\"" ~ string ~ "\"" } 6 | 7 | array = { "[" ~ ((value ~ ("," ~ value)* ~ "]") | (WSP ~ "]")) } 8 | boolean = { "true" | "false" } 9 | null = { "null" } 10 | number = { (ASCII_DIGIT | "-" | ".")* } 11 | 12 | value = _{ WSP ~ (outer_string | object | array | boolean | null | number) ~ WSP } 13 | property = { WSP ~ outer_string ~ WSP ~ ":" ~ value } 14 | object = { ("{" ~ property ~ ("," ~ property)* ~ "}") | ("{" ~ WSP ~ "}") } 15 | 16 | file = {(SOI ~ value ~ EOI)} 17 | -------------------------------------------------------------------------------- /faster-pest/examples/lightgrep/grammar.pest: -------------------------------------------------------------------------------- 1 | caractere = { 'a'..'z' | 'A'..'Z' | '0'..'9' | "!" 
| "\"" | "#" | "%" | "&" | "," | "/" | ":" | ";" | "<" | "=" | ">" | "?" | "@" | "^" | "_" | "`" | "{" | " | " | "}" | "~" } 2 | caractere_echape = { "|" | "$" | "(" | ")" | "[" | "]" | "*" | "?" | "." | "-" } 3 | joker = { "." } 4 | 5 | er = { erc ~ ("|" ~ erc)* } 6 | erc = { erb+ } 7 | erb = { ere ~ ere_rep? } 8 | ere = _{ 9 | ("(" ~ er ~ ")") | 10 | lettre | 11 | joker | 12 | ("[" ~ ens_lettre ~ "]") | 13 | ("[" ~ "^" ~ ens_lettre ~ "]") 14 | } 15 | ere_rep = { "*" | "?" | "+" } 16 | 17 | ens_lettre = { element_ens_lettre+ } 18 | element_ens_lettre = { (lettre ~ "-" ~ lettre) | lettre } 19 | lettre = _{ caractere | ("\\" ~ caractere_echape) } 20 | 21 | file = { SOI ~ er ~ ("\n")* ~ EOI } 22 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_expr_seq.rs: -------------------------------------------------------------------------------- 1 | // expr_pest 2 | pub fn parse_expr_id<'i, 'b>( 3 | mut input: &'i [u8], 4 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 5 | ) -> Result<&'i [u8], Error> { 6 | 7 | // TODO note 8 | 9 | input = parse_seq_item_id(input, seq_idents).map_err(|e| e.with_trace(r#"expr_id-seq_n expr_pest"#))?; //WSP while let Ok(new_input) = parse_WHITESPACE(input, idents) { input = new_input } 10 | 11 | Ok(input) 12 | } 13 | pub fn quick_parse_expr_id<'i, 'b>( 14 | mut input: &'i [u8], 15 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 16 | ) -> Option<&'i [u8]> { 17 | input = quick_parse_seq_item_id(input, seq_idents)?; //WSP while let Some(new_input) = quick_parse_WHITESPACE(input, idents) { input = new_input } 18 | 19 | Some(input) 20 | } 21 | -------------------------------------------------------------------------------- /faster-pest/examples/csv/input.csv: -------------------------------------------------------------------------------- 1 | 65279,1179403647,1463895090 2 | 3.1415927,2.7182817,1.618034 3 | -40,-273.15 4 | 13,42 5 | 65537 6 | 65279,1179403647,1463895090 7 
| 3.1415927,2.7182817,1.618034 8 | -40,-273.15 9 | 13,42 10 | 65537 11 | 65279,1179403647,1463895090 12 | 3.1415927,2.7182817,1.618034 13 | -40,-273.15 14 | 13,42 15 | 65537 16 | 65279,1179403647,1463895090 17 | 3.1415927,2.7182817,1.618034 18 | -40,-273.15 19 | 13,42 20 | 65537 21 | 65279,1179403647,1463895090 22 | 3.1415927,2.7182817,1.618034 23 | -40,-273.15 24 | 13,42 25 | 65537 26 | 65279,1179403647,1463895090 27 | 3.1415927,2.7182817,1.618034 28 | -40,-273.15 29 | 13,42 30 | 65537 31 | 65279,1179403647,1463895090 32 | 3.1415927,2.7182817,1.618034 33 | -40,-273.15 34 | 13,42 35 | 65537 36 | 65279,1179403647,1463895090 37 | 3.1415927,2.7182817,1.618034 38 | -40,-273.15 39 | 13,42 40 | 65537 41 | -------------------------------------------------------------------------------- /faster-pest/examples/po/input.po: -------------------------------------------------------------------------------- 1 | msgctx 2 | msgid "" 3 | msgstr "" 4 | "Language: en\n" 5 | "Quote: \"test\"" 6 | 7 | msgctx "STILL ALIVE" 8 | "- by Jonathan Coulton" 9 | msgid "This was a triumph." 10 | msgstr "C'était un triomphe." 11 | 12 | msgid "I'm making a note here: huge success." 13 | msgstr "J'en prend note ici: énorme succès." 14 | 15 | msgid "It's hard to overstate my satisfaction." 16 | msgstr "Il est difficile de surévaluer ma satisfaction." 17 | 18 | msgctx 19 | msgid "Aperture Science" 20 | msgstr "" 21 | 22 | msgid "We do what we must" 23 | " because" 24 | " we can." 25 | msgstr "On fait notre devoir, car c'est notre pouvoir." 26 | 27 | msgid "For the good of all of us..." 28 | msgstr "Pour le bien de nous tous..." 29 | 30 | msgid "- except the ones who are dead." 31 | msgstr "- à part pour ceux qui sont morts." 
32 | -------------------------------------------------------------------------------- /faster-pest-generator/src/ids.rs: -------------------------------------------------------------------------------- 1 | use crate::*; 2 | use std::collections::HashMap; 3 | 4 | pub struct IdRegistry { 5 | ids: HashMap, 6 | next: usize, 7 | } 8 | 9 | impl IdRegistry { 10 | pub fn new() -> Self { 11 | Self { 12 | ids: HashMap::new(), 13 | next: 0, 14 | } 15 | } 16 | 17 | pub fn id(&mut self, expr: &FPestExpr) -> String { 18 | match expr { 19 | FPestExpr::Ident(ident) => ident.to_string(), 20 | expr => { 21 | let id = format!("{:?}", expr); 22 | let id = self.ids.entry(id).or_insert_with(|| { 23 | let id = self.next; 24 | self.next += 1; 25 | id 26 | }); 27 | format!("anon_{id:0>4}") 28 | } 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_expr_rep_character.rs: -------------------------------------------------------------------------------- 1 | // expr_pest 2 | pub fn parse_expr_id<'i, 'b>( 3 | mut input: &'i [u8], 4 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 5 | ) -> Result<&'i [u8], Error> { 6 | let i = input.iter().position(|c| !(character_condition)).unwrap_or(input.len()); 7 | //NON-EMPTY if i == 0 { 8 | //NON-EMPTY return Err(Error::new(ErrorKind::Expected("character_condition"), unsafe{std::str::from_utf8_unchecked(input)}, "expr_id expr_pest")); 9 | //NON-EMPTY } 10 | Ok(unsafe { input.get_unchecked(i..) }) 11 | } 12 | pub fn quick_parse_expr_id<'i, 'b>( 13 | mut input: &'i [u8], 14 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 15 | ) -> Option<&'i [u8]> { let i = input.iter().position(|c| !(character_condition)).unwrap_or(input.len()); 16 | //NON-EMPTY if i == 0 { 17 | //NON-EMPTY return None; 18 | //NON-EMPTY } 19 | Some(unsafe { input.get_unchecked(i..) 
}) 20 | } 21 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_expr_opt.rs: -------------------------------------------------------------------------------- 1 | // expr_pest 2 | pub fn parse_expr_id<'i, 'b>( 3 | input: &'i [u8], 4 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 5 | ) -> Result<&'i [u8], Error> { 6 | //SIG-IDENTS let idents_len = idents.len(); 7 | if let Ok(input) = parse_inner_eid( 8 | input, 9 | //SIG-IDENTS idents 10 | ) { 11 | Ok(input) 12 | } else { 13 | //SIG-IDENTS unsafe { idents.set_len(idents_len); } 14 | Ok(input) 15 | } 16 | } 17 | pub fn quick_parse_expr_id<'i, 'b>( 18 | input: &'i [u8], 19 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 20 | ) -> Option<&'i [u8]> { 21 | //SIG-IDENTS let idents_len = idents.len(); 22 | if let Some(input) = quick_parse_inner_eid( 23 | input, 24 | //SIG-IDENTS idents 25 | ) { 26 | Some(input) 27 | } else { 28 | //SIG-IDENTS unsafe { idents.set_len(idents_len); } 29 | Some(input) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_expr_rep.rs: -------------------------------------------------------------------------------- 1 | // expr_pest 2 | pub fn parse_expr_id<'i, 'b>( 3 | mut input: &'i [u8], 4 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 5 | ) -> Result<&'i [u8], Error> { 6 | //NON-EMPTY input = parse_inner_eid(input, inner_idents)?; 7 | while let Ok(new_input) = parse_inner_eid(input, inner_idents) { 8 | input = new_input; 9 | //WSP //while let Ok(new_input) = parse_WHITESPACE(input, idents) { input = new_input } 10 | } 11 | Ok(input) 12 | } 13 | pub fn quick_parse_expr_id<'i, 'b>( 14 | mut input: &'i [u8], 15 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 16 | ) -> Option<&'i [u8]> { //NON-EMPTY input = quick_parse_inner_eid(input, inner_idents)?; 17 | while let Some(new_input) = quick_parse_inner_eid(input, inner_idents) { 
18 | input = new_input; 19 | //WSP //while let Some(new_input) = quick_parse_WHITESPACE(input, idents) { input = new_input } 20 | } 21 | Some(input) 22 | } 23 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_expr_character.rs: -------------------------------------------------------------------------------- 1 | // expr_pest 2 | pub fn parse_expr_id<'i>(input: &'i [u8]) -> Result<&'i [u8], Error> { 3 | if !input.is_empty() { 4 | let c = unsafe { input.get_unchecked(0) }; 5 | if character_condition { 6 | Ok(unsafe { input.get_unchecked(1..) }) 7 | } else { 8 | Err(Error::new(ErrorKind::Expected("unknown"), unsafe{std::str::from_utf8_unchecked(input)}, "expr_id expr_pest")) // TODO: remove unknown 9 | } 10 | } else { 11 | Err(Error::new(ErrorKind::Expected("unknown"), unsafe{std::str::from_utf8_unchecked(input)}, "expr_id expr_pest")) 12 | } 13 | } 14 | pub fn quick_parse_expr_id<'i>(input: &'i [u8]) -> Option<&'i [u8]> { 15 | if !input.is_empty() { 16 | let c = unsafe { input.get_unchecked(0) }; 17 | if character_condition { 18 | Some(unsafe { input.get_unchecked(1..) }) 19 | } else { 20 | None 21 | } 22 | } else { 23 | None 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_expr_insens.rs: -------------------------------------------------------------------------------- 1 | // expr_pest 2 | pub fn parse_expr_id<'i>(input: &'i [u8]) -> Result<&'i [u8], Error> { 3 | if input.len() < expr_len_str { 4 | return Err(Error::new(ErrorKind::ExpectedValue(expr_str), unsafe{std::str::from_utf8_unchecked(input)}, r#"expr_id expr_pest"#)); 5 | } 6 | for i in 0..expr_len_str { 7 | if input[i] != bexpr_str[i] && input[i] != bexpr_inv_str[i] { 8 | return Err(Error::new(ErrorKind::ExpectedValue(expr_str), unsafe{std::str::from_utf8_unchecked(input)}, r#"expr_id expr_pest"#)); 9 | } 10 | } 11 | Ok(unsafe { input.get_unchecked(expr_len_str..) 
}) 12 | } 13 | pub fn quick_parse_expr_id<'i>(input: &'i [u8]) -> Option<&'i [u8]> { 14 | if input.len() < expr_len_str { 15 | return None; 16 | } 17 | for i in 0..expr_len_str { 18 | if input[i] != bexpr_str[i] && input[i] != bexpr_inv_str[i] { 19 | return None; 20 | } 21 | } 22 | Some(unsafe { input.get_unchecked(expr_len_str..) }) 23 | } 24 | -------------------------------------------------------------------------------- /faster-pest/examples/po/main.rs: -------------------------------------------------------------------------------- 1 | use faster_pest::*; 2 | use std::collections::HashMap; 3 | 4 | #[derive(Parser)] 5 | #[grammar = "faster-pest/examples/po/grammar.pest"] 6 | pub struct POParser; 7 | 8 | fn main() { 9 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/po/input.po") { 10 | Ok(s) => s, 11 | Err(_) => match std::fs::read_to_string("examples/po/input.po") { 12 | Ok(s) => s, 13 | Err(e) => panic!("cannot read file: {}", e) 14 | } 15 | }; 16 | 17 | let output = POParser::parse_file(&unparsed_file).map_err(|e| e.print(unparsed_file.as_str())).expect("unsuccessful parse"); 18 | let file = output.into_iter().next().expect("couldn't find file rule"); 19 | 20 | for line in file.children() { 21 | match line.as_rule() { 22 | Rule::entry => { 23 | 24 | 25 | println!("{:#?}", line.children()); 26 | } 27 | //Rule::EOI => (), 28 | _ => unreachable!(), 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_expr_neg.rs: -------------------------------------------------------------------------------- 1 | // expr_pest 2 | pub fn parse_expr_id<'i, 'b>( 3 | input: &'i [u8], 4 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 5 | ) -> Result<&'i [u8], Error> { 6 | //SIG-IDENTS let idents_len = idents.len(); 7 | if parse_inner_id( 8 | input, 9 | //SIG-IDENTS idents 10 | ).is_err() { 11 | //SIG-IDENTS unsafe { idents.set_len(idents_len); } 12 | Ok(input) 13 | 
} else { 14 | Err(Error::new(ErrorKind::NegPredFailed("inner_id"), unsafe{std::str::from_utf8_unchecked(input)}, r#"expr_id expr_pest"#)) 15 | } 16 | } 17 | pub fn quick_parse_expr_id<'i, 'b>( 18 | input: &'i [u8], 19 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 20 | ) -> Option<&'i [u8]> { 21 | //SIG-IDENTS let idents_len = idents.len(); 22 | if quick_parse_inner_id( 23 | input, 24 | //SIG-IDENTS idents 25 | ).is_none() { 26 | //SIG-IDENTS unsafe { idents.set_len(idents_len); } // TODO: remove this 27 | Some(input) 28 | } else { 29 | None 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_expr_choice.rs: -------------------------------------------------------------------------------- 1 | // expr_pest 2 | pub fn parse_expr_id<'i, 'b>( 3 | input: &'i [u8], 4 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 5 | ) -> Result<&'i [u8], Error> { 6 | //SIG-IDENTS let idents_len = idents.len(); 7 | 8 | if let Some(input) = quick_parse_choice_item_id(input, choice_idents) { return Ok(input); } unsafe { idents.set_len(idents_len); } 9 | 10 | // TODO: remove last set_len 11 | 12 | let mut errors = Vec::new(); 13 | errors.push(parse_choice_item_id(input, choice_idents).unwrap_err()); 14 | 15 | //SIG-IDENTS unsafe { idents.set_len(idents_len); } 16 | 17 | Err(Error::new(ErrorKind::All(errors), unsafe{std::str::from_utf8_unchecked(input)}, r#"expr_id expr_pest"#)) 18 | } 19 | pub fn quick_parse_expr_id<'i, 'b>( 20 | input: &'i [u8], 21 | //SIG-IDENTS idents: &'b mut Vec<(Ident<'i>, usize)> 22 | ) -> Option<&'i [u8]> { 23 | //SIG-IDENTS let idents_len = idents.len(); 24 | 25 | if let Some(input) = quick_parse_choice_item_id(input, choice_idents) { return Some(input); } unsafe { idents.set_len(idents_len); } 26 | 27 | None 28 | } 29 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_outer.rs: 
-------------------------------------------------------------------------------- 1 | #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] 2 | pub enum Rule { 3 | RuleVariant, 4 | } 5 | 6 | #[derive(Debug, Copy, Clone)] 7 | pub enum Ident<'i> { 8 | IdentVariant(&'i str), 9 | } 10 | 11 | impl<'i> IdentTrait for Ident<'i> { 12 | type Rule = Rule; 13 | 14 | fn as_rule(&self) -> Rule { 15 | match self { 16 | Ident::IdentVariant(_) => Rule::RuleVariant, 17 | } 18 | } 19 | 20 | fn as_str(&self) -> &str { 21 | match self { 22 | Ident::IdentVariant(s) => s, 23 | } 24 | } 25 | } 26 | 27 | #[automatically_derived] 28 | impl StructIdent { 29 | pub fn parse(rule: Rule, input: &str) -> Result, Error> { 30 | let mut idents = Vec::with_capacity(500); // TODO: refine 500 31 | match rule { 32 | Rule::RuleVariant => StructIdent_faster_pest::parse_RuleVariant(input.as_bytes(), &mut idents)?, 33 | }; 34 | Ok(unsafe { Pairs2::from_idents(idents, input) }) 35 | } 36 | } 37 | 38 | #[automatically_derived] 39 | #[allow(clippy::all)] 40 | mod StructIdent_faster_pest { 41 | use super::*; 42 | 43 | // inner code 44 | } 45 | -------------------------------------------------------------------------------- /faster-pest/examples/csv/main.rs: -------------------------------------------------------------------------------- 1 | use faster_pest::*; 2 | 3 | #[derive(Parser)] 4 | #[grammar = "faster-pest/examples/csv/grammar.pest"] 5 | struct CSVParser { 6 | 7 | } 8 | 9 | fn main() { 10 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/csv/input.csv") { 11 | Ok(s) => s, 12 | Err(_) => match std::fs::read_to_string("examples/csv/input.csv") { 13 | Ok(s) => s, 14 | Err(e) => panic!("cannot read file: {}", e) 15 | } 16 | }; 17 | 18 | let file = CSVParser::parse(Rule::file, &unparsed_file) 19 | .expect("unsuccessful parse") 20 | .next() 21 | .expect("couldn't find file rule"); 22 | 23 | let mut field_sum: f64 = 0.0; 24 | let mut record_count: u64 = 0; 25 | 26 | for 
record in file.into_inner() { 27 | match record.as_rule() { 28 | Rule::record => { 29 | record_count += 1; 30 | 31 | for field in record.into_inner() { 32 | field_sum += field.as_str().parse::().expect("field should be a number") 33 | } 34 | } 35 | // TODO Rule::EOI => (), 36 | o => println!("Unexpected {o:?}") 37 | } 38 | } 39 | 40 | println!("Sum of fields: {}", field_sum); 41 | println!("Number of records: {}", record_count); 42 | } 43 | -------------------------------------------------------------------------------- /faster-pest/benches/lightgrep.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | use std::hint::black_box; 4 | 5 | extern crate test; 6 | 7 | #[path = "../examples/lightgrep/main.rs"] 8 | mod lightgrep; 9 | 10 | use faster_pest::*; 11 | use test::Bencher; 12 | use lightgrep::*; 13 | 14 | #[bench] 15 | fn lightgrep_as_is(b: &mut Bencher) { 16 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/lightgrep/input.txt") { 17 | Ok(s) => s, 18 | Err(_) => match std::fs::read_to_string("examples/lightgrep/input.txt") { 19 | Ok(s) => s, 20 | Err(e) => panic!("cannot read file: {}", e) 21 | } 22 | }; 23 | 24 | b.iter(|| black_box({ 25 | LightgrepParser::parse_file(&unparsed_file).expect("unsuccessful parse"); 26 | })); 27 | } 28 | 29 | #[bench] 30 | fn lightgrep_to_rust(b: &mut Bencher) { 31 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/lightgrep/input.txt") { 32 | Ok(s) => s, 33 | Err(_) => match std::fs::read_to_string("examples/lightgrep/input.txt") { 34 | Ok(s) => s, 35 | Err(e) => panic!("cannot read file: {}", e) 36 | } 37 | }; 38 | 39 | b.iter(|| black_box({ 40 | let output = LightgrepParser::parse_file(&unparsed_file).map_err(|e| e.print(unparsed_file.as_str())).expect("unsuccessful parse"); 41 | let file = output.into_iter().next().expect("couldn't find file rule"); 42 | let main_object = file.children().next().expect("couldn't find main 
object"); 43 | let output = ExpressionRationnelle::from_ident_ref(main_object); 44 | })); 45 | } 46 | -------------------------------------------------------------------------------- /faster-pest-derive/src/pattern_rule.rs: -------------------------------------------------------------------------------- 1 | pub fn parse_RuleVariant<'i, 'b>(input: &'i [u8], idents: &'b mut Vec<(Ident<'i>, usize)>) -> Result<&'i [u8], Error> { 2 | let idents_len = idents.len(); 3 | if idents_len == idents.capacity() { 4 | idents.reserve(500); 5 | } 6 | unsafe { idents.set_len(idents_len + 1); } 7 | let new_input = match parse_top_expr_id(input, formatted_idents) { 8 | Ok(input) => input, 9 | Err(e) => { 10 | unsafe { idents.set_len(idents_len); } 11 | return Err(e); 12 | } 13 | }; 14 | let content = unsafe { std::str::from_utf8_unchecked(input.get_unchecked(..input.len() - new_input.len())) }; 15 | unsafe { *idents.get_unchecked_mut(idents_len) = (Ident::IdentVariant(content), idents.len()); } 16 | Ok(new_input) 17 | } 18 | 19 | pub fn quick_parse_RuleVariant<'i, 'b>(input: &'i [u8], idents: &'b mut Vec<(Ident<'i>, usize)>) -> Option<&'i [u8]> { 20 | let idents_len = idents.len(); 21 | if idents_len == idents.capacity() { 22 | idents.reserve(500); 23 | } 24 | unsafe { idents.set_len(idents_len + 1); } 25 | let new_input = match quick_parse_top_expr_id(input, formatted_idents) { 26 | Some(input) => input, 27 | None => { 28 | unsafe { idents.set_len(idents_len); } 29 | return None; 30 | } 31 | }; 32 | let content = unsafe { std::str::from_utf8_unchecked(input.get_unchecked(..input.len() - new_input.len())) }; 33 | unsafe { *idents.get_unchecked_mut(idents_len) = (Ident::IdentVariant(content), idents.len()); } 34 | Some(new_input) 35 | } 36 | -------------------------------------------------------------------------------- /faster-pest/examples/ini/main.rs: -------------------------------------------------------------------------------- 1 | use faster_pest::*; 2 | use 
std::collections::HashMap; 3 | 4 | #[derive(Parser)] 5 | #[grammar = "faster-pest/examples/ini/grammar.pest"] 6 | pub struct INIParser; 7 | 8 | fn main() { 9 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/ini/input.ini") { 10 | Ok(s) => s, 11 | Err(_) => match std::fs::read_to_string("examples/ini/input.ini") { 12 | Ok(s) => s, 13 | Err(e) => panic!("cannot read file: {}", e) 14 | } 15 | }; 16 | 17 | let file = INIParser::parse(Rule::file, &unparsed_file) 18 | .expect("unsuccessful parse") 19 | .next() 20 | .expect("couldn't find file rule"); 21 | 22 | let mut properties: HashMap<&str, HashMap<&str, &str>> = HashMap::new(); 23 | 24 | let mut current_section_name = ""; 25 | 26 | for line in file.into_inner() { 27 | match line.as_rule() { 28 | Rule::section => { 29 | let mut inner_rules = line.into_inner(); // { name } 30 | current_section_name = inner_rules.next().expect("section name").as_str(); 31 | } 32 | Rule::property => { 33 | let mut inner_rules = line.into_inner(); // { name ~ "=" ~ value } 34 | 35 | let name: &str = inner_rules.next().expect("property name").as_str(); 36 | let value: &str = inner_rules.next().expect("property value").as_str(); 37 | 38 | // Insert an empty inner hash map if the outer hash map hasn't 39 | // seen this section name before. 
40 | let section = properties.entry(current_section_name).or_default(); 41 | section.insert(name, value); 42 | } 43 | //Rule::EOI => (), 44 | _ => unreachable!(), 45 | } 46 | } 47 | 48 | println!("{:#?}", properties); 49 | } 50 | -------------------------------------------------------------------------------- /faster-pest-generator/src/tree_inspection.rs: -------------------------------------------------------------------------------- 1 | use crate::*; 2 | 3 | pub fn list_exprs(expr: &FPestExpr) -> Vec<&FPestExpr> { 4 | let mut exprs = Vec::new(); 5 | match expr { 6 | FPestExpr::NegPred(expr) | FPestExpr::Opt(expr) | FPestExpr::Rep(expr, _) => exprs.extend(list_exprs(expr)), 7 | FPestExpr::Seq(items) | FPestExpr::Choice(items) => items.iter().for_each(|i| exprs.extend(list_exprs(i))), 8 | FPestExpr::Ident(_) | FPestExpr::Str(_) | FPestExpr::Insens(_) | FPestExpr::CharacterCondition(_) => {}, 9 | } 10 | exprs.push(expr); 11 | exprs 12 | } 13 | 14 | pub fn contains_idents(expr: &FPestExpr, has_whitespace: bool) -> bool { 15 | match expr { 16 | FPestExpr::Ident(ident) if ident != "SOI" && ident != "EOI" && ident != "NEWLINE" => { 17 | true 18 | }, 19 | FPestExpr::NegPred(expr) | FPestExpr::Opt(expr) => contains_idents(expr, has_whitespace), 20 | FPestExpr::Seq(items) => has_whitespace || items.iter().any(|i| contains_idents(i, has_whitespace)), 21 | FPestExpr::Choice(items) => true, // TODO: items.iter().any(|i| contains_idents(i, has_whitespace)), 22 | FPestExpr::Rep(expr, _) => has_whitespace || contains_idents(expr, has_whitespace), 23 | FPestExpr::Str(_) | FPestExpr::Insens(_) | FPestExpr::CharacterCondition(_) => false, 24 | FPestExpr::Ident(_) => false, 25 | } 26 | } 27 | 28 | pub fn list_choices<'a>(expr: &'a OptimizedExpr, choices: &mut Vec<&'a OptimizedExpr>) { 29 | if let OptimizedExpr::Choice(first, second) = expr { 30 | list_choices(first, choices); 31 | list_choices(second, choices); 32 | } else { 33 | choices.push(expr); 34 | } 35 | } 36 | 37 | pub fn 
list_seq<'a>(expr: &'a OptimizedExpr, seq: &mut Vec<&'a OptimizedExpr>) { 38 | if let OptimizedExpr::Seq(first, second) = expr { 39 | list_seq(first, seq); 40 | list_seq(second, seq); 41 | } else { 42 | seq.push(expr); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /faster-pest/examples/json/input.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "5741cfe6bf9f447a509a269e", 4 | "index": 0, 5 | "guid": "642f0c2a-3d87-43ac-8f82-25f004e0c96a", 6 | "isActive": false, 7 | "balance": "$3,666.68", 8 | "picture": "http://placehold.it/32x32", 9 | "age": 39, 10 | "eyeColor": "blue", 11 | "name": "Leonor Herman", 12 | "gender": "female", 13 | "company": "RODEOMAD", 14 | "email": "leonorherman@rodeomad.com", 15 | "phone": "+1 (848) 456-2962", 16 | "address": "450 Seeley Street, Iberia, North Dakota, 7859", 17 | "about": "Reprehenderit in anim laboris labore sint occaecat labore proident ipsum exercitation. Ut ea aliqua duis occaecat consectetur aliqua anim id. Dolor ea fugiat excepteur reprehenderit eiusmod enim non sit nisi. Mollit consequat anim mollit et excepteur qui laborum qui eiusmod. Qui ea amet incididunt cillum quis occaecat excepteur qui duis nisi. Dolore labore eu sunt consequat magna.\r\n", 18 | "registered": "2015-03-06T02:49:06 -02:00", 19 | "latitude": -29.402032, 20 | "longitude": 151.088135, 21 | "tags": [ 22 | "Lorem", 23 | "voluptate", 24 | "aute", 25 | "ullamco", 26 | "elit", 27 | "esse", 28 | "culpa" 29 | ], 30 | "friends": [ 31 | { 32 | "id": 0, 33 | "name": "Millicent Norman" 34 | }, 35 | { 36 | "id": 1, 37 | "name": "Vincent Cannon" 38 | }, 39 | { 40 | "id": 2, 41 | "name": "Gray Berry" 42 | } 43 | ], 44 | "greeting": "Hello, Leonor Herman! 
You have 4 unread messages.", 45 | "favoriteFruit": "apple" 46 | }, 47 | { 48 | "_id": "5741cfe69424f42d4493caa2", 49 | "index": 1, 50 | "guid": "40ec6b43-e6e6-44e1-92a8-dc80cd5d7179", 51 | "isActive": true, 52 | "balance": "$2,923.78", 53 | "picture": "http://placehold.it/32x32", 54 | "age": 36, 55 | "eyeColor": "blue", 56 | "name": "Barton Barnes", 57 | "gender": "male", 58 | "company": "BRAINQUIL", 59 | "email": "bartonbarnes@brainquil.com", 60 | "phone": "+1 (907) 553-3739", 61 | "address": "644 Falmouth Street, Sedley, Michigan, 5602", 62 | "about": "Et nulla laboris consectetur laborum labore. Officia dolor sint do amet excepteur dolore eiusmod. Occaecat pariatur sunt velit sunt ullamco labore commodo mollit sint dolore occaecat.\r\n", 63 | "registered": "2014-08-28T01:07:22 -03:00", 64 | "latitude": 14.056553, 65 | "longitude": -61.911624, 66 | "tags": [ 67 | "laboris", 68 | "sunt", 69 | "esse", 70 | "tempor", 71 | "pariatur", 72 | "occaecat", 73 | "et" 74 | ], 75 | "friends": [ 76 | { 77 | "id": 0, 78 | "name": "Tillman Mckay" 79 | }, 80 | { 81 | "id": 1, 82 | "name": "Rivera Berg" 83 | }, 84 | { 85 | "id": 2, 86 | "name": "Rosetta Erickson" 87 | } 88 | ], 89 | "greeting": "Hello, Barton Barnes! 
You have 2 unread messages.", 90 | "favoriteFruit": "banana" 91 | } 92 | ] 93 | -------------------------------------------------------------------------------- /faster-pest/src/error.rs: -------------------------------------------------------------------------------- 1 | const RED: &str = "\x1b[31;1m"; 2 | const NORMAL: &str = "\x1b[0m"; 3 | const BLUE: &str = "\x1b[34;1m"; 4 | const BOLD: &str = "\x1b[1m"; 5 | 6 | #[derive(Debug)] 7 | pub enum ErrorKind { 8 | ExpectedValue(&'static str), 9 | Expected(&'static str), 10 | NegPredFailed(&'static str), 11 | All(Vec) 12 | } 13 | 14 | impl std::fmt::Display for ErrorKind { 15 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 16 | match self { 17 | ErrorKind::ExpectedValue(expected) => write!(f, "Expected value: {expected}"), 18 | ErrorKind::Expected(expected) => write!(f, "Expected: {expected}"), 19 | ErrorKind::NegPredFailed(not_expected) => write!(f, "Negated predicate failed: {not_expected}"), 20 | ErrorKind::All(errors) => write!(f, "All {} accepted patterns fail to match", errors.len()), 21 | } 22 | } 23 | } 24 | 25 | #[derive(Debug)] 26 | pub struct Error { 27 | kind: ErrorKind, 28 | remaining_bytes: usize, 29 | trace: Vec, 30 | note: Option, 31 | } 32 | 33 | impl Error { 34 | pub fn new(kind: ErrorKind, input: &str, root: impl Into) -> Error { 35 | Error { 36 | kind, 37 | remaining_bytes: input.len(), 38 | trace: vec![root.into()], 39 | note: None, 40 | } 41 | } 42 | 43 | pub fn with_trace(mut self, trace: impl Into) -> Self { 44 | self.trace.push(trace.into()); 45 | self 46 | } 47 | 48 | pub fn with_note(mut self, note: impl Into) -> Self { 49 | if self.note.is_none() { 50 | self.note = Some(note.into()); 51 | } 52 | self 53 | } 54 | 55 | pub fn print(&self, input: &str) { 56 | if self.remaining_bytes > input.len() { 57 | panic!("Error::print: remaining_bytes is greater than input length"); 58 | } 59 | let position = input.len() - self.remaining_bytes; 60 | 61 | let line_start = 
input[..position].rfind('\n').map(|i| i + 1).unwrap_or(0); 62 | let line_end = input[position..].find('\n').map(|i| i + position).unwrap_or(input.len()); 63 | let line_number = input[..position].matches('\n').count() + 1; 64 | let position_in_utf8_line = input[line_start..position].chars().count(); 65 | 66 | println!("{RED}error{NORMAL}: {}", self.kind); 67 | println!("{BLUE} -->{NORMAL} {}:{}:{}", self.trace[0], line_number, position - line_start + 1); 68 | println!("{BLUE} |{NORMAL}"); 69 | println!("{BLUE}{:<3}|{NORMAL} {}", line_number, &input[line_start..line_end]); 70 | println!("{BLUE} |{NORMAL} {}{RED}^{NORMAL}", " ".repeat(position_in_utf8_line)); 71 | if let Some(note) = &self.note { 72 | println!(" {BLUE}= {NORMAL}{BOLD}note{NORMAL}: {note}"); 73 | } 74 | println!(" {BLUE}= {NORMAL}{BOLD}trace{NORMAL}: {}", self.trace.join(", ")); 75 | } 76 | 77 | pub fn into_pest(self, input: &str) -> pest::error::Error { 78 | pest::error::Error::new_from_pos( 79 | pest::error::ErrorVariant::CustomError { 80 | message: format!("{}", self.kind), 81 | }, 82 | pest::Position::new(input, input.len() - self.remaining_bytes).expect("Error::into_pest: invalid position"), 83 | ) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /faster-pest/benches/csv.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | mod pest_classic { 6 | use std::hint::black_box; 7 | 8 | use pest_derive::Parser; 9 | use pest::Parser; 10 | use test::Bencher; 11 | 12 | #[derive(Parser)] 13 | #[grammar = "../faster-pest/examples/csv/grammar.pest"] 14 | pub struct CSVParser { 15 | 16 | } 17 | 18 | #[bench] 19 | fn csv(b: &mut Bencher) { 20 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/csv/input.csv") { 21 | Ok(s) => s, 22 | Err(_) => match std::fs::read_to_string("examples/csv/input.csv") { 23 | Ok(s) => s, 24 | Err(e) => panic!("cannot read 
file: {}", e) 25 | } 26 | }; 27 | 28 | b.iter(|| black_box({ 29 | let file = CSVParser::parse(Rule::file, &unparsed_file) 30 | .expect("unsuccessful parse") 31 | .next() 32 | .expect("couldn't find file rule"); 33 | 34 | let mut field_sum: f64 = 0.0; 35 | let mut record_count: u64 = 0; 36 | 37 | for record in file.into_inner() { 38 | match record.as_rule() { 39 | Rule::record => { 40 | record_count += 1; 41 | 42 | for field in record.into_inner() { 43 | field_sum += field.as_str().parse::<f64>().expect("field should be a number") 44 | } 45 | } 46 | // TODO Rule::EOI => (), 47 | o => println!("Unexpected {o:?}") 48 | } 49 | } 50 | 51 | (field_sum, record_count) 52 | })); 53 | } 54 | } 55 | // Same CSV workload as `pest_classic`, parsed with the parser derived by faster-pest and walked through `children()`. 56 | mod faster_pest { 57 | use std::hint::black_box; 58 | use faster_pest::*; 59 | use test::Bencher; 60 | 61 | #[derive(Parser)] 62 | #[grammar = "faster-pest/examples/csv/grammar.pest"] 63 | pub struct CSVParser { 64 | 65 | } 66 | 67 | #[bench] 68 | fn csv(b: &mut Bencher) { 69 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/csv/input.csv") { 70 | Ok(s) => s, 71 | Err(_) => match std::fs::read_to_string("examples/csv/input.csv") { 72 | Ok(s) => s, 73 | Err(e) => panic!("cannot read file: {}", e) 74 | } 75 | }; 76 | 77 | b.iter(|| black_box({ 78 | let file = CSVParser::parse_file(&unparsed_file).expect("unsuccessful parse"); 79 | let file = file.into_iter().next().expect("couldn't find file rule"); 80 | 81 | let mut field_sum: f64 = 0.0; 82 | let mut record_count: u64 = 0; 83 | 84 | for record in file.children() { 85 | match record.as_rule() { 86 | Rule::record => { 87 | record_count += 1; 88 | 89 | for field in record.children() { 90 | field_sum += field.as_str().parse::<f64>().expect("field should be a number") 91 | } 92 | } 93 | // TODO Rule::EOI => (), 94 | o => println!("Unexpected {o:?}") 95 | } 96 | } 97 | 98 | (field_sum, record_count) 99 | })); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- 
/faster-pest/examples/json/main.rs: -------------------------------------------------------------------------------- 1 | use faster_pest::*; 2 | use std::{collections::HashMap, borrow::Cow}; 3 | 4 | #[derive(Parser)] 5 | #[grammar = "faster-pest/examples/json/grammar.pest"] 6 | pub struct JsonParser; 7 | 8 | #[derive(Debug)] 9 | enum Value<'i> { 10 | String(Cow<'i, str>), 11 | Number(f64), 12 | Boolean(bool), 13 | Array(Vec>), 14 | Object(HashMap, Value<'i>>), 15 | Null, 16 | } 17 | 18 | impl<'i> Value<'i> { 19 | fn from_ident_ref(value: IdentRef<'i, Ident>) -> Self { 20 | match value.as_rule() { 21 | Rule::string => Value::String(unescape(value)), 22 | Rule::number => Value::Number(value.as_str().parse().expect("number")), 23 | Rule::boolean => Value::Boolean(value.as_str() == "true"), 24 | Rule::array => { 25 | let mut array = Vec::new(); 26 | array.extend(value.children().map(Value::from_ident_ref)); 27 | Value::Array(array) 28 | } 29 | Rule::object => { 30 | let mut object = HashMap::new(); 31 | for property in value.children() { 32 | let mut property_children = property.children(); 33 | let name = property_children.next().expect("name"); 34 | let name = unescape(name); 35 | let value = property_children.next().expect("value"); 36 | object.insert(name, Value::from_ident_ref(value)); 37 | } 38 | Value::Object(object) 39 | } 40 | Rule::null => Value::Null, 41 | Rule::property | Rule::file | Rule::escaped_char => unreachable!(), 42 | } 43 | } 44 | } 45 | 46 | fn unescape<'i>(s: IdentRef<'i, Ident>) -> Cow<'i, str> { 47 | let children_count = s.children_count(); 48 | if children_count == 0 { 49 | return Cow::Borrowed(s.as_str()); 50 | } 51 | let mut unescaped = String::with_capacity(s.as_str().len() - children_count); 52 | let mut i = 0; 53 | let start_addr = s.as_str().as_ptr() as usize; 54 | for escaped_char in s.children() { 55 | let end = escaped_char.as_str().as_ptr() as usize - start_addr; 56 | unescaped.push_str(unsafe { s.as_str().get_unchecked(i..end) }); 
57 | match unsafe { escaped_char.as_str().as_bytes().get_unchecked(1) } { 58 | b'"' => unescaped.push('"'), 59 | b'\\' => unescaped.push('\\'), 60 | b'/' => unescaped.push('/'), 61 | b'b' => unescaped.push('\x08'), 62 | b'f' => unescaped.push('\x0c'), 63 | b'n' => unescaped.push('\n'), 64 | b'r' => unescaped.push('\r'), 65 | b't' => unescaped.push('\t'), 66 | b'u' => { 67 | // Warning when you implement this, you might want to increase the capacity of the string set above 68 | unimplemented!() 69 | } 70 | _ => unreachable!() 71 | } 72 | i = end + 2; 73 | } 74 | unescaped.push_str(unsafe { s.as_str().get_unchecked(i..) }); 75 | Cow::Owned(unescaped) 76 | } 77 | 78 | fn main() { 79 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/json/input.json") { 80 | Ok(s) => s, 81 | Err(_) => match std::fs::read_to_string("examples/json/input.json") { 82 | Ok(s) => s, 83 | Err(e) => panic!("cannot read file: {}", e) 84 | } 85 | }; 86 | 87 | let output = JsonParser::parse_file(&unparsed_file).map_err(|e| e.print(unparsed_file.as_str())).expect("unsuccessful parse"); 88 | let file = output.into_iter().next().expect("couldn't find file rule"); 89 | let main_object = file.children().next().expect("couldn't find main object"); 90 | let output = Value::from_ident_ref(main_object); 91 | println!("{:#?}", output); 92 | } 93 | -------------------------------------------------------------------------------- /faster-pest/README.md: -------------------------------------------------------------------------------- 1 | # Faster-Pest 2 | 3 | Welcome to `faster-pest`, a **high-performance** code generator for [Parsing Expression Grammars](https://pest.rs/book/grammars/peg.html). `faster-pest` is an unofficial proc-macro providing next-level implementations of [Pest](https://pest.rs/) parsers. It uses low-level optimization tricks under the hood to generate highly optimized code which minimizes the overhead of the AST recognition process, resulting in much faster parsing. 
4 | 5 | `faster-pest` is **compatible** with the standard [Pest syntax](https://pest.rs/book/grammars/syntax.html), so you can easily switch to it without having to change your existing grammar. 6 | 7 | With `faster-pest`, you can enjoy the **convenience and expressiveness** of Pest while getting the performance of a low-level parsing library. Give it a try and experience the difference for yourself! 8 | 9 | The parsing approach used under the hood has nothing in common with the original pest code. To be honest, I never looked at the pest codebase, because it was easier to start from scratch. There is still one thing that was not reimplemented: the parsing of the actual pest grammar. However, this might not last. I need to extend the grammar to enable more advanced tricks, like making it possible to define complex rules with Rust code and import them in a pest grammar. 10 | 11 | ## Benchmarks 12 | 13 | Only a week after its creation, `faster-pest` already parses JSON at **705%** the speed of Pest and **137%** the speed of Nom. This places `faster-pest` on par with `serde_json`. `faster-pest` allows you to approach limits that only SIMD-powered parsers can overcome. 14 | 15 | [Benchmark yourself](https://github.com/Mubelotix/pestvsnom) 16 | 17 | ## Examples 18 | 19 | See the [example folder](https://github.com/Mubelotix/faster-pest/tree/master/faster-pest/examples) for examples. 20 | 21 | It contains two examples from the Pest book: [csv](https://github.com/Mubelotix/faster-pest/tree/master/faster-pest/examples/csv) and [ini](https://github.com/Mubelotix/faster-pest/tree/master/faster-pest/examples/ini). 22 | These use the exact same code as in the Pest book, showing that `faster-pest` is a drop-in replacement for Pest. 23 | 24 | If you don't have any legacy Pest codebase, it is recommended not to use the pest compatibility layer. 
See the other two examples: [json](https://github.com/Mubelotix/faster-pest/tree/master/faster-pest/examples/json) and [po](https://github.com/Mubelotix/faster-pest/tree/master/faster-pest/examples/po). 25 | These are the most efficient and idiomatic uses of `faster-pest`. They work rather similarly to the pest compatibility layer, but their implementation is nicer. 26 | 27 | ## Limitations 28 | 29 | `faster-pest` is still in its early stages of development, so it has some limitations. Here are the most important ones: 30 | 31 | - Limited syntax support (Missing: stack, insens, pospred) 32 | - The tokens API of Pest is not supported (you probably didn't use that) 33 | - Error printing is made for Linux 34 | - Errors can be obscure when a repetition ends prematurely 35 | - Not everything has been tested and there could be incorrect parsing behavior 36 | 37 | ## Optimization tricks used (for curious people) 38 | 39 | - `faster-pest` generates two versions of every parsing component that exists. One version has error support, the other doesn't. There are so many places where error support is not needed because it would be discarded right away (like a failing branch). `faster-pest` will only retrieve errors if parsing completely fails, so any valid input will only result in calls of completely error-unaware code. From the developer's point of view, this optimization is completely transparent. 
40 | - Groups of rules are sometimes grouped into a single rule where pest would have split them 41 | - Repetitions of simple character rules use iterator adapters instead of loops 42 | - Every unnecessary check is bypassed 43 | - Allocations are made in bulk which makes them fairly sporadic 44 | - Code is so small it is likely to get inlined often by the compiler 45 | - Parsing itself is entirely zero-copy 46 | - Iteration over parsed identifiers is almost free 47 | 48 | Licence: GPL-3.0 49 | -------------------------------------------------------------------------------- /faster-pest/examples/lightgrep/main.rs: -------------------------------------------------------------------------------- 1 | use faster_pest::*; 2 | use std::collections::HashSet; 3 | 4 | #[derive(Parser)] 5 | #[grammar = "faster-pest/examples/lightgrep/grammar.pest"] 6 | pub struct LightgrepParser; 7 | /// Repetition suffix of an expression; `from_ident_ref` maps `?` to `ZeroOrOne`, `+` to `OneOrMore` and `*` to `Any`. 8 | #[derive(Debug)] 9 | pub enum Repetition { 10 | ZeroOrOne, 11 | One, 12 | OneOrMore, 13 | Any, 14 | } 15 | /// Tree form of a parsed regular expression ("expression rationnelle"); `from_ident_ref` builds `Any` from an `er` rule and `All` from an `erc` rule. 16 | #[derive(Debug)] 17 | pub enum ExpressionRationnelle { 18 | Joker, 19 | Lettre(char), 20 | EnsembleLettre { lettres: HashSet<char>, negation: bool }, 21 | WithRepetition(Box<ExpressionRationnelle>, Repetition), 22 | All(Vec<ExpressionRationnelle>), 23 | Any(Vec<ExpressionRationnelle>), 24 | } 25 | 26 | // TODO: Investigate: EOI didn't work well 27 | 28 | impl ExpressionRationnelle { 29 | pub fn from_ident_ref(value: IdentRef<Ident>) -> Self { 30 | match value.as_rule() { 31 | Rule::er => { 32 | let mut children: Vec<_> = value.children().map(ExpressionRationnelle::from_ident_ref).collect(); 33 | if children.len() == 1 { 34 | return children.remove(0); 35 | } 36 | ExpressionRationnelle::Any(children) 37 | } 38 | Rule::erc => { 39 | let mut children: Vec<_> = value.children().map(ExpressionRationnelle::from_ident_ref).collect(); 40 | if children.len() == 1 { 41 | return children.remove(0); 42 | } 43 | ExpressionRationnelle::All(children) 44 | } 45 | Rule::erb => { 46 | let mut children = value.children(); 47 | let ere = children.next().expect("ere"); 48 | let ere = 
match ere.as_rule() { 49 | Rule::er => ExpressionRationnelle::from_ident_ref(ere), 50 | Rule::caractere | Rule::caractere_echape => ExpressionRationnelle::Lettre(ere.as_str().chars().next().expect("caractere")), 51 | Rule::joker => ExpressionRationnelle::Joker, 52 | Rule::ens_lettre => ExpressionRationnelle::from_ident_ref(ere), 53 | any => panic!("Unknown rule in ere: {:?}", any), 54 | }; 55 | if let Some(rep) = children.next() { 56 | let rep = match rep.as_str() { 57 | "?" => Repetition::ZeroOrOne, 58 | "+" => Repetition::OneOrMore, 59 | "*" => Repetition::Any, 60 | any => panic!("Unknown repetition: {:?}", any), 61 | }; 62 | ExpressionRationnelle::WithRepetition(Box::new(ere), rep) 63 | } else { 64 | ere 65 | } 66 | } 67 | Rule::ens_lettre => { 68 | let negation = value.as_str().as_bytes().get(1) == Some(&b'^'); 69 | let mut lettres = HashSet::new(); 70 | for element_ens_lettre in value.children() { 71 | debug_assert!(element_ens_lettre.as_rule() == Rule::element_ens_lettre); 72 | let mut children = element_ens_lettre.children(); 73 | let first = children.next().expect("first"); 74 | match children.next() { 75 | Some(second) => { 76 | let first = first.as_str().chars().next().expect("first"); 77 | let second = second.as_str().chars().next().expect("second"); 78 | for c in first..=second { 79 | lettres.insert(c); 80 | } 81 | } 82 | None => { 83 | let c = first.as_str().chars().next().expect("c"); 84 | lettres.insert(c); 85 | } 86 | } 87 | } 88 | ExpressionRationnelle::EnsembleLettre { lettres, negation } 89 | } 90 | any => panic!("Unknown rule: {:?}", any), 91 | } 92 | } 93 | } 94 | 95 | fn main() { 96 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/lightgrep/input.txt") { 97 | Ok(s) => s, 98 | Err(_) => match std::fs::read_to_string("examples/lightgrep/input.txt") { 99 | Ok(s) => s, 100 | Err(e) => panic!("cannot read file: {}", e) 101 | } 102 | }; 103 | 104 | let output = LightgrepParser::parse_file(&unparsed_file).map_err(|e| 
e.print(unparsed_file.as_str())).expect("unsuccessful parse"); 105 | let file = output.into_iter().next().expect("couldn't find file rule"); 106 | let main_object = file.children().next().expect("couldn't find main object"); 107 | println!("{:#?}", main_object); 108 | 109 | let output = ExpressionRationnelle::from_ident_ref(main_object); 110 | println!("{:#?}", output); 111 | } 112 | -------------------------------------------------------------------------------- /faster-pest-generator/src/expr_codegen.rs: -------------------------------------------------------------------------------- 1 | use crate::{*, optimizer::FPestExpr}; 2 | /// Renders `expr` back into pest grammar notation; the result is substituted for the `expr_pest` placeholder by `code`. 3 | fn to_pest(expr: &FPestExpr) -> String { 4 | match expr { 5 | FPestExpr::Str(s) => format!("{s:?}"), 6 | FPestExpr::CharacterCondition(c) => format!("({c})"), 7 | FPestExpr::Insens(s) => format!("^{s:?}"), 8 | FPestExpr::Ident(i) => i.to_owned(), 9 | FPestExpr::NegPred(e) => format!("!{}", to_pest(e)), 10 | FPestExpr::Seq(exprs) => format!("({})", exprs.iter().map(to_pest).collect::<Vec<_>>().join(" ~ ")), 11 | FPestExpr::Choice(exprs) => format!("({})", exprs.iter().map(to_pest).collect::<Vec<_>>().join(" | ")), 12 | FPestExpr::Opt(e) => format!("{}?", to_pest(e)), 13 | FPestExpr::Rep(e, true) => format!("{}*", to_pest(e)), 14 | FPestExpr::Rep(e, false) => format!("{}+", to_pest(e)), 15 | } 16 | } 17 | /// Produces the code for `expr` by taking the template matching its variant and substituting the template's placeholders. NOTE(review): `G` is used below but never declared; the generic parameter list of this signature was most likely stripped from this copy, so restore it from the upstream file. 18 | pub fn code(expr: &FPestExpr, ids: &mut IdRegistry, has_whitespace: bool) -> String { 19 | let id = ids.id(expr); 20 | let mut code = match expr { 21 | FPestExpr::Ident(ident) => G::ident(ident), 22 | FPestExpr::CharacterCondition(condition) => { 23 | G::pattern_expr_character().replace("character_condition", condition) 24 | } 25 | FPestExpr::Choice(items) => { 26 | let mut code = G::pattern_expr_choice().to_owned(); 27 | code = multi_replace(code, vec![ 28 | ("choice_item_id", items.iter().map(|item| ids.id(item)).collect::<Vec<_>>()), 29 | ("choice_idents", items.iter().map(|item| { 30 | match contains_idents(item, has_whitespace) { 31 | true => 
"idents", 32 | false => "", 33 | }.to_string() 34 | }).collect::<Vec<_>>()), 35 | ]); 36 | code 37 | } 38 | FPestExpr::Str(value) => { 39 | G::pattern_expr_str().replace("expr_str", format!("{value:?}").as_str()) 40 | } 41 | FPestExpr::Seq(items) => { 42 | let mut code = G::pattern_expr_seq().to_owned(); 43 | code = multi_replace(code, vec![ 44 | ("seq_item_id", items.iter().map(|item| ids.id(item)).collect::<Vec<_>>()), 45 | ("seq_idents", items.iter().map(|item| { 46 | match contains_idents(item, has_whitespace) { 47 | true => "idents", 48 | false => "", 49 | }.to_string() 50 | }).collect::<Vec<_>>()), 51 | ("seq_n", (0..items.len()).map(|i| i.to_string()).collect::<Vec<_>>()), 52 | ]); 53 | code 54 | } 55 | FPestExpr::Rep(expr, empty_accepted) => { 56 | if let FPestExpr::CharacterCondition(condition) = &**expr { 57 | let mut code = G::pattern_expr_rep_character().replace("character_condition", condition); 58 | if !empty_accepted { 59 | code = code.replace("//NON-EMPTY", ""); 60 | } 61 | code 62 | } else { 63 | let mut code = G::pattern_expr_rep().to_owned(); 64 | code = code.replace("inner_eid", &ids.id(expr)); 65 | code = code.replace("inner_idents", match contains_idents(expr, has_whitespace) { 66 | true => "idents", 67 | false => "", 68 | }); 69 | 70 | if !empty_accepted { 71 | code = code.replace("//NON-EMPTY", ""); 72 | } 73 | code 74 | } 75 | } 76 | FPestExpr::Opt(expr) => { 77 | let code = G::pattern_expr_opt().to_owned(); 78 | let code = code.replace("inner_eid", &ids.id(expr)); 79 | code 80 | } 81 | FPestExpr::NegPred(expr) => { 82 | let code = G::pattern_expr_neg().to_owned(); 83 | let code = code.replace("inner_id", &ids.id(expr)); 84 | code 85 | } 86 | FPestExpr::Insens(value) => { 87 | let inverted_value = value.chars().map(|c| { 88 | if c.is_ascii_uppercase() { 89 | c.to_ascii_lowercase() 90 | } else { 91 | c.to_ascii_uppercase() 92 | } 93 | }).collect::<String>(); 94 | 95 | let code = G::pattern_expr_insens().to_owned(); 96 | let code = code.replace("expr_str", 
format!("{value:?}").as_str()); 97 | let code = code.replace("expr_inv_str", format!("{inverted_value:?}").as_str()); 98 | let code = code.replace("expr_len_str", &value.len().to_string()); 99 | code 100 | } 101 | }; 102 | 103 | code = code.replace("expr_id", &id); 104 | code = code.replace("expr_pest", &to_pest(expr)); 105 | if contains_idents(expr, has_whitespace) { 106 | code = code.replace("//SIG-IDENTS", ""); 107 | } 108 | if has_whitespace { 109 | code = code.replace("//WSP", ""); 110 | } 111 | code 112 | } 113 | -------------------------------------------------------------------------------- /faster-pest-derive/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "block-buffer" 7 | version = "0.10.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" 10 | dependencies = [ 11 | "generic-array", 12 | ] 13 | 14 | [[package]] 15 | name = "cfg-if" 16 | version = "1.0.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 19 | 20 | [[package]] 21 | name = "cpufeatures" 22 | version = "0.2.5" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" 25 | dependencies = [ 26 | "libc", 27 | ] 28 | 29 | [[package]] 30 | name = "crypto-common" 31 | version = "0.1.6" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 34 | dependencies = [ 35 | "generic-array", 36 | "typenum", 37 | ] 38 | 39 | [[package]] 40 | name = "digest" 41 | version = "0.10.6" 42 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 43 | checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" 44 | dependencies = [ 45 | "block-buffer", 46 | "crypto-common", 47 | ] 48 | 49 | [[package]] 50 | name = "generic-array" 51 | version = "0.14.6" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" 54 | dependencies = [ 55 | "typenum", 56 | "version_check", 57 | ] 58 | 59 | [[package]] 60 | name = "libc" 61 | version = "0.2.139" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" 64 | 65 | [[package]] 66 | name = "once_cell" 67 | version = "1.17.0" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" 70 | 71 | [[package]] 72 | name = "pest" 73 | version = "2.5.2" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "0f6e86fb9e7026527a0d46bc308b841d73170ef8f443e1807f6ef88526a816d4" 76 | dependencies = [ 77 | "thiserror", 78 | "ucd-trie", 79 | ] 80 | 81 | [[package]] 82 | name = "pest-based-parser" 83 | version = "0.1.0" 84 | dependencies = [ 85 | "pest_meta", 86 | ] 87 | 88 | [[package]] 89 | name = "pest_meta" 90 | version = "2.5.2" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "984298b75898e30a843e278a9f2452c31e349a073a0ce6fd950a12a74464e065" 93 | dependencies = [ 94 | "once_cell", 95 | "pest", 96 | "sha1", 97 | ] 98 | 99 | [[package]] 100 | name = "proc-macro2" 101 | version = "1.0.49" 102 | source = "registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" 104 | dependencies = [ 105 | "unicode-ident", 106 | ] 107 | 108 | [[package]] 109 | name = "quote" 110 | version = "1.0.23" 
111 | source = "registry+https://github.com/rust-lang/crates.io-index" 112 | checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" 113 | dependencies = [ 114 | "proc-macro2", 115 | ] 116 | 117 | [[package]] 118 | name = "sha1" 119 | version = "0.10.5" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" 122 | dependencies = [ 123 | "cfg-if", 124 | "cpufeatures", 125 | "digest", 126 | ] 127 | 128 | [[package]] 129 | name = "syn" 130 | version = "1.0.107" 131 | source = "registry+https://github.com/rust-lang/crates.io-index" 132 | checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" 133 | dependencies = [ 134 | "proc-macro2", 135 | "quote", 136 | "unicode-ident", 137 | ] 138 | 139 | [[package]] 140 | name = "thiserror" 141 | version = "1.0.38" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" 144 | dependencies = [ 145 | "thiserror-impl", 146 | ] 147 | 148 | [[package]] 149 | name = "thiserror-impl" 150 | version = "1.0.38" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" 153 | dependencies = [ 154 | "proc-macro2", 155 | "quote", 156 | "syn", 157 | ] 158 | 159 | [[package]] 160 | name = "typenum" 161 | version = "1.16.0" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" 164 | 165 | [[package]] 166 | name = "ucd-trie" 167 | version = "0.1.5" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81" 170 | 171 | [[package]] 172 | name = "unicode-ident" 173 | version = "1.0.6" 174 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 175 | checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" 176 | 177 | [[package]] 178 | name = "version_check" 179 | version = "0.9.4" 180 | source = "registry+https://github.com/rust-lang/crates.io-index" 181 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 182 | -------------------------------------------------------------------------------- /faster-pest/src/pairs.rs: -------------------------------------------------------------------------------- 1 | use std::rc::Rc; 2 | use crate::*; 3 | use pest::*; 4 | 5 | /// A [`Pair2`] is a reference to a `Ident` and its children. It mimics pest's [`Pair`](pest::iterators::Pair). 6 | /// It is created by [`Pairs2::next`]. 7 | #[derive(Clone)] 8 | pub struct Pair2<'i, I: IdentTrait> { 9 | /// The original input that was parsed. 10 | original_input: &'i str, 11 | /// A reference to the output of the parsing. 12 | all_idents: Rc>, 13 | /// The range indicates where the [`Pair2`] is stored in `all_idents`. 14 | /// `all_idents[range.start]` is the ident of the [`Pair2`], and `all_idents[range.start + 1..range.end]` are the children. 15 | range: std::ops::Range, 16 | } 17 | 18 | impl<'i, I: IdentTrait> Pair2<'i, I> { 19 | pub fn ident(&self) -> &I { 20 | // This is safe if the data is valid. 21 | // The data is valid because it originally comes from `Pairs2::from_idents`, which is only called with valid data. 22 | unsafe { 23 | &self.all_idents.get_unchecked(self.range.start).0 24 | } 25 | } 26 | 27 | pub fn as_rule(&self) -> I::Rule { 28 | self.ident().as_rule() 29 | } 30 | 31 | pub fn as_str(&self) -> &'i str { 32 | // This is safe if the data is valid. 33 | // The data is valid because it originally comes from `Pairs2::from_idents`, which is only called with valid data. 
34 | unsafe { 35 | let str_start = self.ident().as_str().as_ptr() as usize - self.original_input.as_ptr() as usize; 36 | let str_end = str_start + self.ident().as_str().len(); 37 | self.original_input.get_unchecked(str_start..str_end) 38 | } 39 | } 40 | 41 | #[deprecated = "Please use as_span instead"] 42 | pub fn into_span(self) -> Span<'i> { 43 | self.as_span() 44 | } 45 | 46 | pub fn as_span(&self) -> Span<'i> { 47 | let start = self.as_str().as_ptr() as usize - self.original_input.as_ptr() as usize; 48 | let end = start + self.as_str().len(); 49 | Span::new(self.original_input, start, end).expect("Pair2::as_span: invalid span") 50 | } 51 | 52 | pub fn inner(&self) -> Pairs2<'i, I> { 53 | Pairs2 { 54 | all_idents: Rc::clone(&self.all_idents), 55 | range: self.range.start + 1..self.range.end, 56 | initial_text: self.original_input, 57 | i: 0, 58 | } 59 | } 60 | 61 | pub fn into_inner(self) -> Pairs2<'i, I> { 62 | Pairs2 { 63 | all_idents: self.all_idents, 64 | range: self.range.start + 1..self.range.end, 65 | initial_text: self.original_input, 66 | i: 0, 67 | } 68 | } 69 | } 70 | 71 | impl<'i, I: IdentTrait> std::fmt::Debug for Pair2<'i, I> { 72 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 73 | f.debug_struct(&format!("{:?}", self.as_rule())) 74 | .field("text", &self.as_str()) 75 | //.field("range", &self.range) 76 | .field("inner", &self.inner()) 77 | .finish() 78 | } 79 | } 80 | 81 | 82 | /// A [`Pairs2`] is an iterator over [`Pair2`]s. It mimics pest's [`Pairs`](pest::iterators::Pairs). 83 | /// It is created by [`Parser::parse`]. 84 | /// 85 | /// Iterating over it will only yield top-level children. 86 | /// To iterate over all [`Pair2`]s, use [`Pair2::into_inner`] on yielded [`Pair2`]s. 
87 | #[derive(Clone)] 88 | pub struct Pairs2<'i, I: IdentTrait> { 89 | all_idents: Rc>, 90 | range: std::ops::Range, 91 | initial_text: &'i str, 92 | i: usize, 93 | } 94 | 95 | impl<'i, I: IdentTrait> Pairs2<'i, I> { 96 | /// This is used by the generated parser to convert its output to a [`Pairs2`]. 97 | /// **You should not ever need to use this.** 98 | /// 99 | /// # Safety 100 | /// 101 | /// The whole [Pairs2] and [Pair2] implementation assumes that the arguments of this function are valid. 102 | /// When this method is called by generated code, the input is guaranteed to be valid. 103 | pub unsafe fn from_idents(idents: Vec<(I, usize)>, initial_text: &'i str) -> Self { 104 | Self { 105 | range: 0..idents.len(), 106 | all_idents: Rc::new(idents), 107 | initial_text, 108 | i: 0, 109 | } 110 | } 111 | } 112 | 113 | impl<'i, I: IdentTrait + 'i> Iterator for Pairs2<'i, I> { 114 | type Item = Pair2<'i, I>; 115 | 116 | fn next(&mut self) -> Option { 117 | if self.i >= self.range.len() { 118 | return None; 119 | } 120 | let start = self.i + self.range.start; 121 | let end = unsafe { 122 | // This is safe if the data is valid. 123 | // The data is valid because it originally comes from `Pairs2::from_idents`, which is only called with valid data. 
124 | self.all_idents.get_unchecked(start).1 125 | }; 126 | self.i = end - self.range.start; 127 | 128 | Some(Pair2 { 129 | all_idents: Rc::clone(&self.all_idents), 130 | original_input: self.initial_text, 131 | range: start..end, 132 | }) 133 | } 134 | } 135 | 136 | impl<'i, I: IdentTrait> std::fmt::Debug for Pairs2<'i, I> { 137 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 138 | f.debug_list().entries({ 139 | let mut clone = self.clone(); 140 | clone.i = 0; 141 | clone 142 | }).finish() 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /faster-pest-derive/src/lib.rs: -------------------------------------------------------------------------------- 1 | use faster_pest_generator::Generator; 2 | extern crate proc_macro; 3 | use proc_macro::TokenStream; 4 | 5 | use syn::*; 6 | use proc_macro2::TokenTree; 7 | 8 | fn list_grammar_files(attrs: &[Attribute]) -> Vec { 9 | attrs.iter().filter(|attr| attr.path.is_ident("grammar")).map(|a| { 10 | let mut tokens = a.tokens.clone().into_iter(); 11 | match tokens.next() { 12 | Some(TokenTree::Punct(punct)) if punct.as_char() == '=' => (), 13 | _ => panic!("Expected leading '=' in grammar attribute"), 14 | } 15 | let path = match tokens.next() { 16 | Some(TokenTree::Literal(value)) => value.to_string(), 17 | _ => panic!("Expected literal in grammar attribute") 18 | }; 19 | path.trim_matches('"').to_string() 20 | }).collect() 21 | } 22 | 23 | struct RustGenerator { 24 | 25 | } 26 | 27 | impl Generator for RustGenerator { 28 | fn ident(ident: &str) -> String { 29 | String::from(match ident { 30 | "EOI" => { 31 | r#" 32 | pub fn parse_expr_id<'i>(input: &'i [u8]) -> Result<&'i [u8], Error> {{ 33 | if input.is_empty() {{ 34 | Ok(input) 35 | }} else {{ 36 | Err(Error::new(ErrorKind::Expected("EOI"), unsafe{{std::str::from_utf8_unchecked(input)}}, "EOI")) 37 | }} 38 | }} 39 | pub fn quick_parse_expr_id<'i>(input: &'i [u8]) -> Option<&'i [u8]> {{ 40 | if 
input.is_empty() {{ 41 | Some(input) 42 | }} else {{ 43 | None 44 | }} 45 | }} 46 | "# 47 | }, 48 | "SOI" => { 49 | r#" // TODO 50 | pub fn parse_expr_id<'i>(input: &'i [u8]) -> Result<&'i [u8], Error> {{ 51 | Ok(input) 52 | }} 53 | pub fn quick_parse_expr_id<'i>(input: &'i [u8]) -> Option<&'i [u8]> {{ 54 | Some(input) 55 | }} 56 | "# 57 | } 58 | "NEWLINE" => { 59 | r#" 60 | pub fn parse_expr_id<'i>(input: &'i [u8]) -> Result<&'i [u8], Error> {{ 61 | if input.starts_with(b"\r\n") {{ 62 | Ok(unsafe {{ input.get_unchecked(2..) }}) 63 | }} else if input.starts_with(b"\n") || input.starts_with(b"\r") {{ 64 | Ok(unsafe {{ input.get_unchecked(1..) }}) 65 | }} else {{ 66 | Err(Error::new(ErrorKind::Expected("newline"), unsafe{{std::str::from_utf8_unchecked(input)}}, "NEWLINE")) 67 | }} 68 | }} 69 | pub fn quick_parse_expr_id<'i>(input: &'i [u8]) -> Option<&'i [u8]> {{ 70 | if input.starts_with(b"\r\n") {{ 71 | Some(unsafe {{ input.get_unchecked(2..) }}) 72 | }} else if input.starts_with(b"\n") || input.starts_with(b"\r") {{ 73 | Some(unsafe {{ input.get_unchecked(1..) 
}}) 74 | }} else {{ 75 | None 76 | }} 77 | }} 78 | "# 79 | } 80 | _ => "" 81 | }) 82 | 83 | } 84 | 85 | fn character_ident(ident: &str) -> Option<&'static str> { 86 | match ident { 87 | "ASCII_DIGIT" => Some("c.is_ascii_digit()"), 88 | "ASCII_NONZERO_DIGIT" => Some("(c.is_ascii_digit() && c != '0')"), 89 | "ASCII_ALPHA_LOWER" => Some("c.is_ascii_lowercase()"), 90 | "ASCII_ALPHA_UPPER" => Some("c.is_ascii_uppercase()"), 91 | "ASCII_ALPHA" => Some("c.is_ascii_alphabetic()"), 92 | "ASCII_ALPHANUMERIC" => Some("c.is_ascii_alphanumeric()"), 93 | "ASCII" => Some("c.is_ascii()"), 94 | "ANY" => Some("true"), 95 | _ => None 96 | } 97 | } 98 | 99 | fn character(c: u8) -> String { 100 | format!("(c == &{c})") 101 | } 102 | 103 | fn character_range(c1: u8, c2: u8) -> String { 104 | format!("(c >= &{c1} && c <= &{c2})") 105 | } 106 | 107 | fn pattern_expr_character() -> &'static str { 108 | include_str!("pattern_expr_character.rs") 109 | } 110 | 111 | fn pattern_expr_choice() -> &'static str { 112 | include_str!("pattern_expr_choice.rs") 113 | } 114 | 115 | fn pattern_expr_insens() -> &'static str { 116 | include_str!("pattern_expr_insens.rs") 117 | } 118 | 119 | fn pattern_expr_neg() -> &'static str { 120 | include_str!("pattern_expr_neg.rs") 121 | } 122 | 123 | fn pattern_expr_opt() -> &'static str { 124 | include_str!("pattern_expr_opt.rs") 125 | } 126 | 127 | fn pattern_expr_rep_character() -> &'static str { 128 | include_str!("pattern_expr_rep_character.rs") 129 | } 130 | 131 | fn pattern_expr_rep() -> &'static str { 132 | include_str!("pattern_expr_rep.rs") 133 | } 134 | 135 | fn pattern_expr_seq() -> &'static str { 136 | include_str!("pattern_expr_seq.rs") 137 | } 138 | 139 | fn pattern_expr_str() -> &'static str { 140 | include_str!("pattern_expr_str.rs") 141 | } 142 | 143 | fn pattern_outer() -> &'static str { 144 | include_str!("pattern_outer.rs") 145 | } 146 | 147 | fn pattern_rule_method() -> &'static str { 148 | include_str!("pattern_rule_method.rs") 149 | } 150 | 
151 | fn pattern_rule_silent() -> &'static str { 152 | include_str!("pattern_rule_silent.rs") 153 | } 154 | 155 | fn pattern_rule() -> &'static str { 156 | include_str!("pattern_rule.rs") 157 | } 158 | } 159 | 160 | #[proc_macro_derive(Parser, attributes(grammar))] 161 | pub fn derive_parser(input: TokenStream) -> TokenStream { 162 | let ast = parse_macro_input!(input as DeriveInput); 163 | let struct_ident = ast.ident; 164 | 165 | let grammar_files = list_grammar_files(&ast.attrs); 166 | 167 | let code = faster_pest_generator::gen::(struct_ident.to_string(), grammar_files); 168 | 169 | let r = std::fs::write("target/fp_code.rs", &code); 170 | if let Err(e) = r { 171 | eprintln!("Unable to write code to target/fp_code.rs: {e}"); 172 | } 173 | 174 | code.parse().expect("Unable to parse code") 175 | } 176 | -------------------------------------------------------------------------------- /faster-pest/src/ident.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | pub trait IdentTrait: Copy { 4 | type Rule: pest::RuleType; 5 | 6 | fn as_rule(&self) -> Self::Rule; 7 | fn as_str(&self) -> &str; 8 | } 9 | 10 | #[derive(Clone)] 11 | pub struct IdentList { 12 | all_idents: Vec<(I, usize)>, 13 | } 14 | 15 | impl IdentList { 16 | /// This is used by the generated parser to convert its output to an IdentList. 17 | /// **You should not ever need to use this.** 18 | /// 19 | /// # Safety 20 | /// 21 | /// The whole implementation assumes that the arguments of this function are valid. 22 | /// When this method is called by generated code, the input is guaranteed to be valid. 23 | pub unsafe fn from_idents(idents: Vec<(I, usize)>) -> Self { 24 | Self { 25 | all_idents: idents 26 | } 27 | } 28 | 29 | /// # Safety 30 | /// 31 | /// The caller must ensure that `idx` is a valid index. 
32 | pub unsafe fn get_unchecked(&self, idx: usize) -> IdentRef<'_, I> { 33 | IdentRef { 34 | ident_list: self, 35 | range: idx..self.all_idents.get_unchecked(idx).1, 36 | } 37 | } 38 | 39 | pub fn root(&self) -> IdentRef { 40 | IdentRef { 41 | ident_list: self, 42 | range: 0..self.all_idents.len(), 43 | } 44 | } 45 | } 46 | 47 | impl<'i, I: IdentTrait> IntoIterator for &'i IdentList { 48 | type Item = IdentRef<'i, I>; 49 | type IntoIter = IdentIter<'i, I>; 50 | 51 | fn into_iter(self) -> Self::IntoIter { 52 | IdentIter { 53 | ident_list: self, 54 | range: 0..self.all_idents.len(), 55 | i: 0, 56 | } 57 | } 58 | } 59 | 60 | impl std::fmt::Debug for IdentList { 61 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 62 | f.debug_list() 63 | .entries(self.all_idents.iter().map(|(ident, _)| ident.as_str())) 64 | .finish() 65 | } 66 | } 67 | 68 | 69 | #[derive(Clone)] 70 | pub struct IdentRef<'i, I: IdentTrait> { 71 | ident_list: &'i IdentList, 72 | range: std::ops::Range, 73 | } 74 | 75 | impl<'i, I: IdentTrait> IdentRef<'i, I> { 76 | pub fn ident(&self) -> &'i I { 77 | // This is safe if the data is valid. 78 | // The data is valid because it originally comes from `Pairs2::from_idents`, which is only called with valid data. 
79 | unsafe { 80 | &self.ident_list.all_idents.get_unchecked(self.range.start).0 81 | } 82 | } 83 | 84 | pub fn idx(&self) -> usize { 85 | self.range.start 86 | } 87 | 88 | pub fn as_str(&self) -> &'i str { 89 | self.ident().as_str() 90 | } 91 | 92 | pub fn as_rule(&self) -> I::Rule { 93 | self.ident().as_rule() 94 | } 95 | 96 | pub fn children_count(&self) -> usize { 97 | self.range.end - self.range.start - 1 98 | } 99 | 100 | pub fn children(&self) -> IdentIter<'i, I> { 101 | IdentIter { 102 | ident_list: self.ident_list, 103 | range: self.range.start + 1..self.range.end, 104 | i: 0, 105 | } 106 | } 107 | 108 | #[deprecated = "Use `children` instead"] 109 | pub fn inner(&self) -> IdentIter<'i, I> { 110 | self.children() 111 | } 112 | 113 | #[deprecated = "Use `children` instead"] 114 | pub fn into_inner(self) -> IdentIter<'i, I> { 115 | self.children() 116 | } 117 | } 118 | 119 | impl<'i, I: IdentTrait> AsRef for IdentRef<'i, I> { 120 | fn as_ref(&self) -> &'i str { 121 | self.ident().as_str() 122 | } 123 | } 124 | 125 | impl<'i, I: IdentTrait> AsRef for IdentRef<'i, I> { 126 | fn as_ref(&self) -> &'i I { 127 | self.ident() 128 | } 129 | } 130 | 131 | impl<'i, I: IdentTrait> std::fmt::Debug for IdentRef<'i, I> { 132 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 133 | if self.range.len() > 1 { 134 | f.debug_struct(&format!("{:?}", self.as_rule())) 135 | .field("text", &self.as_str()) 136 | .field("children", &self.children()) 137 | .finish() 138 | } else { 139 | f.debug_struct(&format!("{:?}", self.as_rule())) 140 | .field("text", &self.as_str()) 141 | .finish() 142 | } 143 | } 144 | } 145 | 146 | 147 | #[derive(Clone)] 148 | pub struct IdentIter<'i, I: IdentTrait> { 149 | ident_list: &'i IdentList, 150 | range: std::ops::Range, 151 | i: usize, 152 | } 153 | 154 | impl<'i, I: IdentTrait> IdentIter<'i, I> { 155 | pub fn join_all(&self) -> Cow<'i, str> { 156 | // Try joining a reference first 157 | 'try_ref: { 158 | if self.range.is_empty() 
{ 159 | return Cow::Borrowed(""); 160 | } 161 | let first_str = unsafe { self.ident_list.get_unchecked(self.range.start).as_str() }; 162 | let mut end = unsafe { first_str.as_ptr().add(first_str.len()) }; 163 | let others = IdentIter { 164 | ident_list: self.ident_list, 165 | range: self.range.clone(), 166 | i: 1, 167 | }; 168 | for next in others { 169 | let next_str = next.as_str(); 170 | if end == next_str.as_ptr() { 171 | end = unsafe { next_str.as_ptr().add(next_str.len()) }; 172 | } else { 173 | break 'try_ref 174 | } 175 | } 176 | return Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(first_str.as_ptr(), end as usize - first_str.as_ptr() as usize)) }); 177 | } 178 | 179 | // Fallback to a string 180 | let mut result = String::new(); 181 | let all = IdentIter { 182 | ident_list: self.ident_list, 183 | range: self.range.clone(), 184 | i: 0, 185 | }; 186 | for next in all { 187 | result.push_str(next.as_str()); 188 | } 189 | result.into() 190 | } 191 | } 192 | 193 | impl<'i, I: IdentTrait> Iterator for IdentIter<'i, I> { 194 | type Item = IdentRef<'i, I>; 195 | 196 | fn next(&mut self) -> Option { 197 | if self.i >= self.range.len() { 198 | return None; 199 | } 200 | 201 | let start = self.i + self.range.start; 202 | let end = unsafe { 203 | // This is safe if the data is valid. 204 | // The data is valid because it originally comes from `Pairs2::from_idents`, which is only called with valid data. 
/// Resolves JSON-style backslash escapes in `s`.
///
/// Returns the input borrowed when it contains no backslash, otherwise an
/// owned copy with `\n`, `\t`, `\r`, `\b`, `\f`, `\\`, `\"` and `\/` decoded.
/// A lone backslash at the very end of the input is kept as-is.
///
/// # Panics
///
/// Panics on any other escape sequence (for example `\u`), which is not
/// implemented.
fn unescape_str(s: &str) -> Cow<'_, str> {
    let first_escape = match s.find('\\') {
        Some(pos) => pos,
        None => return Cow::Borrowed(s),
    };

    // Single pass into a fresh buffer; the output is never longer than the input.
    let mut result = String::with_capacity(s.len());
    result.push_str(&s[..first_escape]);

    let mut chars = s[first_escape..].chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }
        match chars.next() {
            Some('n') => result.push('\n'),
            Some(literal @ ('\\' | '"' | '/')) => result.push(literal),
            Some('t') => result.push('\t'),
            Some('r') => result.push('\r'),
            Some('b') => result.push('\x08'),
            Some('f') => result.push('\x0C'),
            Some(other) => unimplemented!("unsupported escape sequence: \\{other}"),
            // Trailing backslash with nothing to escape: keep it literally
            // instead of indexing past the end of the buffer.
            None => result.push('\\'),
        }
    }
    Cow::Owned(result)
}
let unparsed_file = match std::fs::read_to_string("faster-pest/examples/json/input.json") { 61 | Ok(s) => s, 62 | Err(_) => match std::fs::read_to_string("examples/json/input.json") { 63 | Ok(s) => s, 64 | Err(e) => panic!("cannot read file: {}", e) 65 | } 66 | }; 67 | 68 | b.iter(|| black_box({ 69 | JsonParser::parse(Rule::file, &unparsed_file) 70 | })); 71 | } 72 | } 73 | 74 | mod faster_pest_json { 75 | use std::hint::black_box; 76 | use super::*; 77 | use faster_pest::*; 78 | use test::Bencher; 79 | 80 | impl<'i> Value<'i> { 81 | fn from_ident_ref(value: IdentRef<'i, Ident>) -> Self { 82 | match value.as_rule() { 83 | Rule::string => Value::String(unescape(value)), 84 | Rule::number => Value::Number(value.as_str().parse().expect("number")), 85 | Rule::boolean => Value::Boolean(value.as_str() == "true"), 86 | Rule::array => { 87 | let mut array = Vec::new(); 88 | array.extend(value.children().map(Value::from_ident_ref)); 89 | Value::Array(array) 90 | } 91 | Rule::object => { 92 | let mut object = BTreeMap::new(); 93 | for property in value.children() { 94 | let mut property_children = property.children(); 95 | let name = property_children.next().expect("name"); 96 | let name = unescape(name); 97 | let value = property_children.next().expect("value"); 98 | object.insert(name, Value::from_ident_ref(value)); 99 | } 100 | Value::Object(object) 101 | } 102 | Rule::null => Value::Null, 103 | Rule::property | Rule::file | Rule::escaped_char => unreachable!(), 104 | } 105 | } 106 | } 107 | 108 | fn unescape<'i>(s: IdentRef<'i, Ident>) -> Cow<'i, str> { 109 | let children_count = s.children_count(); 110 | if children_count == 0 { 111 | return Cow::Borrowed(s.as_str()); 112 | } 113 | let mut unescaped = String::with_capacity(s.as_str().len() - children_count); 114 | let mut i = 0; 115 | let start_addr = s.as_str().as_ptr() as usize; 116 | for escaped_char in s.children() { 117 | let end = escaped_char.as_str().as_ptr() as usize - start_addr; 118 | 
unescaped.push_str(unsafe { s.as_str().get_unchecked(i..end) }); 119 | match unsafe { escaped_char.as_str().as_bytes().get_unchecked(1) } { 120 | b'"' => unescaped.push('"'), 121 | b'\\' => unescaped.push('\\'), 122 | b'/' => unescaped.push('/'), 123 | b'b' => unescaped.push('\x08'), 124 | b'f' => unescaped.push('\x0c'), 125 | b'n' => unescaped.push('\n'), 126 | b'r' => unescaped.push('\r'), 127 | b't' => unescaped.push('\t'), 128 | b'u' => { 129 | // Warning when you implement this, you might want to increase the capacity of the string set above 130 | unimplemented!() 131 | } 132 | _ => unreachable!() 133 | } 134 | i = end + 2; 135 | } 136 | unescaped.push_str(unsafe { s.as_str().get_unchecked(i..) }); 137 | Cow::Owned(unescaped) 138 | } 139 | 140 | #[derive(Parser)] 141 | #[grammar = "faster-pest/examples/json/grammar.pest"] 142 | pub struct JsonParser { 143 | 144 | } 145 | 146 | #[bench] 147 | fn json_as_is(b: &mut Bencher) { 148 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/json/input.json") { 149 | Ok(s) => s, 150 | Err(_) => match std::fs::read_to_string("examples/json/input.json") { 151 | Ok(s) => s, 152 | Err(e) => panic!("cannot read file: {}", e) 153 | } 154 | }; 155 | 156 | b.iter(|| black_box({ 157 | JsonParser::parse_file(&unparsed_file).expect("unsuccessful parse"); 158 | })); 159 | } 160 | 161 | #[bench] 162 | fn json_to_rust(b: &mut Bencher) { 163 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/json/input.json") { 164 | Ok(s) => s, 165 | Err(_) => match std::fs::read_to_string("examples/json/input.json") { 166 | Ok(s) => s, 167 | Err(e) => panic!("cannot read file: {}", e) 168 | } 169 | }; 170 | 171 | b.iter(|| black_box({ 172 | let output = JsonParser::parse_file(&unparsed_file).map_err(|e| e.print(unparsed_file.as_str())).expect("unsuccessful parse"); 173 | let file = output.into_iter().next().expect("couldn't find file rule"); 174 | let main_object = file.children().next().expect("couldn't 
find main object"); 175 | let output = Value::from_ident_ref(main_object); 176 | })); 177 | } 178 | } 179 | 180 | mod serde { 181 | use std::hint::black_box; 182 | 183 | #[bench] 184 | fn json_to_rust(b: &mut test::Bencher) { 185 | let unparsed_file = match std::fs::read_to_string("faster-pest/examples/json/input.json") { 186 | Ok(s) => s, 187 | Err(_) => match std::fs::read_to_string("examples/json/input.json") { 188 | Ok(s) => s, 189 | Err(e) => panic!("cannot read file: {}", e) 190 | } 191 | }; 192 | 193 | b.iter(|| black_box({ 194 | serde_json::from_str::(&unparsed_file).expect("unsuccessful parse"); 195 | })); 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /faster-pest-generator/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use pest_meta::optimizer::OptimizedRule; 4 | pub(crate) use pest_meta::{optimizer::OptimizedExpr, ast::RuleType}; 5 | 6 | mod ids; 7 | pub(crate) use ids::*; 8 | mod tree_inspection; 9 | pub(crate) use tree_inspection::*; 10 | mod expr_codegen; 11 | pub(crate) use expr_codegen::*; 12 | mod optimizer; 13 | pub(crate) use optimizer::*; 14 | 15 | pub trait Generator { 16 | fn ident(ident: &str) -> String; 17 | fn character_ident(ident: &str) -> Option<&'static str>; 18 | fn character(c: u8) -> String; 19 | fn character_range(c1: u8, c2: u8) -> String; 20 | fn pattern_expr_character() -> &'static str; 21 | fn pattern_expr_choice() -> &'static str; 22 | fn pattern_expr_insens() -> &'static str; 23 | fn pattern_expr_neg() -> &'static str; 24 | fn pattern_expr_opt() -> &'static str; 25 | fn pattern_expr_rep_character() -> &'static str; 26 | fn pattern_expr_rep() -> &'static str; 27 | fn pattern_expr_seq() -> &'static str; 28 | fn pattern_expr_str() -> &'static str; 29 | fn pattern_outer() -> &'static str; 30 | fn pattern_rule_method() -> &'static str; 31 | fn pattern_rule_silent() -> &'static str; 32 | 
/// Expands template lines once per replacement value.
///
/// Every line of `text` that contains at least one pattern is replaced by N
/// copies of itself, where N is the (shared) length of the value lists; in
/// copy `i` each pattern is substituted by its `i`-th value. When the value
/// lists are empty, matching lines are removed together with their line
/// terminator. Lines that contain no pattern are left untouched.
///
/// # Panics
///
/// Panics if `values` is empty, if the value lists differ in length, or if
/// one pattern is contained in another.
fn multi_replace(mut text: String, values: Vec<(&'static str, Vec<String>)>) -> String {
    assert!(!values.is_empty(), "Patterns and values must not be empty.");
    let value_count = values[0].1.len();
    assert!(
        values.iter().all(|(_, replacements)| replacements.len() == value_count),
        "Values must equal lenghts."
    );
    assert!(
        !values.iter().any(|(pattern, _)| values
            .iter()
            .any(|(other, _)| other != pattern && other.contains(*pattern))),
        "Patterns must not contain each other."
    );

    // Record the byte range of every line up front so `text` can be edited below.
    let base = text.as_ptr() as usize;
    let line_ranges: Vec<_> = text
        .lines()
        .map(|line| {
            let begin = line.as_ptr() as usize - base;
            begin..begin + line.len()
        })
        .collect();

    // Walk backwards so that edits never shift the ranges still to be visited.
    for line in line_ranges.into_iter().rev() {
        let template = &text[line.clone()];
        if !values.iter().any(|(pattern, _)| template.contains(*pattern)) {
            continue;
        }

        let new_lines: Vec<String> = (0..value_count)
            .map(|i| {
                values.iter().fold(template.to_string(), |acc, (pattern, replacements)| {
                    acc.replace(*pattern, &replacements[i])
                })
            })
            .collect();

        if new_lines.is_empty() {
            // Remove the line together with whatever terminator follows it.
            // The last line may have none, so never reach past the end of `text`.
            let rest = &text[line.end..];
            let terminator_len = if rest.starts_with("\r\n") {
                2
            } else if rest.starts_with('\n') {
                1
            } else {
                0
            };
            text.replace_range(line.start..line.end + terminator_len, "");
        } else {
            text.replace_range(line, &new_lines.join("\n"));
        }
    }

    text
}
rules.into_values().collect(); 95 | rules.sort_by_key(|rule| rule.name.clone()); 96 | rules 97 | } 98 | 99 | pub fn gen(struct_ident: String, grammar_files: Vec) -> String { 100 | let rules = get_all_rules(&grammar_files); 101 | 102 | // Find silent rules 103 | let silent_rules = rules.iter().filter(|rule| matches!(rule.ty, RuleType::Silent)).map(|rule| rule.name.as_str()).collect::>(); 104 | 105 | // Find if there is a rule named WHITESPACE 106 | let has_whitespace = rules.iter().any(|rule| rule.name.as_str() == "WHITESPACE"); 107 | 108 | let mut full_code = G::pattern_outer().to_string(); 109 | full_code = multi_replace(full_code, vec![ 110 | ("RuleVariant", rules.iter().filter(|r| !silent_rules.contains(&r.name.as_str())).map(|rule| rule.name.as_str().to_string()).collect()), 111 | ("IdentVariant", rules.iter().filter(|r| !silent_rules.contains(&r.name.as_str())).map(|rule| { 112 | let name = rule.name.as_str(); 113 | let name_pascal_case = name.chars() 114 | .next() 115 | .expect("Rule name must not be empty") 116 | .to_uppercase() 117 | .collect::() 118 | + &name[1..]; 119 | name_pascal_case 120 | }).collect()), 121 | ]); 122 | full_code = full_code.replace("StructIdent", struct_ident.to_string().as_str()); 123 | 124 | let mut ids = IdRegistry::new(); 125 | let mut optimized_exprs = Vec::new(); 126 | let mut exprs = Vec::new(); 127 | let mut character_set_rules = HashMap::new(); 128 | for rule in &rules { 129 | let expr = optimize::(&rule.expr); 130 | if matches!(rule.ty, RuleType::Silent) { 131 | if let FPestExpr::CharacterCondition(c) = &expr { 132 | character_set_rules.insert(rule.name.as_str(), c.to_owned()); 133 | } 134 | } 135 | optimized_exprs.push(expr); 136 | } 137 | for expr in &mut optimized_exprs { 138 | optimize_second_stage(expr, &character_set_rules); 139 | } 140 | println!("{:#?}", optimized_exprs); 141 | let mut inner_code = String::new(); 142 | for (i, rule) in rules.iter().enumerate() { 143 | let expr = optimized_exprs.get(i).expect("Expr 
not found"); 144 | exprs.extend(list_exprs(expr)); 145 | let rule_name = rule.name.as_str(); 146 | let rule_name_pascal_case = rule_name.chars() 147 | .next() 148 | .expect("Rule name must not be empty") 149 | .to_uppercase() 150 | .collect::() 151 | + &rule_name[1..]; 152 | let top_expr_id = ids.id(expr); 153 | let formatted_idents = match contains_idents(expr, has_whitespace) { 154 | true => "idents", 155 | false => "", 156 | }; 157 | 158 | let mut code = match silent_rules.contains(&rule_name) { 159 | false => G::pattern_rule().to_string(), 160 | true => G::pattern_rule_silent().to_string(), 161 | }; 162 | code.push_str(G::pattern_rule_method()); 163 | code = code.replace("RuleVariant", rule.name.as_str()); 164 | code = code.replace("top_expr_id", top_expr_id.to_string().as_str()); 165 | code = code.replace("formatted_idents", formatted_idents); 166 | code = code.replace("IdentVariant", rule_name_pascal_case.as_str()); 167 | code = code.replace("StructIdent", struct_ident.to_string().as_str()); 168 | inner_code.push_str(code.as_str()); 169 | } 170 | exprs.sort_by_key(|expr| ids.id(expr)); 171 | exprs.dedup(); 172 | for expr in exprs { 173 | let mut new_code = code::(expr, &mut ids, has_whitespace); 174 | let mut new_code2 = new_code.trim_start_matches('\n'); 175 | let new_code2_len = new_code2.len(); 176 | new_code2 = new_code2.trim_start_matches(' '); 177 | let len_diff = new_code2_len - new_code2.len(); 178 | let pattern = "\n".to_string() + &" ".repeat(len_diff); 179 | new_code = new_code.replace(&pattern, "\n"); 180 | inner_code.push_str(new_code.as_str()); 181 | } 182 | full_code = full_code.replace(" // inner code", inner_code.as_str()); 183 | 184 | full_code.parse().expect("Unable to parse code") 185 | } 186 | -------------------------------------------------------------------------------- /faster-pest-generator/src/optimizer.rs: -------------------------------------------------------------------------------- 1 | use pest_meta::optimizer::OptimizedExpr; 2 
| use crate::*; 3 | 4 | #[derive(Debug, Clone, PartialEq)] 5 | pub enum FPestExpr { 6 | Ident(String), 7 | Str(String), 8 | Insens(String), 9 | CharacterCondition(String), 10 | NegPred(Box), 11 | Seq(Vec), 12 | Choice(Vec), 13 | /// true when empty is accepted 14 | Rep(Box, bool), 15 | Opt(Box), 16 | } 17 | 18 | pub fn optimize(expr: &OptimizedExpr) -> FPestExpr { 19 | match expr { 20 | OptimizedExpr::Str(value) => { 21 | if value.len() == 1 { 22 | FPestExpr::CharacterCondition(G::character(value.as_bytes()[0])) 23 | } else { 24 | FPestExpr::Str(value.to_owned()) 25 | } 26 | }, 27 | OptimizedExpr::Insens(value) => { 28 | // TODO(optimization): one character optimization 29 | FPestExpr::Insens(value.to_owned()) 30 | } 31 | OptimizedExpr::Ident(ident) => { 32 | if let Some(condition) = G::character_ident(ident) { 33 | FPestExpr::CharacterCondition(condition.to_string()) 34 | } else { 35 | FPestExpr::Ident(ident.to_owned()) 36 | } 37 | }, 38 | OptimizedExpr::NegPred(expr) => { 39 | FPestExpr::NegPred(Box::new(optimize::(expr))) 40 | } 41 | OptimizedExpr::Seq(first, second) => { 42 | if **second == OptimizedExpr::Rep(first.to_owned()) { 43 | return FPestExpr::Rep(Box::new(optimize::(first)), false); 44 | } 45 | 46 | let mut seq = Vec::new(); 47 | list_seq(expr, &mut seq); 48 | let mut items = seq.into_iter().map(optimize::).collect::>(); 49 | 50 | // Find NegPred(character condition) that are before a character condition 51 | // and merge them into the character condition 52 | let mut i = 0; 53 | while i + 1 < items.len() { 54 | if let FPestExpr::NegPred(boxed) = &items[i] { 55 | if let FPestExpr::CharacterCondition(c) = &**boxed { 56 | if let FPestExpr::CharacterCondition(c2) = &items[i + 1] { 57 | items[i] = FPestExpr::CharacterCondition(format!("(!{} && {})", c, c2)); 58 | items.remove(i + 1); 59 | continue; 60 | } else if let FPestExpr::NegPred(boxed2) = &items[i + 1] { 61 | if let FPestExpr::CharacterCondition(c2) = &**boxed2 { 62 | items[i] = 
FPestExpr::NegPred(Box::new(FPestExpr::CharacterCondition(format!("({} || {})", c, c2)))); 63 | items.remove(i + 1); 64 | continue; 65 | } 66 | } 67 | } 68 | } 69 | i += 1; 70 | } 71 | 72 | if items.len() == 1 { 73 | items.pop().expect("Seq") 74 | } else { 75 | FPestExpr::Seq(items) 76 | } 77 | } 78 | OptimizedExpr::Choice(_, _) => { 79 | let mut choices = Vec::new(); 80 | list_choices(expr, &mut choices); 81 | 82 | // Group character conditions that are next to each other 83 | let mut fp_choices = Vec::new(); 84 | let mut current_condition = String::new(); 85 | for choice in choices { 86 | let choice = optimize::(choice); 87 | if let FPestExpr::CharacterCondition(c) = choice { 88 | if !current_condition.is_empty() { 89 | current_condition.push_str(" || "); 90 | } 91 | current_condition.push_str(&c); 92 | } else { 93 | if !current_condition.is_empty() { 94 | fp_choices.push(FPestExpr::CharacterCondition(current_condition)); 95 | current_condition = String::new(); 96 | } 97 | fp_choices.push(choice); 98 | } 99 | } 100 | if !current_condition.is_empty() { 101 | fp_choices.push(FPestExpr::CharacterCondition(current_condition)); 102 | } 103 | 104 | if fp_choices.len() == 1 { 105 | fp_choices.pop().expect("Choice") 106 | } else { 107 | FPestExpr::Choice(fp_choices) 108 | } 109 | }, 110 | OptimizedExpr::Opt(expr) => FPestExpr::Opt(Box::new(optimize::(expr))), 111 | OptimizedExpr::Rep(expr) => FPestExpr::Rep(Box::new(optimize::(expr)), true), 112 | OptimizedExpr::Range(a, b) => { 113 | if a.len() == 1 && b.len() == 1 { 114 | let a = a.chars().next().expect("Range a").to_ascii_lowercase() as u8; 115 | let b = b.chars().next().expect("Range b").to_ascii_lowercase() as u8; 116 | FPestExpr::CharacterCondition(G::character_range(a, b)) 117 | } else { 118 | todo!() 119 | } 120 | } 121 | OptimizedExpr::PosPred(_) => todo!(), 122 | OptimizedExpr::Skip(_) => todo!(), 123 | OptimizedExpr::Push(_) => todo!(), 124 | OptimizedExpr::RestoreOnErr(_) => todo!(), 125 | 
OptimizedExpr::PeekSlice(_, _) => todo!(), 126 | } 127 | } 128 | 129 | pub fn optimize_second_stage(expr: &mut FPestExpr, character_set_rules: &HashMap<&str, String>) { 130 | match expr { 131 | FPestExpr::Ident(ident) => if let Some(condition) = character_set_rules.get(ident.as_str()) { 132 | *expr = FPestExpr::CharacterCondition(condition.to_string()); 133 | }, 134 | FPestExpr::Str(_) => (), 135 | FPestExpr::Insens(_) => (), 136 | FPestExpr::CharacterCondition(_) => (), 137 | FPestExpr::NegPred(expr) => optimize_second_stage(expr, character_set_rules), 138 | FPestExpr::Seq(items) => { 139 | for item in items.iter_mut() { 140 | optimize_second_stage(item, character_set_rules); 141 | } 142 | 143 | // Find NegPred(character condition) that are before a character condition 144 | // and merge them into the character condition 145 | let mut i = 0; 146 | while i + 1 < items.len() { 147 | if let FPestExpr::NegPred(boxed) = &items[i] { 148 | if let FPestExpr::CharacterCondition(c) = &**boxed { 149 | if let FPestExpr::CharacterCondition(c2) = &items[i + 1] { 150 | items[i] = FPestExpr::CharacterCondition(format!("(!{} && {})", c, c2)); 151 | items.remove(i + 1); 152 | continue; 153 | } else if let FPestExpr::NegPred(boxed2) = &items[i + 1] { 154 | if let FPestExpr::CharacterCondition(c2) = &**boxed2 { 155 | items[i] = FPestExpr::NegPred(Box::new(FPestExpr::CharacterCondition(format!("({} || {})", c, c2)))); 156 | items.remove(i + 1); 157 | continue; 158 | } 159 | } 160 | } 161 | } 162 | i += 1; 163 | } 164 | 165 | if items.len() == 1 { 166 | *expr = items.pop().expect("Seq") 167 | } 168 | }, 169 | FPestExpr::Choice(items) => { 170 | // Group character conditions that are next to each other 171 | let mut fp_choices = Vec::new(); 172 | let mut current_condition = String::new(); 173 | for item in items.iter_mut() { 174 | optimize_second_stage(item, character_set_rules); 175 | if let FPestExpr::CharacterCondition(c) = item { 176 | if !current_condition.is_empty() { 177 | 
current_condition.push_str(" || "); 178 | } 179 | current_condition.push_str(c); 180 | } else { 181 | if !current_condition.is_empty() { 182 | fp_choices.push(FPestExpr::CharacterCondition(current_condition)); 183 | current_condition = String::new(); 184 | } 185 | fp_choices.push(item.to_owned()); 186 | } 187 | } 188 | if !current_condition.is_empty() { 189 | fp_choices.push(FPestExpr::CharacterCondition(current_condition)); 190 | } 191 | 192 | if fp_choices.len() == 1 { 193 | *expr = fp_choices.pop().expect("Choice") 194 | } else { 195 | *expr = FPestExpr::Choice(fp_choices); 196 | } 197 | }, 198 | FPestExpr::Rep(expr, _) => optimize_second_stage(expr, character_set_rules), 199 | FPestExpr::Opt(expr) => optimize_second_stage(expr, character_set_rules), 200 | } 201 | } 202 | --------------------------------------------------------------------------------