├── .gitignore ├── .rustfmt.toml ├── Cargo.toml ├── examples └── demo.rs ├── old ├── compile.rs ├── notes.rs ├── notes2.md ├── op.rs ├── script2.rs ├── syntax.rs └── tokenstream_regex.md ├── run.sh ├── src ├── ast │ ├── mod.rs │ ├── names.rs │ ├── repr.rs │ └── visit.rs ├── lib.rs ├── tokens.rs └── trace.rs └── testcase ├── input.rs └── patterns.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /Cargo.lock 2 | /target 3 | **/*.rs.bk 4 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | # orthodox formatting, no heresies 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "comacro" 3 | version = "0.0.2" 4 | authors = ["Kaz Wesley "] 5 | description = "Declarative Rust static analysis with macro-like syntax" 6 | edition = "2018" 7 | categories = ["development-tools"] 8 | license = "MIT/Apache-2.0" 9 | homepage = "https://comacro.lambdaverse.org" 10 | 11 | [dependencies] 12 | log = { version = "0.4", features = ["max_level_trace", "release_max_level_off"] } 13 | proc-macro2 = "0.4" 14 | 15 | [dev-dependencies] 16 | env_logger = "0.5" 17 | proc-macro2 = { version = "0.4", features = ["nightly"] } 18 | 19 | [features] 20 | default = ["syn"] 21 | 22 | [dependencies.syn] 23 | version = "0.15" 24 | default-features = false 25 | features = ["full", "visit", "parsing"] 26 | optional = true 27 | -------------------------------------------------------------------------------- /examples/demo.rs: -------------------------------------------------------------------------------- 1 | use log::*; 2 | 3 | use comacro::PatternDef; 4 | 5 | fn main() { 6 | env_logger::init(); 7 | 8 | use std::env; 9 | use std::fs::File; 10 | use std::io::Read; 11 | use 
std::process; 12 | 13 | let mut args = env::args(); 14 | let _ = args.next(); // executable name 15 | let (patterns, input) = match (args.next(), args.next(), args.next()) { 16 | (Some(patterns), Some(input), None) => (patterns, input), 17 | _ => { 18 | error!("Usage: dump-syntax patterns.rs input.rs"); 19 | process::exit(1); 20 | } 21 | }; 22 | let mut src = String::new(); 23 | File::open(&input) 24 | .expect("Unable to open input file") 25 | .read_to_string(&mut src) 26 | .expect("Unable to read input file"); 27 | let insyn = syn::parse_file(&src).expect("Unable to parse input file"); 28 | src.clear(); 29 | File::open(&patterns) 30 | .expect("Unable to open pattern file") 31 | .read_to_string(&mut src) 32 | .expect("Unable to read pattern file"); 33 | let patsyn = syn::parse_file(&src).expect("Unable to parse pattern file"); 34 | let pats: Vec<_> = patsyn.items.iter().map(compile_pat).collect(); 35 | for it in insyn.items { 36 | use syn::Item::*; 37 | match it { 38 | Fn(f) => { 39 | let input = &f.block.stmts; 40 | let in_trace = comacro::compile_input(input); 41 | for (i, pat) in pats.iter().enumerate() { 42 | for m in pat.matches(input, &in_trace) { 43 | println!("pattern {} is a match: {:?}", i, m); 44 | } 45 | } 46 | } 47 | _ => unimplemented!("non-fn item in input"), 48 | } 49 | } 50 | } 51 | 52 | fn compile_pat(item: &syn::Item) -> comacro::Ir { 53 | let pat = if let syn::Item::Macro2(mac) = item { 54 | PatternDef::lex(mac.args.clone(), mac.body.clone()) 55 | } else { 56 | unimplemented!() 57 | }; 58 | let pat = pat.parse().unwrap(); 59 | /* 60 | let pat = match pat.parse() { 61 | Ok(x) => x, 62 | Err(e) => { 63 | eprintln!("error: {}", e.annotate(&src)); 64 | panic!() 65 | } 66 | }; 67 | */ 68 | trace!("debug_tree_repr: {}", pat.debug_tree_repr()); 69 | trace!("debug_flat_repr:\n{}", pat.debug_flat_repr()); 70 | pat.compile() 71 | } 72 | -------------------------------------------------------------------------------- /old/compile.rs: 
-------------------------------------------------------------------------------- 1 | //! Compile comacro syntax to VM ops 2 | 3 | use quote::ToTokens; 4 | use syn::parse::Parse; 5 | use syn::{File, Item, ItemMacro2}; 6 | 7 | use log::{debug, error, info, log, trace}; 8 | use std::collections::HashMap; 9 | 10 | /// find-or-insert the T into the (unsorted) Vec, and then return its index 11 | fn smallset_index(v: &mut Vec, t: &T) -> usize { 12 | match v.iter().position(|x| x == t) { 13 | Some(n) => n, 14 | None => { 15 | v.push(t.clone()); 16 | v.len() - 1 17 | } 18 | } 19 | } 20 | 21 | /// TODO: str/String shouldn't need a special case 22 | fn smallset_str_index(v: &mut Vec, t: &str) -> usize { 23 | match v.iter().position(|x| x == t) { 24 | Some(n) => n, 25 | None => { 26 | v.push(t.to_owned()); 27 | v.len() - 1 28 | } 29 | } 30 | } 31 | 32 | /* 33 | (Local(l0), Local(l1)) => { 34 | // TODO: support metavars inside patterns 35 | if l0.pats.len() == 1 { 36 | if let syn::Pat::Ident(ref id) = l0.pats[0] { 37 | let syn::PatIdent { ident: id, .. } = id; 38 | let id = format!("{}", id); 39 | let trimmed = id.trim_left_matches("__PLACEHOLDER_IDENT_"); 40 | if trimmed != id { 41 | smallset_str_index(&mut idents, &trimmed); 42 | } else { 43 | trace!("non-match: local: patterns"); 44 | return; 45 | } 46 | } else { 47 | if l0.pats != l1.pats { 48 | trace!("non-match: local: patterns"); 49 | return; 50 | } 51 | } 52 | } else { 53 | if l0.pats != l1.pats { 54 | trace!("non-match: local: patterns"); 55 | return; 56 | } 57 | } 58 | if l0.init != l1.init { 59 | if let Some((_, box syn::Expr::Path(syn::ExprPath { ref path, .. }))) = 60 | l0.init 61 | { 62 | let segs = &path.segments; 63 | if segs.len() == 1 { 64 | // TODO: arguments? 65 | let syn::PathSegment { ref ident, .. 
} = segs[0]; 66 | let id = format!("{}", ident); 67 | // TODO: an IDENT metavar can also be an expression 68 | let trimmed = id.trim_left_matches("__PLACEHOLDER_EXPR_"); 69 | if trimmed != id { 70 | smallset_str_index(&mut exprs, &trimmed); 71 | } else { 72 | trace!("non-match: local: patterns"); 73 | return; 74 | } 75 | } else { 76 | trace!("non-match: local: patterns"); 77 | return; 78 | } 79 | } else { 80 | trace!("non-match: local: patterns"); 81 | return; 82 | } 83 | } 84 | } 85 | (Item(i0), Item(i1)) => unimplemented!(), 86 | (Expr(x0), Expr(x1)) => { 87 | if x0 == x1 { 88 | continue; 89 | } 90 | // TODO: metavars 91 | trace!("non-match: expr"); 92 | return; 93 | } 94 | (Semi(x0, _), Semi(x1, _)) => { 95 | if x0 == x1 { 96 | continue; 97 | } 98 | // TODO: metavars 99 | trace!("non-match: semi expr"); 100 | return; 101 | } 102 | (_, _) => { 103 | trace!("non-match: shape"); 104 | return; 105 | } 106 | */ 107 | 108 | pub fn compile_pat(pat: &Item) -> crate::op::Script { 109 | use syn::Item::*; 110 | let mac = match pat { 111 | Macro2(m) => m, 112 | _ => unimplemented!("non-macro2 item in patterns"), 113 | }; 114 | let ar = parse_args(mac.args.clone().into_iter()); 115 | let body: TokenStream = MacBodyTransducer::new(mac.body.clone().into_iter(), ar).collect(); 116 | trace!("MacBody: {}", body); 117 | let Stmts { body } = syn::parse2(body).unwrap(); 118 | let mut exprs: Vec = Vec::new(); 119 | let mut idents: Vec = Vec::new(); 120 | for (pat, target) in self.body.iter().zip(stmts) { 121 | use syn::Stmt::*; 122 | match (pat, target) { 123 | Local(l) => { 124 | } 125 | Item(i) => unimplemented!(), 126 | Expr(x) => { 127 | } 128 | Semi(x0, _) => { 129 | } 130 | } 131 | } 132 | } 133 | 134 | -------------------------------------------------------------------------------- /old/notes.rs: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | comacros: search code for patterns, by example 4 | 5 | comacros are the dual of macros 
6 | 7 | as such, they are not used in compilation, but in static analysis: with a macro the user tells the 8 | compiler to apply a pattern; with a comacro the compiler tells the user a pattern has been seen 9 | 10 | Uses: 11 | - declarative lints 12 | - incl a generalization of a common lint type: search for anything that could be simplified 13 | with a libstd function 14 | - decl lints are a fraction of the size and decoupled from compiler internals 15 | - RLS: search for things that could be "uninlined" to the given function/macro 16 | - search for patterns that could be factored into a utility (or made unnecessary with API changes?) 17 | 18 | Execution model: 19 | - RLS: read a pattern from raw text, search some files 20 | - clippy: efficiently search for hundreds of patterns 21 | 22 | */ 23 | 24 | /* 25 | * - in pattern, normalize $placeholders to sequentially-numbered private identifiers 26 | * 27 | * different patterns match in different contexts: 28 | * e.g. 29 | * - stmt[+] -- matches a window of a sequence of statements 30 | * - item[+] -- matches a window of a sequence of items 31 | * - expr -- matches a subtree of an expr 32 | */ 33 | 34 | /* 35 | 36 | type of expression: 37 | - no quantification 38 | - no alternation 39 | - (no grouping) 40 | - backreferences! 41 | 42 | a stmt has a type and terms 43 | - shape of 2 stmts can be: 44 | - disjoin(A, B) 45 | - subset(A, B) 46 | - equiv(A, B) 47 | if a term is a backref, context may also be necessary to compare 48 | FSM based on shape alone, then try unification? 49 | 50 | */ 51 | 52 | /* 53 | backreferences form an equivalence relation 54 | set of terms X 55 | equiv relation partitions X into a family of sets α 56 | goal: identify patterns whose partitions are refinements of α 57 | - without calculating more of α than needed by matches 58 | - this can be handled with a binary decision tree that traverses the refinement poset, with 59 | merges cut and any paths that don't lead to any additional matches 60 | - e.g.
at each step either branch to a state where a particular equivalence may hold, or doesn't 61 | */ 62 | 63 | /* 64 | ($x: expr, $y: expr) => { $x >= $y + 1 } => { lint_binop(GE, RT) } 65 | ($x: expr, $y: expr) => { $x >= 1 + $y } => { lint_binop(GE, RT) } 66 | ($x: expr, $y: expr) => { $x - 1 >= $y } => { lint_binop(GE, LF) } 67 | ($x: expr, $y: expr) => { -1 + $x >= $y } => { lint_binop(GE, LF) } 68 | ($x: expr, $y: expr) => { $x + 1 <= $y } => { lint_binop(LE, LF) } 69 | ($x: expr, $y: expr) => { 1 + $x <= $y } => { lint_binop(LE, LF) } 70 | ($x: expr, $y: expr) => { $x <= $y - 1 } => { lint_binop(LE, RT) } 71 | ($x: expr, $y: expr) => { $x <= -1 + $y } => { lint_binop(LE, RT) } 72 | 73 | ($v: expr, $x: expr, $b: block) => { if let Some($v) = $x.ok() $b } 74 | */ 75 | 76 | // no type 77 | let $t = $x; 78 | // any type [$ty:type] 79 | let $t: $ty = $x; 80 | // some type, no type, don't care 81 | let $t: $_ = $x; 82 | // Option [$ty:type?] 83 | let $t: $ty = $x; 84 | // particular type 85 | let $t: u32 = $x; 86 | 87 | 88 | /* 89 | 90 | match syntax-to-syntax, with out-of-band metavars: 91 | 92 | - read patterns as TokenStreams (in Macro2 envelopes) 93 | - scan stream for placeholders 94 | - replace placeholders with trivial values appropriate for metavar type, 95 | remembering metavar positions 96 | - parse stream to syn structures 97 | - to match a position, first check if it's in the metavar table 98 | 99 | later, performance improvements: 100 | 101 | - linearize: 102 | - [] of unions, where context distinguishes between Kind Kinds 103 | enum Node { // this struct is a bitfield; whole thing fits into a scalar (u16?) 
104 | Meta{...} 105 | Literal{ 106 | arity: u8, 107 | kind: union Kind { // discriminate by context 108 | enum StmtKind {...} 109 | enum ExprKind {...} 110 | } 111 | } 112 | } 113 | - if not ismeta, an exact match is needed 114 | - can check for exact match with only arity; no switching on NodeKind 115 | - general traversal of this linearized structure would require a stack, but exact match and 116 | metavar match can be done only tracking stack _depth_ 117 | - in pats, encode common literals as metas? 118 | - larger literals may need additional cell(s) 119 | ( probably better to just store all literals in cells? ) 120 | - Spans etc OOB 121 | - i.e., operate directly on a serialized form ("homomorphic serialization") 122 | 123 | - NFA-ish multimatch 124 | - it's like NFA-matching a regex, except the handling of metavars: 125 | - an unbound metavar always matches a single subtree, whose end can be determined by 126 | keeping a running total of stack depth 127 | - a bound mvar (backref) is more complicated... 
128 | - could do: 129 | - when an unbound mvar matches, remember where (each mvar has a slot) 130 | - when a bound mvar matches, do the comparison and abort or continue 131 | - optimization: 132 | - an mvar not preceded by any mvar, $_, or optional always binds at a fixed offset 133 | 134 | */ 135 | -------------------------------------------------------------------------------- /old/notes2.md: -------------------------------------------------------------------------------- 1 | pattern is same, but a DAG: 2 | a node may have a label 3 | a node may be a reference to a previous label 4 | 5 | skip over a child in constant time 6 | (optionally while binding it to a label) 7 | could have node-size prefix 8 | 9 | a normal stack machine constructs bottom-up 10 | we need top-down context for matching (metas etc) 11 | 12 | destructive machine: 13 | - typed stack machine 14 | - running a script forward would destruct an AST 15 | - a script is *constructive* if: 16 | - it doesn't contain any non-constructive operators (wildcards, partial extractors, repetitions, ..?) 17 | - it destructs all the way to terminals 18 | - running a constructive script *backward* would construct an AST 19 | - read it backwards and do the inverse of each operator 20 | 21 | a comacro pattern compiles to a script 22 | an input compiles to a constructive script 23 | 24 | a pattern can be matched against an input in linear time 25 | - this does not require actually executing any operations (only the 26 | script needs to be consulted, and no additional storage is necessary) 27 | 28 | -------------------------------------------------------------------------------- /old/op.rs: -------------------------------------------------------------------------------- 1 | //! Opcodes and virtual machine 2 | 3 | use syn::{Pat, Ident, Expr, Stmt}; 4 | 5 | use log::{trace, debug, info, error, warn, log}; 6 | 7 | // typed SSA register machine 8 | // no(?)
control flow 9 | // destructuring ops 10 | 11 | // coercion: raising is more general than lowering, so initial impl relies on raising; 12 | // later as optimization add lowering coercions to compiler 13 | 14 | // compact representation w/ normalized form: 15 | // - implicit out params [sequential] 16 | // - implicit S params [sequential] 17 | 18 | #[derive(Debug, Default, Clone)] 19 | pub struct Vm { 20 | // S is provided at execution time 21 | p: Vec, 22 | x: Vec>, 23 | i: Vec, 24 | } 25 | 26 | #[derive(Debug, PartialEq, Eq)] 27 | pub struct Script { 28 | ops: Box<[Op]>, 29 | } 30 | 31 | #[derive(Debug, Copy, Clone, PartialEq, Eq)] 32 | pub(crate) enum Op { 33 | IToX(I), // -> (X) 34 | //MExec(M), 35 | PAsI(P), // -> (I)? 36 | SAsLocalX(S), // -> (P, X)? 37 | SAsSemiX(S), // -> (X)? 38 | TSet(u8), 39 | XAsAssignX(X), // -> (X, X)? 40 | XAsI(X), // -> (I)? 41 | XEq(X, X), // -> ()? 42 | } 43 | 44 | type I = u8; 45 | type P = u8; 46 | type S = u8; 47 | type X = u8; 48 | type M = u8; 49 | 50 | /* 51 | macro manual_swap($t: ident, $x: expr, $y: expr) { 52 | let $t = $x; 53 | $x = $y; 54 | $y = $t; 55 | } 56 | 57 | // T_SET {END} 58 | 59 | // S_AS_LOCAL? S0 -> P0 X0 60 | // P_AS_I? P0 -> I0 61 | 62 | // S_AS_SEMI? S1 -> X1 63 | // X_AS_ASSIGN? X1 -> X2 X3 64 | // X_EQ? X2 X0 65 | 66 | // S_AS_SEMI? S2 -> X4 67 | // X_AS_ASSIGN? X4 -> X5 X6 68 | // X_EQ? X5 X3 69 | // I_TO_X I0 -> X7 70 | // X_EQ? 
X6 X7 71 | 72 | // M_EXEC M0 73 | // {END} 74 | */ 75 | 76 | pub fn manual_swap_script() -> Script { 77 | use self::Op::*; 78 | let ops = vec![ 79 | SAsLocalX(0), 80 | PAsI(0), 81 | SAsSemiX(1), 82 | XAsAssignX(1), 83 | XEq(2, 0), 84 | SAsSemiX(2), 85 | XAsAssignX(4), 86 | XEq(5, 3), 87 | IToX(0), 88 | XEq(6, 7), 89 | ].into_boxed_slice(); 90 | Script{ ops } 91 | } 92 | 93 | enum Control { 94 | Mark(u8), 95 | Jump, 96 | Continue, 97 | } 98 | 99 | impl Vm { 100 | pub fn new() -> Vm { 101 | Default::default() 102 | } 103 | 104 | /// Run the given script for the given block of statements. 105 | /// Any previous state is overwritten (but its storage is reused); 106 | /// reusing a Vm for the same script(s) minimizes allocation. 107 | pub fn run(&mut self, script: &Script, block: &[Stmt]) { 108 | self.reset(); 109 | let mut t = 0; 110 | let mut i = 0; 111 | loop { 112 | let op = match script.ops.get(i) { 113 | Some(op) => op, 114 | None => break, 115 | }; 116 | trace!("apply op#{} to {:?}", i, self); 117 | use self::Control::*; 118 | match op.apply(self, &block) { 119 | Continue => { i += 1 }, 120 | Mark(u8) => { t = i + usize::from(u8) + 1 }, 121 | Jump => { 122 | assert!(t > i); 123 | i = t; 124 | } 125 | } 126 | } 127 | } 128 | 129 | pub fn reset(&mut self) { 130 | self.p.clear(); 131 | self.x.clear(); 132 | self.i.clear(); 133 | } 134 | 135 | pub fn i(&self, i: I) -> &Ident { 136 | &self.i[usize::from(i)] 137 | } 138 | 139 | pub fn x(&self, x: X) -> &Expr { 140 | self.x[usize::from(x)].as_ref().unwrap() 141 | } 142 | 143 | pub fn p(&self, p: P) -> &Pat { 144 | &self.p[usize::from(p)] 145 | } 146 | } 147 | 148 | fn i_to_x(ident: Ident) -> Expr { 149 | let mut segments = syn::punctuated::Punctuated::new(); 150 | let arguments = syn::PathArguments::None; 151 | segments.push_value(syn::PathSegment { ident, arguments }); 152 | let leading_colon = None; 153 | let path = syn::Path { 154 | leading_colon, 155 | segments, 156 | }; 157 | let attrs = Vec::new(); 158 | let qself 
= None; 159 | (syn::ExprPath { path, attrs, qself }).into() 160 | } 161 | 162 | fn p_as_i(p: &Pat) -> Option { 163 | let p = match p { 164 | syn::Pat::Ident(p) => p, 165 | _ => return None, 166 | }; 167 | // TODO: other stuff in PatIdent? 168 | let syn::PatIdent{ ident, .. } = p; 169 | Some(ident.clone()) 170 | } 171 | 172 | fn s_as_local_x(s: &Stmt) -> Option<(Pat, Option)> { 173 | let lx = match s { 174 | Stmt::Local(local) => local, 175 | _ => return None, 176 | }; 177 | let syn::Local { pats, ty, init, .. } = lx; 178 | if pats.len() != 1 { 179 | // ? 180 | unimplemented!(); 181 | } 182 | let p = (*pats.first().unwrap().value()).clone(); 183 | let x = init.as_ref().map(|x| (&*x.1).clone()); 184 | Some((p, x)) 185 | } 186 | 187 | fn s_as_semi_x(s: &Stmt) -> Option { 188 | Some(match s { 189 | syn::Stmt::Semi(x, _) => x.clone(), 190 | _ => return None, 191 | }) 192 | } 193 | 194 | fn x_as_assign_x(x: &Expr) -> Option<(Expr, Expr)> { 195 | let (lhs, rhs) = match x { 196 | syn::Expr::Assign(syn::ExprAssign { left, right, .. }) => { 197 | (&**left, &**right) 198 | } 199 | _ => return None, 200 | }; 201 | Some((lhs.clone(), rhs.clone())) 202 | } 203 | 204 | fn x_as_i(x: &Expr) -> Option { 205 | let path = match x { 206 | syn::Expr::Path(syn::ExprPath { path, .. 
}) => path, 207 | _ => return None, 208 | }; 209 | if path.segments.len() != 1 { 210 | return None; 211 | } 212 | let syn::PathSegment { ident, arguments } = path.segments.first().unwrap().value(); 213 | if *arguments != syn::PathArguments::None { 214 | return None; 215 | } 216 | Some(ident.clone()) 217 | } 218 | 219 | fn block_s(block: &[Stmt], s: u8) -> &Stmt { 220 | &block[usize::from(s)] 221 | } 222 | 223 | impl Op { 224 | fn apply(&self, state: &mut Vm, block: &[Stmt]) -> Control { 225 | use self::Control::*; 226 | use self::Op::*; 227 | match *self { 228 | IToX(i) => { 229 | trace!("IToX(I{})", usize::from(i)); 230 | state.x.push(Some(i_to_x(state.i(i).clone()))); 231 | } 232 | PAsI(p) => { 233 | trace!("PAsI(P{})", usize::from(p)); 234 | match p_as_i(state.p(p)) { 235 | Some(i) => state.i.push(i), 236 | _ => return Jump, 237 | } 238 | } 239 | SAsLocalX(s) => { 240 | trace!("SAsLocalX(S{})", usize::from(s)); 241 | let (p, x) = match s_as_local_x(block_s(block, s)) { 242 | Some(px) => px, 243 | None => return Jump, 244 | }; 245 | state.p.push(p); 246 | //state.t.push(ty); 247 | state.x.push(x); 248 | } 249 | SAsSemiX(s) => { 250 | trace!("SAsSemiX(S{})", usize::from(s)); 251 | let x = match s_as_semi_x(block_s(block, s)) { 252 | Some(x) => x, 253 | _ => return Jump, 254 | }; 255 | state.x.push(Some(x)); 256 | } 257 | TSet(u8) => return Mark(u8), 258 | XAsAssignX(x) => { 259 | trace!("XAsAssignX(X{})", usize::from(x)); 260 | let (lhs, rhs) = match x_as_assign_x(state.x(x)) { 261 | Some(lr) => lr, 262 | _ => return Jump, 263 | }; 264 | state.x.push(Some(lhs)); 265 | state.x.push(Some(rhs)); 266 | } 267 | XAsI(x) => { 268 | let i = match x_as_i(state.x(x)) { 269 | Some(i) => i, 270 | _ => return Jump, 271 | }; 272 | state.i.push(i); 273 | } 274 | XEq(x0, x1) => { 275 | trace!("XEq(X{}, X{})", usize::from(x0), usize::from(x1)); 276 | if state.x(x0) != state.x(x1) { 277 | return Jump 278 | } 279 | } 280 | _ => unimplemented!(), 281 | } 282 | Continue 283 | } 284 | 
} 285 | -------------------------------------------------------------------------------- /old/script2.rs: -------------------------------------------------------------------------------- 1 | 2 | enum Op { 3 | // lowering ops 4 | PatAsIdent, 5 | StmtAsLocal, 6 | StmtAsItem, 7 | StmtAsExpr, 8 | StmtAsSemi, 9 | 10 | // terminals 11 | Ident 12 | } 13 | 14 | fn do_expr(x: &Expr) { 15 | } 16 | 17 | fn compile(stmts: &[syn::Stmt]) -> Script { 18 | for stmt in stmts { 19 | use syn::Stmt::*; 20 | match stmt { 21 | Local(syn::Local { attrs, pats, ty, init, .. }) => { 22 | ops.push(Op::StmtAsLocal); 23 | // attrs 24 | do_pats(pats); 25 | do_ty(ty); 26 | do_expr(init); 27 | 28 | ops.push(Op::StmtAsLocal(attrs.len(), pats.len(), ty.is_some(), init.is_some())); 29 | // ... 30 | 31 | // mutate to add optional fields 32 | ops.push(Op::StmtAsLocal(ty.is_some(), init.is_some())); 33 | push_pats(pats); 34 | // ... 35 | } 36 | Item(i) => { 37 | ops.push(Op::StmtAsItem); 38 | unimplemented!(); 39 | } 40 | Expr(x) => { 41 | ops.push(Op::StmtAsExpr); 42 | do_expr(Some(x)); 43 | } 44 | Semi(x, _) => { 45 | ops.push(Op::StmtAsSemi); 46 | do_expr(Some(x)); 47 | } 48 | } 49 | } 50 | } 51 | 52 | -------------------------------------------------------------------------------- /old/syntax.rs: -------------------------------------------------------------------------------- 1 | // x: Any / Option 2 | let $foo: $bar = $baz; 3 | 4 | // _: Option 5 | let $foo: $_ = $baz; 6 | // _: Option 7 | let $foo = $baz; 8 | // _: None 9 | let $foo: $! 
= $baz; 10 | // _: None 11 | let $foo = $baz; 12 | 13 | // literal without explicit wildcards: 14 | // ( signifies None) 15 | // _: Option 16 | #[$_] 17 | let $foo: $_ = $_; 18 | // _: None 19 | let $foo; 20 | // - this approach compiles straightforwardly 21 | // - "match this function" probably wants a loosening transform 22 | // - explicit wildcards are easy to understand 23 | 24 | // wildcard unless literal: 25 | // ( signifies unspecified) 26 | // _: Option 27 | let $foo; 28 | // _: None 29 | #[$!] 30 | let $foo: $! = $!; 31 | // - this is the only sane approach for attributes 32 | // - it would be nice if everything were consistent 33 | 34 | // hybrid: 35 | // - attributes are metadata; metadata is ignored by default 36 | // - terms are literal unless explicitly wildcarded 37 | // - explicit-wildcards is probably noisier in the common case, but it's also POLA 38 | // 39 | // out-of-band metadata: 40 | // - attributes 41 | // - mapping from serialization back to syntax (suitable for presenting syn objects to onmatch fn) 42 | 43 | let $foo: $bar = $baz; 44 | let $foo: $_ = $baz; 45 | let $foo: $! 
= $baz; 46 | 47 | let $t = $x; 48 | $x = $y; 49 | $y = $t; 50 | -------------------------------------------------------------------------------- /old/tokenstream_regex.md: -------------------------------------------------------------------------------- 1 | # TokenStream regex approach 2 | 3 | - need to match on syntactic features 4 | - but, simpler to compare sequences of uniform objects than trees with many 5 | types of node 6 | - so: tokens -> syntax -> tokens, to inject information about syntactic 7 | structure into the token layer 8 | 9 | syn: 10 | extend tokens to support wildcards/backreferences, grouping/repetition, 11 | extend syntax to support metavar bindings/backrefs 12 | - metavar impl of ToTokens produces suitable regex 13 | 14 | - this approach seems easy to implement 15 | - doesn't require special handling for every type of AST node 16 | 17 | problem: regex to recognize an Expr is hideous, no? 18 | - just create an AnyExpr token; don't need to lower it to an alternation of 19 | all concrete Expr kinds 20 | 21 | ## Challenges 22 | 23 | - metavar matching will be slow (have to parse an Expr to know when it ends!) 24 | - possible to parse once for structure? 25 | 26 | let $id: $ty = $expr; 27 | let $id (: $ty)? (= $expr)?; 28 | 29 | tokenization of a mvar depends on context! 30 | - normally, parent inserts delimiters based on present/not-present 31 | - mvars can be optional 32 | 33 | ------------------------------------------------ 34 | 35 | object-level: 36 | 37 | - AnyExpr/BindAnyExpr, AnyIdent/BindAnyIdent, ... 38 | 39 | Local { 40 | Vec, 41 | Seq, 42 | Option, 43 | Option, 44 | } 45 | - pattern needs to be able to specify: 46 | match exact attrs (incl none) / match any attrs (optionally bind) 47 | 48 | Local { 49 | Pattern>, 50 | Pattern>, 51 | Pattern>, 52 | Pattern>, 53 | } 54 | 55 | Pattern { 56 | Concrete(T), 57 | BindAny, // separate IgnoreAny? 
58 | Backref(Backref), 59 | } 60 | 61 | Local: { 62 | attrs: [], 63 | pats: ?, 64 | type: None, 65 | init: Some(Expr: { ... }), 66 | } 67 | 68 | Local: { 69 | attrs: Pattern>::BindAny, 70 | pats: ?, 71 | type: Pattern>::BindAny, 72 | init: Pattern>::BindAny, 73 | } 74 | 75 | ``` 76 | Local{ attrs: ?_, pats: Pat{ id: ?t }, type: ?_, init: Expr{ ExprPath: [ PathSeg{ ident: ?x } ] } } 77 | Semi{ Expr{ ExprAssign{ pat: [ Pat{ id: ?x } ], expr: ?y } } } 78 | Semi{ Expr{ ExprAssign{ pat: [ Pat{ id: ?y } ], expr: Expr{ ExprPath: [ PathSeg{ ident: ?t } ] } } } } 79 | ``` 80 | 81 | serialize to self-describing TLV format 82 | - length allows wildcards/skipping 83 | - tags allow Pattern handling 84 | 85 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #RUSTFLAGS='--cfg procmacro2_semver_exempt' 2 | RUST_LOG=demo=trace,comacro=trace RUST_BACKTRACE=1 cargo run --example demo -- testcase/patterns.rs testcase/input.rs 3 | -------------------------------------------------------------------------------- /src/ast/mod.rs: -------------------------------------------------------------------------------- 1 | //! 
Operations on syn AST 2 | 3 | mod names; 4 | mod repr; 5 | mod visit; 6 | 7 | use self::visit::Visitor; 8 | 9 | use crate::trace::*; 10 | use crate::tokens::MetaContext; 11 | 12 | use proc_macro2::TokenStream; 13 | use syn::parse::Parse; 14 | 15 | #[derive(Debug)] 16 | pub struct Error; 17 | 18 | /*/ 19 | pub struct Error{ 20 | message: String, 21 | location: Option<(proc_macro2::LineColumn, proc_macro2::LineColumn)>, 22 | } 23 | 24 | impl Error { 25 | pub fn annotate(&self, src: &str) -> String { 26 | let start_line = self.location.as_ref().unwrap().0.line; 27 | let start_col = self.location.as_ref().unwrap().0.column; 28 | let src = src.lines().nth(start_line).expect("error location within source"); 29 | let pointer = "^"; 30 | format!("{}\n{}\n{:start_col$}", &self.message, &src, pointer, start_col = start_col) 31 | } 32 | } 33 | */ 34 | 35 | pub type Result = std::result::Result; 36 | 37 | 38 | #[derive(Default)] 39 | struct TraceGenerator { 40 | trace: Tracer, 41 | } 42 | impl Visitor<'_> for TraceGenerator { 43 | type Output = Trace; 44 | fn open_subtree(&mut self) { 45 | self.trace.open_subtree(); 46 | } 47 | fn close_subtree(&mut self) { 48 | self.trace.close_subtree(); 49 | } 50 | fn open_datum(&mut self) { 51 | self.trace.open_datum(); 52 | } 53 | fn close_datum(&mut self) { 54 | self.trace.close_datum(); 55 | } 56 | fn push_byte(&mut self, x: u8) { 57 | self.trace.push_byte(x); 58 | } 59 | fn extend_bytes(&mut self, x: &[u8]) { 60 | self.trace.extend_bytes(x); 61 | } 62 | fn finish(self) -> Self::Output { 63 | self.trace.finish() 64 | } 65 | } 66 | 67 | #[derive(Default)] 68 | struct IndexedTraceGenerator { 69 | trace: Tracer, 70 | } 71 | impl IndexedTraceGenerator { 72 | pub fn with_hint(hint: usize) -> Self { 73 | let mut trace = Tracer::default(); 74 | trace.expect_marks(hint); 75 | IndexedTraceGenerator { trace } 76 | } 77 | } 78 | impl Visitor<'_> for IndexedTraceGenerator { 79 | type Output = IndexedTrace; 80 | fn open_stmt(&mut self, _: 
&syn::Stmt) { 81 | self.trace.mark(); 82 | self.trace.open_subtree(); 83 | } 84 | fn open_subtree(&mut self) { 85 | self.trace.open_subtree(); 86 | } 87 | fn close_subtree(&mut self) { 88 | self.trace.close_subtree(); 89 | } 90 | fn open_datum(&mut self) { 91 | self.trace.open_datum(); 92 | } 93 | fn close_datum(&mut self) { 94 | self.trace.close_datum(); 95 | } 96 | fn push_byte(&mut self, x: u8) { 97 | self.trace.push_byte(x); 98 | } 99 | fn extend_bytes(&mut self, x: &[u8]) { 100 | self.trace.extend_bytes(x); 101 | } 102 | fn finish(self) -> Self::Output { 103 | self.trace.finish_with_indexes() 104 | } 105 | } 106 | 107 | struct Reconciler<'t> { 108 | trace: DeltaTracer<'t>, 109 | } 110 | impl<'t> Reconciler<'t> { 111 | pub fn new(trace: &'t Trace) -> Self { 112 | let trace = DeltaTracer::new(trace); 113 | Reconciler { trace } 114 | } 115 | } 116 | impl<'t, 'a> Visitor<'a> for Reconciler<'t> { 117 | type Output = Trace; 118 | fn close_ident(&mut self, x: &syn::Ident) { 119 | if let Ok(()) = self.trace.close_subtree() { 120 | return; 121 | } 122 | let x = x.to_string(); 123 | let x_id = x.trim_left_matches("IDENT_"); 124 | let x_expr = x.trim_left_matches("EXPR_"); 125 | if x_id != x { 126 | // replace current 127 | self.trace 128 | .new 129 | .replacement 130 | .push_mvar(u8::from_str_radix(x_id, 10).unwrap()); 131 | self.trace.new.rollback(0); 132 | self.trace.new.close_subtree(); 133 | } else if x_expr != x { 134 | // replace parent 135 | self.trace 136 | .new 137 | .replacement 138 | .push_mvar(u8::from_str_radix(x_expr, 10).unwrap()); 139 | self.trace.new.rollback(1); 140 | self.trace.new.close_subtree(); 141 | } else { 142 | unreachable!("no such placeholder"); 143 | } 144 | } 145 | 146 | fn open_subtree(&mut self) { 147 | self.trace.open_subtree().unwrap(); 148 | } 149 | fn close_subtree(&mut self) { 150 | self.trace.close_subtree().unwrap(); 151 | } 152 | fn open_datum(&mut self) { 153 | self.trace.open_datum(); 154 | } 155 | fn close_datum(&mut self) { 
self.trace.close_datum();
    }
    fn push_byte(&mut self, x: u8) {
        self.trace.push_byte(x);
    }
    fn extend_bytes(&mut self, x: &[u8]) {
        self.trace.extend_bytes(x);
    }

    fn finish(self) -> Self::Output {
        self.trace.finish()
    }
}

/// An input AST node captured by a metavariable.
#[derive(Clone)]
pub enum Binding<'ast> {
    Expr(&'ast syn::Expr),
    Ident(&'ast syn::Ident),
}

/// The nodes bound by each metavariable; metavariable `$n` is stored at index `n - 1`.
pub struct Bindings<'ast> {
    pub binds: Vec<Binding<'ast>>,
}

/// Visitor that walks an AST in step with a previously recorded trace and records the AST node
/// found at every position where the trace holds a metavariable instead of a subtree.
pub struct Binder<'ast, 't> {
    trace: ReTracer<'t>,
    /// Slot `n - 1` holds the first node seen for metavariable `$n`, if any.
    bindings: Vec<Option<Binding<'ast>>>,
}

impl<'ast, 't> Binder<'ast, 't> {
    pub fn new(trace: ReTracer<'t>) -> Self {
        Binder {
            trace,
            bindings: Vec::new(),
        }
    }

    /// Consume the metavariable at the current trace position and remember `binding` for it.
    /// Only the first occurrence of a metavariable is recorded; later ones are ignored.
    fn bind(&mut self, binding: Binding<'ast>) {
        let slot = usize::from(self.trace.consume_meta())
            .checked_sub(1)
            .expect("metavariable ids are numbered from 1");
        if slot >= self.bindings.len() {
            self.bindings.resize(slot + 1, None);
        }
        if self.bindings[slot].is_none() {
            self.bindings[slot] = Some(binding);
        }
    }
}

impl<'ast, 't> Visitor<'ast> for Binder<'ast, 't> {
    type Output = Bindings<'ast>;

    /// Returns `Err(())` (telling the caller to skip the subtree) when the trace has a
    /// metavariable where this expression starts.
    fn open_expr(&mut self, expr: &'ast syn::Expr) -> std::result::Result<(), ()> {
        if self.trace.open_subtree().is_err() {
            self.bind(Binding::Expr(expr));
            return Err(());
        }
        Ok(())
    }

    /// Identifier counterpart of `open_expr`.
    fn open_ident(&mut self, ident: &'ast syn::Ident) -> std::result::Result<(), ()> {
        if self.trace.open_subtree().is_err() {
            self.bind(Binding::Ident(ident));
            return Err(());
        }
        Ok(())
    }

    fn open_subtree(&mut self) {
        self.trace.open_subtree().unwrap();
    }
    fn close_subtree(&mut self) {
        self.trace.close_subtree().unwrap();
    }
    fn open_datum(&mut self) {
        self.trace.open_datum();
    }
    fn close_datum(&mut self) {
        self.trace.close_datum();
    }
    fn push_byte(&mut self, x: u8) {
        self.trace.push_byte(x);
    }
    fn extend_bytes(&mut self, x: &[u8]) {
        self.trace.extend_bytes(x);
    }

    /// Panics if the trace was not fully consumed or if some metavariable slot below the
    /// highest one seen was never bound.
    fn finish(self) -> Self::Output {
        self.trace.finish();
        let binds = self
            .bindings
            .into_iter()
            .enumerate()
            .map(|(slot, bound)| {
                bound.unwrap_or_else(|| panic!("metavariable ${} was never bound", slot + 1))
            })
            .collect();
        Bindings { binds }
    }
}

/// A statement sequence parsed with the same rules as the inside of a block.
struct Stmts(Vec<syn::Stmt>);

impl Parse for Stmts {
    fn parse(input: syn::parse::ParseStream) -> syn::parse::Result<Self> {
        Ok(Stmts(input.call(syn::Block::parse_within)?))
    }
}

/// A pattern definition.
///
/// `nodes` and `ids` are two parses of the same pattern body that differ only in the placeholder
/// substituted for each metavariable (see `MetaContext::apply`).
pub enum PatternDef {
    StmtSeq {
        nodes: Vec<syn::Stmt>,
        ids: Vec<syn::Stmt>,
    },
    Expr {
        nodes: syn::Expr,
        ids: syn::Expr,
    },
}

impl PatternDef {
    /// Parse a pattern from its metavariable declarations (`args`) and its body.
    ///
    /// A body consisting of exactly one expression without a trailing semicolon yields
    /// `PatternDef::Expr`; anything else yields `PatternDef::StmtSeq`.
    ///
    /// # Errors
    /// Returns `Error` when the body does not parse as a statement sequence.
    // NOTE(review): the return type was garbled in this listing; spelled out in full here so it
    // does not depend on the shape of the module's `Result` alias -- confirm against the original.
    pub fn parse(args: TokenStream, body: TokenStream) -> std::result::Result<Self, Error> {
        let (nodes, ids) = MetaContext::new(args).apply(body);
        let nodes: Stmts = syn::parse2(nodes).map_err(|_| Error)?;
        let ids: Stmts = syn::parse2(ids).expect("if succeeded must");
        let (mut nodes, ids) = (nodes.0, ids.0);

        let single_expr = nodes.len() == 1
            && match nodes[0] {
                syn::Stmt::Expr(_) => true,
                _ => false,
            };
        if single_expr {
            return match (nodes.pop(), ids.into_iter().next()) {
                (Some(syn::Stmt::Expr(nodes)), Some(syn::Stmt::Expr(ids))) => {
                    Ok(PatternDef::Expr { nodes, ids })
                }
                // both streams come from the same body, so they parse to the same shape
                _ => unreachable!(),
            };
        }
        Ok(PatternDef::StmtSeq { nodes, ids })
    }
}

/// A compiled pattern.
289 | pub enum Pattern { 290 | StmtSeq { trace: Trace }, 291 | Expr { trace: Trace }, 292 | } 293 | 294 | enum MatchesInner<'p, 'it> { 295 | StmtSeq { 296 | matches: crate::trace::ToplevelMatches<'p, 'it>, 297 | }, 298 | Expr { 299 | matches: crate::trace::InternalMatches<'p, 'it>, 300 | }, 301 | } 302 | 303 | #[derive(Debug)] 304 | pub struct Match { 305 | pub context: String, 306 | pub bindings: String, 307 | } 308 | 309 | pub struct Matches<'p, 'i, 'it> { 310 | inner: MatchesInner<'p, 'it>, 311 | pattern: &'p Trace, 312 | input: &'i [syn::Stmt], 313 | } 314 | 315 | impl Iterator for Matches<'_, '_, '_> { 316 | type Item = Match; 317 | fn next(&mut self) -> Option { 318 | match &mut self.inner { 319 | MatchesInner::StmtSeq { matches } => matches.next().map(|m| { 320 | let mut context = "[".to_owned(); 321 | let mut first = true; 322 | for s in &self.input[..m] { 323 | if !first { 324 | context.push_str(","); 325 | } 326 | first = false; 327 | context.push_str(&repr::input_json(s)); 328 | } 329 | if !first { 330 | context.push_str(","); 331 | } 332 | context.push_str("\"$1\""); 333 | for s in &self.input[m + self.pattern.toplevel_len()..] 
{ 334 | context.push_str(","); 335 | context.push_str(&repr::input_json(s)); 336 | } 337 | context.push_str("]"); 338 | let binder = Binder::new(ReTracer::new(self.pattern)); 339 | let bindings = binder.visit(&self.input[m..m + self.pattern.toplevel_len()]); 340 | let bindings = repr::bindings_json(&bindings); 341 | Match { context, bindings } 342 | }), 343 | MatchesInner::Expr { matches } => matches.next().map(|m| { 344 | let context = repr::pattern_json(&m, self.input); 345 | let extracted = Binder::new(ReTracer::new(&m)).visit(self.input); 346 | let bindings = if let crate::ast::Binding::Expr(ex) = extracted.binds[0] { 347 | Binder::new(ReTracer::new(&self.pattern)).visit(ex) 348 | } else { 349 | unreachable!() 350 | }; 351 | 352 | let bindings = repr::bindings_json(&bindings); 353 | Match { context, bindings } 354 | }), 355 | } 356 | } 357 | } 358 | 359 | impl Pattern { 360 | pub fn matches<'p, 'i, 'it>( 361 | &'p self, 362 | input: &'i [syn::Stmt], 363 | input_trace: &'it crate::trace::IndexedTrace, 364 | ) -> Matches<'p, 'i, 'it> { 365 | let (inner, pattern) = match self { 366 | Pattern::StmtSeq { trace } => ( 367 | MatchesInner::StmtSeq { 368 | matches: trace.toplevel_matches(input_trace), 369 | }, 370 | trace, 371 | ), 372 | Pattern::Expr { trace } => ( 373 | MatchesInner::Expr { 374 | matches: trace.internal_matches(input_trace), 375 | }, 376 | trace, 377 | ), 378 | }; 379 | Matches { 380 | inner, 381 | pattern, 382 | input, 383 | } 384 | } 385 | } 386 | 387 | pub struct Input { 388 | pub stmts: Vec, 389 | } 390 | 391 | impl Input { 392 | pub fn parse(ts: TokenStream) -> Result { 393 | let stmts: Stmts = syn::parse2(ts).map_err(|_| Error)?; 394 | let stmts = stmts.0; 395 | Ok(Input { stmts }) 396 | } 397 | 398 | pub fn compile(&self) -> IndexedTrace { 399 | IndexedTraceGenerator::with_hint(self.stmts.len()).visit(&self.stmts[..]) 400 | } 401 | 402 | pub fn debug_tree_repr(&self) -> String { 403 | repr::pattern_json(&self.compile().deindex(), 
&self.stmts[..]) 404 | } 405 | } 406 | 407 | impl PatternDef { 408 | fn trace(&self) -> Trace { 409 | match self { 410 | PatternDef::StmtSeq { nodes, ids } => Reconciler::new(&TraceGenerator::apply(&nodes[..])).visit(&ids[..]), 411 | PatternDef::Expr { nodes, ids } => Reconciler::new(&TraceGenerator::apply(nodes)).visit(ids), 412 | } 413 | } 414 | 415 | pub fn compile(&self) -> Pattern { 416 | match self { 417 | PatternDef::StmtSeq { .. } => Pattern::StmtSeq { trace: self.trace() }, 418 | PatternDef::Expr { .. } => Pattern::Expr { trace: self.trace() }, 419 | } 420 | } 421 | 422 | pub fn debug_tree_repr(&self) -> String { 423 | match self { 424 | PatternDef::StmtSeq { ids, .. } => repr::pattern_json(&self.trace(), &ids[..]), 425 | PatternDef::Expr { ids, .. } => repr::pattern_json(&self.trace(), ids), 426 | } 427 | } 428 | 429 | pub fn debug_flat_repr(&self) -> String { 430 | match self { 431 | PatternDef::StmtSeq { ids, .. } => repr::pattern_flat(&self.trace(), &ids[..]), 432 | PatternDef::Expr { ids, .. } => repr::pattern_flat(&self.trace(), ids), 433 | } 434 | } 435 | 436 | pub fn fragment(&self) -> String { 437 | match self { 438 | PatternDef::StmtSeq { .. } => "StmtSeq".to_owned(), 439 | PatternDef::Expr { .. } => "Expr".to_owned(), 440 | } 441 | } 442 | } 443 | -------------------------------------------------------------------------------- /src/ast/names.rs: -------------------------------------------------------------------------------- 1 | pub(crate) trait Discrim { 2 | fn discrim(&self) -> &'static str; 3 | } 4 | 5 | impl Discrim for syn::Expr { 6 | fn discrim(&self) -> &'static str { 7 | use syn::Expr::*; 8 | match self { 9 | Box(..) => "Box", 10 | InPlace(..) => "InPlace", 11 | Array(..) => "Array", 12 | Call(..) => "Call", 13 | MethodCall(..) => "MethodCall", 14 | Tuple(..) => "Tuple", 15 | Binary(..) => "Binary", 16 | Unary(..) => "Unary", 17 | Lit(..) => "Lit", 18 | Cast(..) => "Cast", 19 | Type(..) => "Type", 20 | Let(..) => "Let", 21 | If(..) 
=> "If", 22 | While(..) => "While", 23 | ForLoop(..) => "ForLoop", 24 | Loop(..) => "Loop", 25 | Match(..) => "Match", 26 | Closure(..) => "Closure", 27 | Unsafe(..) => "Unsafe", 28 | Block(..) => "Block", 29 | Assign(..) => "Assign", 30 | AssignOp(..) => "AssignOp", 31 | Field(..) => "Field", 32 | Index(..) => "Index", 33 | Range(..) => "Range", 34 | Path(..) => "Path", 35 | Reference(..) => "Reference", 36 | Break(..) => "Break", 37 | Continue(..) => "Continue", 38 | Return(..) => "Return", 39 | Macro(..) => "Macro", 40 | Struct(..) => "Struct", 41 | Repeat(..) => "Repeat", 42 | Paren(..) => "Paren", 43 | Group(..) => "Group", 44 | Try(..) => "Try", 45 | Async(..) => "Async", 46 | TryBlock(..) => "TryBlock", 47 | Yield(..) => "Yield", 48 | Verbatim(..) => "Verbatim", 49 | } 50 | } 51 | } 52 | 53 | impl Discrim for syn::Stmt { 54 | fn discrim(&self) -> &'static str { 55 | use syn::Stmt::*; 56 | match self { 57 | Local(..) => "Local", 58 | Item(..) => "Item", 59 | Expr(..) => "Expr", 60 | Semi(..) => "Semi", 61 | } 62 | } 63 | } 64 | 65 | impl Discrim for syn::Pat { 66 | fn discrim(&self) -> &'static str { 67 | use syn::Pat::*; 68 | match self { 69 | Wild(..) => "Wild", 70 | Ident(..) => "Ident", 71 | Struct(..) => "Struct", 72 | TupleStruct(..) => "TupleStruct", 73 | Path(..) => "Path", 74 | Tuple(..) => "Tuple", 75 | Box(..) => "Box", 76 | Ref(..) => "Ref", 77 | Lit(..) => "Lit", 78 | Range(..) => "Range", 79 | Slice(..) => "Slice", 80 | Macro(..) => "Macro", 81 | Verbatim(..) 
=> "Verbatim", 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/ast/repr.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | use std::io::Write; 3 | 4 | use crate::ast::{Binding, Bindings}; 5 | use crate::ast::names::Discrim; 6 | use crate::ast::visit::{Visitor, Visitable}; 7 | use crate::trace::*; 8 | 9 | pub(crate) trait Emitter { 10 | fn meta(&mut self, x: u32); 11 | fn item(&mut self, s: impl Display); 12 | fn text_item(&mut self, s: impl Display); 13 | fn opener(&mut self, s: impl Display); 14 | fn closer(&mut self); 15 | fn maybe_break(&mut self) {} 16 | fn finish(self) -> Vec; 17 | } 18 | 19 | pub(crate) struct ReprEmitter { 20 | buf: std::io::Cursor>, 21 | sibling: bool, 22 | } 23 | 24 | impl ReprEmitter { 25 | pub fn new() -> Self { 26 | let buf = std::io::Cursor::new(Vec::new()); 27 | let sibling = false; 28 | ReprEmitter { buf, sibling } 29 | } 30 | 31 | fn maybe_comma(&mut self) { 32 | if self.sibling { 33 | self.comma(); 34 | } 35 | } 36 | 37 | fn comma(&mut self) { 38 | write!(self.buf, " ").unwrap(); 39 | } 40 | } 41 | 42 | impl Emitter for ReprEmitter { 43 | fn meta(&mut self, x: u32) { 44 | self.maybe_comma(); 45 | write!(self.buf, "${}", x); 46 | self.sibling = true; 47 | } 48 | 49 | fn item(&mut self, s: impl Display) { 50 | self.maybe_comma(); 51 | write!(self.buf, "{}", s); 52 | self.sibling = true; 53 | } 54 | 55 | fn text_item(&mut self, s: impl Display) { 56 | self.maybe_comma(); 57 | write!(self.buf, "{}", s); 58 | self.sibling = true; 59 | } 60 | 61 | fn opener(&mut self, s: impl Display) { 62 | self.maybe_comma(); 63 | write!(self.buf, "{}{{", s); 64 | self.sibling = true; 65 | } 66 | 67 | fn closer(&mut self) { 68 | self.maybe_comma(); 69 | write!(self.buf, "}}"); 70 | self.sibling = true; 71 | } 72 | 73 | fn maybe_break(&mut self) { 74 | if self.buf.position() != 0 { 75 | writeln!(self.buf); 76 | self.sibling = 
false; 77 | } 78 | } 79 | 80 | fn finish(self) -> Vec { 81 | self.buf.into_inner() 82 | } 83 | } 84 | 85 | pub(crate) struct JsonEmitter { 86 | buf: std::io::Cursor>, 87 | sibling: bool, 88 | scalar_context: bool, 89 | } 90 | 91 | impl JsonEmitter { 92 | pub fn new() -> Self { 93 | let mut buf = std::io::Cursor::new(Vec::new()); 94 | write!(buf, "["); 95 | let sibling = false; 96 | JsonEmitter { 97 | buf, 98 | sibling, 99 | scalar_context: false, 100 | } 101 | } 102 | 103 | pub fn new_scalar() -> Self { 104 | let buf = std::io::Cursor::new(Vec::new()); 105 | let sibling = false; 106 | JsonEmitter { 107 | buf, 108 | sibling, 109 | scalar_context: true, 110 | } 111 | } 112 | 113 | fn maybe_comma(&mut self) { 114 | if self.sibling { 115 | self.comma(); 116 | } 117 | } 118 | 119 | fn comma(&mut self) { 120 | write!(self.buf, ",").unwrap(); 121 | } 122 | } 123 | 124 | impl Emitter for JsonEmitter { 125 | fn finish(mut self) -> Vec { 126 | if !self.scalar_context { 127 | write!(self.buf, "]"); 128 | } 129 | self.buf.into_inner() 130 | } 131 | 132 | fn meta(&mut self, x: u32) { 133 | self.maybe_comma(); 134 | write!(self.buf, "\"${}\"", x); 135 | self.sibling = true; 136 | } 137 | 138 | fn item(&mut self, s: impl Display) { 139 | self.maybe_comma(); 140 | write!(self.buf, "{}", s); 141 | self.sibling = true; 142 | } 143 | 144 | fn text_item(&mut self, s: impl Display) { 145 | self.maybe_comma(); 146 | write!(self.buf, "\"{}\"", s); 147 | self.sibling = true; 148 | } 149 | 150 | fn opener(&mut self, s: impl Display) { 151 | self.maybe_comma(); 152 | write!(self.buf, "[\"{}\"", s); 153 | self.sibling = true; 154 | } 155 | 156 | fn closer(&mut self) { 157 | write!(self.buf, "]"); 158 | self.sibling = true; 159 | } 160 | } 161 | 162 | /// An AST visitor that compares compiled MatchCode for a pattern with the AST for that pattern's 163 | /// or tree to emit a representation of the pattern. 
164 | pub(crate) struct ReprGenerator<'t, E> { 165 | emitter: E, 166 | trace: ReTracer<'t>, 167 | } 168 | 169 | impl<'t, E: Emitter> ReprGenerator<'t, E> { 170 | pub fn new(old: &'t Trace, emitter: E) -> Self { 171 | let trace = ReTracer::new(old); 172 | ReprGenerator { emitter, trace } 173 | } 174 | } 175 | 176 | impl<'t, 'ast, E: Emitter> Visitor<'ast> for ReprGenerator<'t, E> { 177 | type Output = String; 178 | 179 | fn open_expr(&mut self, x: &syn::Expr) -> Result<(), ()> { 180 | if let Err(()) = self.trace.open_subtree() { 181 | let x = u32::from(self.trace.consume_meta()); 182 | self.emitter.meta(x); 183 | return Err(()); 184 | } 185 | self.emitter.opener(x.discrim()); 186 | Ok(()) 187 | } 188 | fn open_ident(&mut self, x: &syn::Ident) -> Result<(), ()> { 189 | if let Err(()) = self.trace.open_subtree() { 190 | let x = u32::from(self.trace.consume_meta()); 191 | self.emitter.meta(x); 192 | return Err(()); 193 | } 194 | self.emitter.text_item(x); 195 | Ok(()) 196 | } 197 | fn open_stmt(&mut self, x: &syn::Stmt) { 198 | self.open_subtree(); 199 | self.emitter.maybe_break(); 200 | self.emitter.opener(x.discrim()); 201 | } 202 | fn open_pat(&mut self, x: &syn::Pat) { 203 | self.open_subtree(); 204 | self.emitter.opener(x.discrim()); 205 | } 206 | fn open_lit_int(&mut self, x: &syn::LitInt) { 207 | self.open_datum(); 208 | self.emitter.item(x.value()); 209 | } 210 | 211 | fn close_expr(&mut self, _: &syn::Expr) { 212 | self.close_subtree(); 213 | self.emitter.closer(); 214 | } 215 | fn close_stmt(&mut self, _: &syn::Stmt) { 216 | self.close_subtree(); 217 | self.emitter.closer(); 218 | } 219 | fn close_pat(&mut self, _: &syn::Pat) { 220 | self.close_subtree(); 221 | self.emitter.closer(); 222 | } 223 | 224 | fn open_subtree(&mut self) { 225 | self.trace.open_subtree().unwrap(); 226 | } 227 | fn close_subtree(&mut self) { 228 | self.trace.close_subtree().unwrap(); 229 | } 230 | fn open_datum(&mut self) { 231 | self.trace.open_datum(); 232 | } 233 | fn 
close_datum(&mut self) { 234 | self.trace.close_datum(); 235 | } 236 | fn push_byte(&mut self, x: u8) { 237 | self.trace.push_byte(x); 238 | } 239 | fn extend_bytes(&mut self, x: &[u8]) { 240 | self.trace.extend_bytes(x); 241 | } 242 | 243 | fn finish(self) -> Self::Output { 244 | String::from_utf8(self.emitter.finish()).unwrap() 245 | } 246 | } 247 | 248 | /// Serialize a normal AST (no metavars) 249 | pub(crate) struct PlainAstRepr { 250 | emitter: E, 251 | } 252 | 253 | impl PlainAstRepr { 254 | pub fn new(emitter: E) -> Self { 255 | PlainAstRepr { emitter } 256 | } 257 | } 258 | 259 | impl Visitor<'_> for PlainAstRepr { 260 | type Output = String; 261 | 262 | fn open_expr(&mut self, x: &syn::Expr) -> Result<(), ()> { 263 | self.emitter.opener(x.discrim()); 264 | Ok(()) 265 | } 266 | fn open_ident(&mut self, x: &syn::Ident) -> Result<(), ()> { 267 | self.emitter.text_item(x); 268 | Ok(()) 269 | } 270 | fn open_stmt(&mut self, x: &syn::Stmt) { 271 | self.emitter.maybe_break(); 272 | self.emitter.opener(x.discrim()); 273 | } 274 | fn open_pat(&mut self, x: &syn::Pat) { 275 | self.emitter.opener(x.discrim()); 276 | } 277 | fn open_lit_int(&mut self, x: &syn::LitInt) { 278 | self.emitter.item(x.value()); 279 | } 280 | 281 | fn close_expr(&mut self, _: &syn::Expr) { 282 | self.emitter.closer(); 283 | } 284 | fn close_stmt(&mut self, _: &syn::Stmt) { 285 | self.emitter.closer(); 286 | } 287 | fn close_pat(&mut self, _: &syn::Pat) { 288 | self.emitter.closer(); 289 | } 290 | 291 | fn open_subtree(&mut self) {} 292 | fn close_subtree(&mut self) {} 293 | fn open_datum(&mut self) {} 294 | fn close_datum(&mut self) {} 295 | fn push_byte(&mut self, x: u8) {} 296 | fn extend_bytes(&mut self, x: &[u8]) {} 297 | 298 | fn finish(self) -> Self::Output { 299 | String::from_utf8(self.emitter.finish()).unwrap() 300 | } 301 | } 302 | 303 | pub fn pattern_json<'a, 'v: 'a, V>(trace: &Trace, v: &'v V) -> String where V: Visitable<'a>+?Sized { 304 | ReprGenerator::new(trace, 
JsonEmitter::new()).visit(v) 305 | } 306 | 307 | pub fn pattern_flat<'a, 'v: 'a, V>(trace: &Trace, v: &'v V) -> String where V: Visitable<'a>+?Sized { 308 | ReprGenerator::new(trace, ReprEmitter::new()).visit(v) 309 | } 310 | 311 | pub fn input_json<'a, 'v: 'a, V>(v: &'v V) -> String where V: Visitable<'a>+?Sized { 312 | PlainAstRepr::new(JsonEmitter::new_scalar()).visit(v) 313 | } 314 | 315 | pub fn bindings_json(bindings: &'_ Bindings) -> String { 316 | let f = |b: &Binding| match b { 317 | Binding::Ident(i) => format!("[\"Ident\",\"{}\"]", i), 318 | Binding::Expr(x) => format!("[\"Expr\",{}]", input_json(*x)) 319 | }; 320 | let mut buf = "[".to_owned(); 321 | let mut binds = bindings.binds.iter(); 322 | if let Some(b) = binds.next() { 323 | buf.push_str(&f(b)); 324 | } 325 | for b in binds { 326 | buf.push_str(","); 327 | buf.push_str(&f(b)); 328 | } 329 | buf.push_str("]"); 330 | buf 331 | } 332 | -------------------------------------------------------------------------------- /src/ast/visit.rs: -------------------------------------------------------------------------------- 1 | use syn::visit::Visit; 2 | 3 | pub trait Visitor<'ast> { 4 | type Output; 5 | 6 | fn finish(self) -> Self::Output; 7 | 8 | fn open_expr(&mut self, _: &'ast syn::Expr) -> Result<(), ()> { 9 | self.open_subtree(); 10 | Ok(()) 11 | } 12 | fn open_ident(&mut self, _: &'ast syn::Ident) -> Result<(), ()> { 13 | self.open_subtree(); 14 | Ok(()) 15 | } 16 | fn open_stmt(&mut self, _: &'ast syn::Stmt) { 17 | self.open_subtree() 18 | } 19 | fn open_pat(&mut self, _: &'ast syn::Pat) { 20 | self.open_subtree() 21 | } 22 | fn open_lit_int(&mut self, _: &'ast syn::LitInt) { 23 | self.open_datum() 24 | } 25 | 26 | fn close_expr(&mut self, _: &'ast syn::Expr) { 27 | self.close_subtree() 28 | } 29 | fn close_ident(&mut self, _: &'ast syn::Ident) { 30 | self.close_subtree() 31 | } 32 | fn close_stmt(&mut self, _: &'ast syn::Stmt) { 33 | self.close_subtree() 34 | } 35 | fn close_pat(&mut self, _: &'ast 
syn::Pat) { 36 | self.close_subtree() 37 | } 38 | fn close_lit_int(&mut self, _: &'ast syn::LitInt) { 39 | self.close_datum() 40 | } 41 | 42 | fn open_subtree(&mut self); 43 | fn close_subtree(&mut self); 44 | fn open_datum(&mut self); 45 | fn close_datum(&mut self); 46 | fn push_byte(&mut self, x: u8); 47 | fn extend_bytes(&mut self, x: &[u8]); 48 | 49 | fn visit<'v: 'ast, V: Visitable<'ast>+?Sized>(mut self, v: &'v V) -> Self::Output where Self: Sized { 50 | v.apply(&mut self); 51 | self.finish() 52 | } 53 | 54 | fn apply<'v: 'ast, V: Visitable<'ast>+?Sized>(v: &'v V) -> Self::Output where Self: Sized+Default { 55 | let mut viz = ::default(); 56 | v.apply(&mut viz); 57 | viz.finish() 58 | } 59 | } 60 | 61 | pub trait Visitable<'a> { 62 | fn apply<'v, V: Visitor<'a>>(&'a self, v: &'v mut V); 63 | } 64 | 65 | impl<'a> Visitable<'a> for syn::Expr { 66 | fn apply<'v, V: Visitor<'a>>(&'a self, v: &'v mut V) { 67 | let mut v = SynVis { inner: v }; 68 | v.visit_expr(self); 69 | } 70 | } 71 | 72 | impl<'a> Visitable<'a> for syn::Stmt { 73 | fn apply<'v, V: Visitor<'a>>(&'a self, v: &'v mut V) { 74 | let mut v = SynVis { inner: v }; 75 | v.visit_stmt(self); 76 | } 77 | } 78 | 79 | impl<'a> Visitable<'a> for [syn::Stmt] { 80 | fn apply<'v, V: Visitor<'a>>(&'a self, v: &'v mut V) { 81 | let mut v = SynVis { inner: v }; 82 | for s in self { 83 | v.visit_stmt(s); 84 | } 85 | } 86 | } 87 | 88 | /* 89 | // TODO: make something like this work? 
90 | impl<'a, 't, T> Visitable<'a> for T where T: AsRef<&'a [syn::Stmt]> { 91 | fn apply<'v, V: Visitor<'a>>(&'a self, v: &'v mut V) { 92 | self.as_ref().apply(v) 93 | } 94 | } 95 | */ 96 | 97 | /// Outer visitor ensures AST->Trace lowering is consistent for different traversal modes 98 | struct SynVis<'v, V> { 99 | inner: &'v mut V, 100 | } 101 | 102 | impl<'ast, 'v, V: Visitor<'ast>> Visit<'ast> for SynVis<'v, V> { 103 | fn visit_stmt(&mut self, x: &'ast syn::Stmt) { 104 | self.inner.open_stmt(x); 105 | self.inner 106 | .push_byte(unsafe { std::mem::transmute::<_, u64>(std::mem::discriminant(x)) } as u8); 107 | syn::visit::visit_stmt(self, x); 108 | self.inner.close_stmt(x); 109 | } 110 | 111 | fn visit_expr(&mut self, x: &'ast syn::Expr) { 112 | if let Err(()) = self.inner.open_expr(x) { 113 | return; 114 | } 115 | self.inner 116 | .push_byte(unsafe { std::mem::transmute::<_, u64>(std::mem::discriminant(x)) } as u8); 117 | syn::visit::visit_expr(self, x); 118 | self.inner.close_expr(x); 119 | } 120 | 121 | fn visit_pat(&mut self, x: &'ast syn::Pat) { 122 | self.inner.open_pat(x); 123 | syn::visit::visit_pat(self, x); 124 | self.inner.close_pat(x); 125 | } 126 | 127 | fn visit_ident(&mut self, x: &'ast syn::Ident) { 128 | if let Err(()) = self.inner.open_ident(x) { 129 | return; 130 | } 131 | self.inner.extend_bytes(x.to_string().as_bytes()); 132 | self.inner.close_ident(x); 133 | } 134 | 135 | fn visit_lit_int(&mut self, x: &'ast syn::LitInt) { 136 | self.inner.open_lit_int(x); 137 | // TODO: compact repr? 
138 | self.inner.extend_bytes(format!("{}", x.value()).as_bytes()); 139 | self.inner.close_lit_int(x); 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub use proc_macro2; 2 | pub mod trace; 3 | #[cfg(feature = "syn")] 4 | pub mod ast; 5 | 6 | mod tokens; 7 | -------------------------------------------------------------------------------- /src/tokens.rs: -------------------------------------------------------------------------------- 1 | /// A TokenStream can represent: 2 | /// 1: standard syntax 3 | /// 2: a Macros 2.0-style metavars declaration 4 | /// 3: (1) extended with metavars declared in a separate stream of type (2) 5 | /// 4: (1) extended with self-describing metavars 6 | /// This module supports using (2) + (3) to produce (4). 7 | use proc_macro2::{TokenStream, TokenTree}; 8 | 9 | use log::trace; 10 | use std::collections::HashMap; 11 | 12 | type DefMap = HashMap; 13 | 14 | #[derive(Debug)] 15 | enum MacBodyState { 16 | AwaitingDollar, 17 | AwaitingIdent, 18 | Cont, 19 | } 20 | 21 | struct MacBodyTransducer<'a, It, F> { 22 | ts: It, 23 | defs: &'a DefMap, 24 | state: MacBodyState, 25 | cont: Vec, 26 | tokenize: &'a F, 27 | } 28 | 29 | impl<'a, It, F> MacBodyTransducer<'a, It, F> { 30 | fn new(ts: It, defs: &'a DefMap, tokenize: &'a F) -> Self { 31 | let state = MacBodyState::AwaitingDollar; 32 | let cont = Vec::new(); 33 | MacBodyTransducer { 34 | ts, 35 | defs, 36 | state, 37 | cont, 38 | tokenize, 39 | } 40 | } 41 | } 42 | 43 | #[derive(Debug)] 44 | pub enum NodeType { 45 | Expr, 46 | Ident, 47 | } 48 | 49 | #[derive(Debug)] 50 | struct MetaDef { 51 | node: NodeType, 52 | id: u32, 53 | } 54 | 55 | impl MetaDef { 56 | // placeholder token that must: 57 | // - parse as the right syntactic type 58 | // - always differ from the corresponding id_token 59 | fn node_token(&self) -> TokenTree { 60 | match 
self.node { 61 | self::NodeType::Ident => { 62 | syn::Ident::new("__IDENT", proc_macro2::Span::call_site()).into() 63 | } 64 | self::NodeType::Expr => { 65 | syn::Ident::new("__EXPR", proc_macro2::Span::call_site()).into() 66 | } 67 | } 68 | } 69 | 70 | // placeholder token that must: 71 | // - always differ from the corresponding node_token 72 | // - parse as the right syntactic type 73 | // - encode a mvar id 74 | fn id_token(&self) -> TokenTree { 75 | match self.node { 76 | self::NodeType::Ident => syn::Ident::new( 77 | &format!("IDENT_{}", self.id), 78 | proc_macro2::Span::call_site(), 79 | ).into(), 80 | //self::NodeType::Expr => proc_macro2::Literal::u32_suffixed(self.id).into() 81 | self::NodeType::Expr => { 82 | syn::Ident::new(&format!("EXPR_{}", self.id), proc_macro2::Span::call_site()).into() 83 | } 84 | } 85 | } 86 | } 87 | 88 | impl<'a, It: Iterator, F: Fn(&MetaDef) -> TokenTree> Iterator 89 | for MacBodyTransducer<'a, It, F> 90 | { 91 | type Item = TokenTree; 92 | 93 | fn next(&mut self) -> Option { 94 | use self::MacBodyState::*; 95 | use proc_macro2::TokenTree::*; 96 | if let Cont = self.state { 97 | if self.cont.is_empty() { 98 | self.state = AwaitingDollar; 99 | } else { 100 | return self.cont.pop(); 101 | } 102 | } 103 | let tt = self.ts.next(); 104 | match (&self.state, tt) { 105 | (AwaitingDollar, Some(Punct(ref c))) if c.as_char() == '$' => { 106 | self.state = AwaitingIdent; 107 | self.next() 108 | } 109 | (AwaitingDollar, Some(Group(ref g))) => { 110 | let delim = g.delimiter(); 111 | let ts = MacBodyTransducer::new(g.stream().into_iter(), self.defs, self.tokenize) 112 | .collect(); 113 | Some(proc_macro2::Group::new(delim, ts).into()) 114 | } 115 | (AwaitingDollar, x) => x, 116 | (AwaitingIdent, Some(Ident(id))) => { 117 | self.state = AwaitingDollar; 118 | Some((self.tokenize)(&self.defs[&id.to_string()])) 119 | } 120 | (AwaitingIdent, Some(Punct(ref c))) if c.as_char() == '$' => { 121 | self.state = AwaitingDollar; 122 | 
Some(Punct(c.clone())) 123 | } 124 | (AwaitingIdent, _) => { 125 | panic!("macro body parse failure: after '$', expected one of: identifier, '$'") 126 | } 127 | (Cont, _) => unreachable!("Cont handled before advancing ts"), 128 | } 129 | } 130 | } 131 | 132 | pub struct MetaContext { 133 | bindings: DefMap, 134 | } 135 | 136 | fn parse_args(ts: TokenStream) -> DefMap { 137 | let mut ts = ts.into_iter(); 138 | let mut args = HashMap::new(); 139 | loop { 140 | use proc_macro2::TokenTree::*; 141 | match ts.next() { 142 | Some(Punct(ref c)) if c.as_char() == '$' => (), 143 | None => break, 144 | _ => panic!(), 145 | } 146 | let id = match ts.next() { 147 | Some(Ident(id)) => id.to_string(), 148 | _ => panic!(), 149 | }; 150 | match ts.next() { 151 | Some(Punct(ref c)) if c.as_char() == ':' => (), 152 | _ => panic!(), 153 | } 154 | let node = match ts.next() { 155 | Some(Ident(typ)) => typ, 156 | _ => panic!(), 157 | }; 158 | let node = match node.to_string().as_ref() { 159 | "ident" => self::NodeType::Ident, 160 | "expr" => self::NodeType::Expr, 161 | _ => panic!(), 162 | }; 163 | let def = MetaDef { 164 | node, 165 | id: (args.len() + 1) as u32, 166 | }; 167 | let prev_def = args.insert(id, def); 168 | assert!(prev_def.is_none()); 169 | match ts.next() { 170 | Some(Punct(ref c)) if c.as_char() == ',' => (), 171 | None => break, 172 | _ => panic!(), 173 | } 174 | } 175 | args 176 | } 177 | 178 | impl MetaContext { 179 | /// A macro's args declaration creates a context associating AST types with metavar names. 
180 | pub fn new(ts: TokenStream) -> Self { 181 | let bindings = parse_args(ts); 182 | trace!("bindings={:?}", bindings); 183 | MetaContext { bindings } 184 | } 185 | 186 | pub fn apply(&self, ts: TokenStream) -> (TokenStream, TokenStream) { 187 | let nodes = 188 | MacBodyTransducer::new(ts.clone().into_iter(), &self.bindings, &MetaDef::node_token); 189 | let ids = MacBodyTransducer::new(ts.into_iter(), &self.bindings, &MetaDef::id_token); 190 | (nodes.collect(), ids.collect()) 191 | } 192 | } 193 | 194 | // for ergonomics, need to support $_ at least when its unambiguously parsable 195 | //let $baz: $_ = $foo; 196 | 197 | // macro type system 198 | /* 199 | item 200 | block 201 | stmt 202 | pat 203 | expr 204 | ty 205 | ident 206 | path 207 | lifetime 208 | meta: the contents of an attribute 209 | tt: a token tree (a single token by matching (), [], or {}) 210 | */ 211 | 212 | // equivalence classes are more composable than backrefs 213 | -------------------------------------------------------------------------------- /src/trace.rs: -------------------------------------------------------------------------------- 1 | //! 
A regex language for trees 2 | 3 | use log::trace; 4 | use std::fmt::Debug; 5 | 6 | #[derive(PartialEq, Eq, Clone)] 7 | pub struct Trace { 8 | buf: Vec, 9 | } 10 | 11 | const META: u8 = 255; 12 | const OPEN: u8 = 254; 13 | const CLOSE: u8 = 253; 14 | 15 | #[derive(Debug)] 16 | pub struct IndexedTrace { 17 | trace: Trace, 18 | indexes: Vec, 19 | } 20 | 21 | impl IndexedTrace { 22 | fn traces(&self) -> Traces { 23 | Traces { 24 | master: &self, 25 | i: 0, 26 | } 27 | } 28 | 29 | // XXX this should not need to exist 30 | pub fn deindex(self) -> Trace { 31 | self.trace 32 | } 33 | } 34 | 35 | struct Traces<'a> { 36 | master: &'a IndexedTrace, 37 | i: usize, 38 | } 39 | 40 | impl<'a> Iterator for Traces<'a> { 41 | type Item = (usize, Symbols<'a>); 42 | fn next(&mut self) -> Option { 43 | if let Some(&offs) = self.master.indexes.get(self.i) { 44 | let i = self.i; 45 | self.i += 1; 46 | Some(( 47 | i, 48 | Symbols { 49 | buf: &self.master.trace.buf[offs..], 50 | }, 51 | )) 52 | } else { 53 | None 54 | } 55 | } 56 | } 57 | 58 | pub struct ToplevelMatches<'a, 'b> { 59 | pattern: Symbols<'a>, 60 | inputs: Traces<'b>, 61 | } 62 | 63 | impl Iterator for ToplevelMatches<'_, '_> { 64 | type Item = usize; 65 | fn next(&mut self) -> Option { 66 | for (i, mut input) in self.inputs.by_ref() { 67 | if is_match(self.pattern.clone(), &mut input) { 68 | return Some(i); 69 | } 70 | } 71 | None 72 | } 73 | } 74 | 75 | #[derive(PartialEq, Eq, Debug)] 76 | enum Symbol { 77 | Meta(u8), 78 | Literal(u8), 79 | Open, 80 | Close, 81 | } 82 | 83 | #[derive(Clone)] 84 | struct Symbols<'a> { 85 | buf: &'a [u8], 86 | } 87 | 88 | impl<'a> Iterator for Symbols<'a> { 89 | type Item = Symbol; 90 | 91 | fn next(&mut self) -> Option { 92 | if let Some((&x, rest)) = self.buf.split_first() { 93 | if x == META { 94 | let ((&x, rest)) = rest.split_first().unwrap(); 95 | self.buf = rest; 96 | Some(match x { 97 | META => Symbol::Literal(META), 98 | OPEN => Symbol::Open, 99 | CLOSE => Symbol::Close, 100 | x => 
Symbol::Meta(x), 101 | }) 102 | } else { 103 | self.buf = rest; 104 | Some(Symbol::Literal(x)) 105 | } 106 | } else { 107 | None 108 | } 109 | } 110 | 111 | fn size_hint(&self) -> (usize, Option) { 112 | // from 0 to half the characters remaining, rounded down, are metas that won't become 113 | // independent symbols 114 | ((self.buf.len() + 1) / 2, Some(self.buf.len())) 115 | } 116 | } 117 | 118 | impl Trace { 119 | fn symbols(&self) -> Symbols { 120 | Symbols { buf: &self.buf } 121 | } 122 | } 123 | 124 | impl Debug for Trace { 125 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 126 | write!(f, "{:?}", self.symbols()) 127 | } 128 | } 129 | 130 | impl Debug for Symbols<'_> { 131 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 132 | for x in self.clone() { 133 | match x { 134 | Symbol::Literal(x) => write!(f, "{:02x}", x)?, 135 | Symbol::Meta(x) => write!(f, "${}", u32::from(x))?, 136 | Symbol::Open => write!(f, "[")?, 137 | Symbol::Close => write!(f, "]")?, 138 | } 139 | } 140 | Ok(()) 141 | } 142 | } 143 | 144 | #[derive(Default)] 145 | pub(crate) struct Tracer { 146 | buf: Vec, 147 | datum: Option, 148 | // IndexedTrace 149 | indexes: Vec, 150 | } 151 | 152 | impl Tracer { 153 | pub fn push_byte(&mut self, data: u8) { 154 | self.buf.push(data); 155 | if data == META { 156 | self.buf.push(data); 157 | } 158 | } 159 | 160 | pub fn extend_bytes(&mut self, data: &[u8]) { 161 | // TODO: less pessimal impl? 
162 | for x in data { 163 | self.push_byte(*x); 164 | } 165 | } 166 | 167 | pub fn open_subtree(&mut self) { 168 | self.buf.push(META); 169 | self.buf.push(OPEN); 170 | } 171 | 172 | pub fn close_subtree(&mut self) { 173 | self.buf.push(META); 174 | self.buf.push(CLOSE); 175 | } 176 | 177 | pub fn open_datum(&mut self) { 178 | self.buf.push(0); 179 | assert!(self.datum.is_none()); 180 | self.datum = Some(self.buf.len() - 1); 181 | } 182 | 183 | pub fn close_datum(&mut self) { 184 | let open = self.datum.expect("open before closing"); 185 | self.datum = None; 186 | let diff = self.buf.len() - open; 187 | let smol_diff = diff as u8; 188 | assert_eq!(usize::from(smol_diff), diff); 189 | assert!(smol_diff != META); 190 | assert!(smol_diff != OPEN); 191 | assert!(smol_diff != CLOSE); 192 | self.buf[open] = smol_diff; 193 | } 194 | 195 | pub fn push_mvar(&mut self, mvar: u8) { 196 | assert!(mvar != META); 197 | assert!(mvar != OPEN); 198 | assert!(mvar != CLOSE); 199 | self.buf.push(META); 200 | self.buf.push(mvar); 201 | } 202 | 203 | pub fn finish(self) -> Trace { 204 | Trace { buf: self.buf } 205 | } 206 | } 207 | 208 | // basic tracer also builds IndexedTrace 209 | impl Tracer { 210 | pub fn expect_marks(&mut self, n: usize) { 211 | self.indexes.reserve_exact(n); 212 | } 213 | 214 | pub fn mark(&mut self) { 215 | let pos = self.buf.len(); 216 | self.indexes.push(pos); 217 | } 218 | 219 | pub fn finish_with_indexes(self) -> IndexedTrace { 220 | IndexedTrace { 221 | trace: Trace { buf: self.buf }, 222 | indexes: self.indexes, 223 | } 224 | } 225 | } 226 | 227 | /// instead of emitting a Trace, follow along with a previous, noting any differing subtrees 228 | pub struct ReTracer<'a> { 229 | buf: &'a [u8], 230 | datum: Option, 231 | i: usize, 232 | diff_depth: usize, 233 | } 234 | 235 | impl<'t> ReTracer<'t> { 236 | pub fn new(trace: &'t Trace) -> Self { 237 | let buf = &trace.buf; 238 | ReTracer { 239 | buf, 240 | datum: None, 241 | i: 0, 242 | diff_depth: 0, 243 | } 
} // closes `ReTracer::new`

    /// Finish following the previous trace.
    ///
    /// # Panics
    /// Panics if a datum is still open, if a mismatching subtree is still being skipped, or if
    /// the previous trace was not consumed to its end.
    pub fn finish(self) {
        // it's a programming error to attempt to complete a trace inside a subtree
        assert!(self.datum.is_none());
        assert_eq!(self.diff_depth, 0);
        // old trace is expected to match structurally, so this should also not happen in a
        // completed run
        assert_eq!(self.i, self.buf.len());
    }

    /// Compare one literal byte against the previous trace, advancing past it on agreement.
    ///
    /// Inside a mismatching subtree this is a no-op. On disagreement the mismatch state is
    /// entered (`diff_depth = 1`).
    pub fn push_byte(&mut self, data: u8) {
        if self.diff_depth != 0 {
            return;
        }
        if self.buf[self.i] != data {
            self.diff_depth = 1;
            return;
        }
        self.i += 1;
        if data == META {
            // A literal META is stored doubled, so the second copy must be present too.
            if self.buf[self.i] != META {
                self.diff_depth = 1;
                return;
            }
            self.i += 1;
        }
    }

    /// Compare every byte of `data` through `push_byte`.
    pub fn extend_bytes(&mut self, data: &[u8]) {
        // TODO: less pessimal impl?
        for x in data {
            self.push_byte(*x);
        }
    }

    /// Expect a subtree opener (`META, OPEN`) at the cursor and step over it.
    ///
    /// on failure, nothing has been consumed
    pub fn open_subtree(&mut self) -> Result<(), ()> {
        //trace!("ReTracer::open_subtree: diff_depth={}", self.diff_depth);
        if self.diff_depth != 0 {
            self.diff_depth += 1;
            // if parent mismatched, our status is neither here nor there
            return Ok(());
        }
        if self.buf[self.i] == META && self.buf[self.i + 1] == OPEN {
            self.i += 2;
            Ok(())
        } else {
            Err(())
        }
    }

    // if a mismatch occurs:
    // - consume the subtree
    // - count depth within dead subtree

    /// Expect a subtree closer (`META, CLOSE`) at the cursor and step over it.
    ///
    /// Returns `Ok(())` only when the traces were in sync and the closer was found in place.
    /// Otherwise the previous trace is skipped forward, one closer per outstanding level of
    /// `diff_depth`, and `Err(())` is returned.
    ///
    /// on failure, this mismatching subtree has been consumed
    pub fn close_subtree(&mut self) -> Result<(), ()> {
        //trace!("ReTracer::close_subtree: diff_depth={}", self.diff_depth);
        if self.diff_depth == 0 {
            if self.buf[self.i] == META && self.buf[self.i + 1] == CLOSE {
                self.i += 2;
                return Ok(());
            }
            self.diff_depth = 1;
        }
        while self.diff_depth > 0 {
            // A closer is the *pair* `META, CLOSE`, so both bytes must match. Testing them
            // with `||` treated every META pair, and every byte followed by the CLOSE value,
            // as a closer and ended the skip too early.
            // NOTE(review): escaped `META, META` pairs and subtrees nested inside the skipped
            // region of the previous trace are not specially handled here - confirm whether
            // they can occur.
            if self.buf[self.i] == META && self.buf[self.i + 1] == CLOSE {
                self.i += 2;
                self.diff_depth -= 1;
            } else {
                self.i += 1;
            }
        }
        Err(())
    }

    /// Read the datum length byte at the cursor, remember where the datum must end, and step
    /// past the length byte.
    ///
    /// # Panics
    /// Panics if a datum is already open.
    pub fn open_datum(&mut self) {
        assert!(self.datum.is_none());
        self.datum = Some(self.i + usize::from(self.buf[self.i]));
        self.i += 1;
    }

    /// Close the open datum.
    ///
    /// # Panics
    /// Panics unless the cursor sits exactly at the end offset recorded by `open_datum`.
    pub fn close_datum(&mut self) {
        assert_eq!(self.datum, Some(self.i));
        self.datum = None;
    }

    /// Step over a `META, id` pair at the cursor and return `id`.
    ///
    /// # Panics
    /// Panics if the cursor is not at `META`, or if the following byte is `META`, `OPEN` or
    /// `CLOSE`.
    pub fn consume_meta(&mut self) -> u8 {
        let x = self.buf[self.i];
        assert_eq!(x, META);
        self.i += 1;
        let x = self.buf[self.i];
        assert!(x != META);
        assert!(x != OPEN);
        assert!(x != CLOSE);
        self.i += 1;
        x
    }
}

/// transactional Tracer builder that can >/dev/null the current subtree to a specified depth and
/// emit a replacement
#[derive(Default)]
pub(crate) struct TxTracer {
    /// The trace under construction.
    trace: Tracer,
    /// Bytes spliced in once a rollback completes.
    pub replacement: Tracer,
    /// Buffer offset at which each currently open subtree started.
    stack: Vec<usize>,
    /// Number of enclosing subtrees still to be discarded as they close.
    rollbacks: usize,
}

impl TxTracer {
    /// Wrap an existing `Tracer` with no open subtrees and no rollback pending.
    pub fn new(trace: Tracer) -> Self {
        TxTracer {
            trace,
            replacement: Tracer::default(),
            stack: Vec::new(),
            rollbacks: 0,
        }
    }

    /// Consume the builder and return the finished `Trace`.
    ///
    /// # Panics
    /// Panics if a subtree is still open, if replacement bytes are still waiting to be
    /// spliced, or if a rollback is still pending.
    pub fn finish(self) -> Trace {
        assert!(self.stack.is_empty());
        assert!(self.replacement.buf.is_empty());
        assert_eq!(self.rollbacks, 0);
        self.trace.finish()
    }

    /// Forwarded to the inner `Tracer`.
    pub fn push_byte(&mut self, data: u8) {
        self.trace.push_byte(data);
    }
    /// Forwarded to the inner `Tracer`.
    pub fn extend_bytes(&mut self, data: &[u8]) {
        self.trace.extend_bytes(data);
    }
    /// Forwarded to the inner `Tracer`.
    pub fn open_datum(&mut self) {
        self.trace.open_datum();
    }
    /// Forwarded to the inner `Tracer`.
    pub fn close_datum(&mut self) {
        self.trace.close_datum();
    }

    /// Open a subtree, remembering the buffer offset at which it starts so that it can be
    /// truncated away again if it is rolled back.
    pub fn open_subtree(&mut self) {
        self.stack.push(self.trace.buf.len());
        self.trace.open_subtree();
        //trace!("TxTracer::open_subtree: {:?}", self.trace);
    }
    /// Close the innermost open subtree. While a rollback is pending the subtree's bytes are
    /// discarded instead of kept.
    pub fn close_subtree(&mut self) {
        self.trace.close_subtree();
        let start =
self.stack.pop().unwrap();
        if self.rollbacks == 0 {
            // Nothing to undo: the subtree stays in the trace as written.
            //trace!("TxTracer::close_subtree: {:?}", self.trace);
            return;
        }
        // A rollback is pending: drop everything emitted since this subtree was opened.
        //trace!("TxTracer::close_subtree: rollback: before: {:?}", self.trace);
        self.trace.buf.truncate(start);
        self.rollbacks -= 1;
        //trace!("TxTracer::close_subtree: rollback: after: {:?}", self.trace);
        if self.rollbacks == 0 {
            // Last discarded level: splice in the replacement bytes and reset them.
            self.trace.buf.extend(&self.replacement.buf);
            self.replacement.buf.clear();
        }
        //trace!("TxTracer::close_subtree: rollback: splice: {:?}", self.trace);
    }

    /// Arrange for the current subtree and `depth` of its ancestors to be discarded as they
    /// close. Panics if a rollback is already pending.
    pub fn rollback(&mut self, depth: usize) {
        assert_eq!(self.rollbacks, 0);
        let levels = depth + 1;
        self.rollbacks = levels;
        //trace!("TxTracer::rollback: {}", self.rollbacks);
    }
}

/// Builds a fresh trace while walking a previous one in lockstep, so that differences between
/// the two are noticed as they are emitted.
pub(crate) struct DeltaTracer<'t> {
    old: ReTracer<'t>,
    pub new: TxTracer,
}

impl<'t> DeltaTracer<'t> {
    /// Follow `old`; the new buffer is pre-sized to the old trace's byte length.
    pub fn new(old: &'t Trace) -> Self {
        let mut scratch = Tracer::default();
        scratch.buf = Vec::with_capacity(old.buf.len());
        DeltaTracer {
            old: ReTracer::new(old),
            new: TxTracer::new(scratch),
        }
    }

    /// Run the completion checks of the follower, then return the newly built trace.
    pub fn finish(self) -> Trace {
        let DeltaTracer { old, new } = self;
        old.finish();
        new.finish()
    }

    /// Feed one byte to the follower, then to the builder.
    pub fn push_byte(&mut self, data: u8) {
        self.old.push_byte(data);
        self.new.push_byte(data);
    }

    /// Feed a byte slice to the follower, then to the builder.
    pub fn extend_bytes(&mut self, data: &[u8]) {
        self.old.extend_bytes(data);
        self.new.extend_bytes(data);
    }

    /// Open a subtree on both sides; the builder is left untouched when the follower fails.
    pub fn open_subtree(&mut self) -> Result<(), ()> {
        self.old.open_subtree().map(|()| self.new.open_subtree())
    }

    /// Close a subtree on both sides; the builder is left untouched when the follower fails.
    pub fn close_subtree(&mut self) -> Result<(), ()> {
        self.old.close_subtree().map(|()| self.new.close_subtree())
    }

    /// Open a datum on the follower, then on the builder.
    pub fn open_datum(&mut self) {
        self.old.open_datum();
        self.new.open_datum();
    }

    /// Close a datum on the follower, then on the builder.
    pub fn close_datum(&mut self) {
        self.old.close_datum();
        self.new.close_datum();
    }
}

impl Trace {
    /// Count the subtrees that open at the top level of this trace.
    pub fn toplevel_len(&self) -> usize {
        let mut symbols = self.symbols();
        let mut subtrees = 0;
        loop {
            match symbols.next() {
                Some(Symbol::Open) => {
                    subtrees += 1;
                    // Skip the body so nested openers are not counted.
                    close_subtree(&mut symbols);
                }
                Some(_) => {}
                None => break subtrees,
            }
        }
    }
}

impl Trace {
    /// Test whether this pattern matches `input`, anchored at the start of `input`.
    pub fn is_match(&self, input: &Trace) -> bool {
        // performance: parsing symbols is probably slow. Faster to compare optimistically, and
        // backtrack 1 byte on mismatch to check meta status
        let pattern = self.symbols();
        let mut candidate = input.symbols();
        is_match(pattern, &mut candidate)
    }

    /// Return an iterator over the top-level matches of this pattern in `input`.
    pub fn toplevel_matches<'s, 'i>(&'s self, input: &'i IndexedTrace) -> ToplevelMatches<'s, 'i> {
        let pattern = self.symbols();
        let inputs = input.traces();
        ToplevelMatches { pattern, inputs }
    }

    /// Search the input for this pattern at any depth. Return an iterator that, for each match,
    /// yields Trace that will show a visitor where to find the matching expression in the tree.
    pub fn internal_matches<'s, 'i>(&'s self, input: &'i IndexedTrace) -> InternalMatches<'s, 'i> {
        let haystack = &input.trace;
        InternalMatches::new(self.symbols(), haystack.symbols(), haystack.buf.len())
    }
}

/// Iterator state for `Trace::internal_matches`.
pub struct InternalMatches<'p, 'i> {
    pattern: Symbols<'p>,
    orig_input: Symbols<'i>,
    input: Symbols<'i>,
    in_len: usize,
}

impl<'p, 'i> InternalMatches<'p, 'i> {
    fn new(pattern: Symbols<'p>, input: Symbols<'i>, in_len: usize) -> Self {
        // Degenerate case, should prevent at pattern compilation time.
assert!(!pattern.buf.is_empty());
        InternalMatches {
            pattern,
            orig_input: input.clone(),
            input,
            in_len,
        }
    }
}

impl<'p, 'i> Iterator for InternalMatches<'p, 'i> {
    type Item = Trace;

    /// Advance through the input one symbol at a time, testing an anchored match at each
    /// position.
    ///
    /// For a match, the yielded trace is the input bytes that precede the match position, then
    /// metavariable `1`, then whatever input is left after the matched region.
    fn next(&mut self) -> Option<Trace> {
        // A pattern can't match anything shorter than itself.
        while self.input.buf.len() >= self.pattern.buf.len() {
            let remaining = self.input.buf.len();
            let mut here = self.input.clone();
            // Consume a symbol.
            self.input.next().unwrap();
            // Performance note in is_match applies times N here.
            if is_match(self.pattern.clone(), &mut here) {
                // Byte offset of the match start within the original input.
                let pos = self.in_len - remaining;
                let mut tracer = Tracer::default();
                tracer.buf.extend_from_slice(&self.orig_input.buf[..pos]);
                tracer.push_mvar(1);
                // `here` was advanced by `is_match`, so this is the input after the match.
                tracer.buf.extend_from_slice(&here.buf);
                let trace = tracer.finish();
                trace!("reconstructed: {:?}", trace);
                return Some(trace);
            }
        }
        None
    }
}

/////// The Matching. Expected Hot Path

// NOTE(review): the signature, the depth initialisation and the `Symbol::Open` arm of this
// function were lost from the text and are reconstructed from the surviving arms and from the
// two call sites, both of which call it right after consuming an opener. Confirm against the
// upstream source, in particular how an exhausted input should be handled.
/// Consume symbols up to and including the closer of the subtree whose opener has just been
/// consumed, stepping over nested subtrees.
fn close_subtree(input: &mut Symbols) {
    let mut n = 1;
    while n > 0 {
        match input.next().expect("subtree is closed before the input ends") {
            Symbol::Open => n += 1,
            Symbol::Close => n -= 1,
            _ => (),
        }
    }
}

/// Encoded length, in bytes, of a subtree closer (`META, CLOSE`).
const CLOSER_LEN: usize = 2;

// NB.
for efficiency, caller should handle quitting when input is shorter than pattern 569 | fn is_match(pattern: Symbols, input: &mut Symbols) -> bool { 570 | //trace!("is_match:\n\tptn: {:?}\n\tinp: {:?}", pattern.clone(), input.clone()); 571 | let mut bindings = Vec::new(); 572 | for p in pattern { 573 | match (p, input.next()) { 574 | (ref x, Some(ref y)) if x == y => (), 575 | (Symbol::Meta(x), Some(Symbol::Open)) => { 576 | let x = x as usize; 577 | if x >= bindings.len() { 578 | bindings.resize(x + 1, None); 579 | } 580 | if let Some(matched) = bindings[x] { 581 | if !input.buf.starts_with(matched) { 582 | //trace!("is_match: false; binding differed: {}", x); 583 | return false; 584 | } 585 | //trace!("is_match: binding rematched: {}", x); 586 | let (_, rest) = input.buf.split_at(matched.len() + CLOSER_LEN); 587 | input.buf = rest; 588 | } else { 589 | // consume the subtree and remember its dimensions 590 | //trace!("is_match: bound a binding: {}", x); 591 | let (from_match, from_match_len) = (input.clone(), input.buf.len()); 592 | close_subtree(input); 593 | let subtree_len = from_match_len - input.buf.len() - CLOSER_LEN; 594 | let (matched, _) = from_match.buf.split_at(subtree_len); 595 | bindings[x] = Some(matched); 596 | } 597 | } 598 | (ref _x, ref _y) => { 599 | //trace!("is_match: false; structure differed {:02x?} // {:02x?}", _x, _y); 600 | return false; 601 | } 602 | } 603 | } 604 | //trace!("is_match: true!"); 605 | true 606 | } 607 | -------------------------------------------------------------------------------- /testcase/input.rs: -------------------------------------------------------------------------------- 1 | fn baz() { 2 | let mut thing1 = 23; 3 | let mut thing2 = 42; 4 | 5 | // let's manually swap some stuff! 6 | let temp = thing1; 7 | thing1 = thing2; 8 | thing2 = temp; 9 | 10 | // there are two things 11 | let _ = thing1 + thing2; 12 | 13 | // now let's do things with parentheses! 
14 | let something_else_happening = (thing1 + thing1); 15 | } 16 | -------------------------------------------------------------------------------- /testcase/patterns.rs: -------------------------------------------------------------------------------- 1 | macro manual_swap($t: ident, $x: expr, $y: expr) { 2 | let $t = $x; 3 | $x = $y; 4 | $y = $t; 5 | } 6 | 7 | macro test_bracketed($x:expr) { 8 | ($x + $x) 9 | } 10 | 11 | macro test_bracketed2($x:expr, $y:ident) { 12 | ($x + $y) 13 | } 14 | --------------------------------------------------------------------------------