├── .gitignore ├── Cargo.toml ├── README.md ├── datapond-derive ├── Cargo.toml ├── src │ └── lib.rs └── tests │ ├── fail │ ├── arg_mismatch.rs │ ├── arg_mismatch.stderr │ ├── kwargs.rs │ └── kwargs.stderr │ ├── pass │ ├── example1.rs │ ├── kwargs.rs │ ├── missing_args.rs │ ├── negation.rs │ ├── simple1.rs │ └── transitive_closure.rs │ └── test.rs ├── datapond-macro ├── Cargo.toml └── src │ └── lib.rs ├── examples └── generate_skeleton.rs ├── src ├── ast.rs ├── data_structures.rs ├── generator.rs ├── generator_new │ ├── ast.rs │ ├── encode.rs │ ├── mod.rs │ └── to_tokens.rs ├── lib.rs ├── parser │ ├── ast.rs │ └── mod.rs └── typechecker.rs └── tests ├── cspa_rules.rs ├── flow_sensitive_equality_rules.rs └── naive_rules.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datapond" 3 | version = "0.1.0" 4 | authors = ["lqd"] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | rustc-hash = "1.0.0" 11 | syn = { version = "1.0.8", features = ["extra-traits"] } 12 | proc-macro2 = { version = "1.0.6", features = ["span-locations"] } 13 | quote = "1.0.2" 14 | log = "0.4.8" 15 | 16 | [dev-dependencies] 17 | datafrog = "2.0.0" 18 | pretty_assertions = "0.6.1" 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Datapond 2 | 3 | A set of tools for working with [datafrog](https://github.com/rust-lang/datafrog) (until they are ready to be upstreamed there). 
4 | 5 | All these are made to be used in the limited context of [Polonius](https://github.com/rust-lang/polonius): not in general for all datalog computations. (Not to mention, datafrog itself has its peculiarities, and API requirements) 6 | 7 | Currently containing: 8 | 9 | 1) a datalog-to-datafrog prototyping generator 10 | 2) some bugs 11 | 12 | In the future, possibly: 13 | - provenance information for generated tuples, and tracing provenance for a target tuple 14 | - some kind of rule transformations (probably Demand Transformation) 15 | 16 | --- 17 | 18 | ### Datalog-to-datafrog prototyping generator 19 | 20 | For _prototyping_ purposes, this will help generate a skeleton of a datafrog computation, from a set of Soufflé-like declarations, and _valid_ datalog rules (there is basically no error checking in the parser yet). 21 | 22 | The generated skeleton should (hopefully) build as-is, but won't know where to get the data required by the relations. So it can help bootstrap using datafrog, and maybe get 80-90% of the way there. 23 | 24 | In the Polonius context, this was used to bootstrap the `Naive` rules variant (the simpler variant) successfully just by filling the relations with data. 25 | 26 | 27 | Using a set of declarations like these: 28 | 29 | ```prolog 30 | input borrow_region(O: Origin, L: Loan, P: Point) 31 | input cfg_edge(P: Point, Q: Point) 32 | input killed(L: Loan, P: Point) 33 | input outlives(O1: Origin, O2: Origin, P: Point) 34 | input region_live_at(O: Origin, P: Point) 35 | internal subset(O1: Origin, O2: Origin, P: Point) 36 | internal requires(O: Origin, L: Loan, P: Point) 37 | internal borrow_live_at(L: Loan, P: Point) 38 | input invalidates(L: Loan, P: Point) 39 | output errors(L: Loan, P: Point) 40 | ``` 41 | 42 | And the `Naive` polonius rules: 43 | 44 | ```prolog 45 | // R1 46 | subset(O1, O2, P) :- 47 | outlives(O1, O2, P). 48 | 49 | // R2 50 | subset(O1, O3, P) :- 51 | subset(O1, O2, P), 52 | subset(O2, O3, P). 
53 | 54 | // R3 55 | subset(O1, O2, Q) :- 56 | subset(O1, O2, P), 57 | cfg_edge(P, Q), 58 | region_live_at(O1, Q), 59 | region_live_at(O2, Q). 60 | 61 | // R4 62 | requires(O, L, P) :- 63 | borrow_region(O, L, P). 64 | 65 | // R5 66 | requires(O2, L, P) :- 67 | requires(O1, L, P), 68 | subset(O1, O2, P). 69 | 70 | // R6 71 | requires(O, L, Q) :- 72 | requires(O, L, P), 73 | !killed(L, P), 74 | cfg_edge(P, Q), 75 | region_live_at(O, Q). 76 | 77 | // R7 78 | borrow_live_at(L, P) :- 79 | requires(O, L, P), 80 | region_live_at(O, P). 81 | 82 | // R8 83 | errors(L, P) :- 84 | borrow_live_at(L, P), 85 | invalidates(L, P). 86 | ``` 87 | 88 | The `datapond::generate_skeleton_datafrog` function will generate this piece of code (`rustfmt`-ed here) 89 |
90 | 91 | ```rust 92 | // Extensional predicates, and their indices 93 | 94 | let borrow_region: Relation<(Origin, Loan, Point)> = Vec::new().into(); 95 | 96 | // Note: `cfg_edge_p` is an indexed version of the input facts `cfg_edge` 97 | let cfg_edge_p: Relation<(Point, Point)> = Vec::new().into(); 98 | 99 | let invalidates: Relation<((Loan, Point), ())> = Vec::new().into(); 100 | let killed: Relation<(Loan, Point)> = Vec::new().into(); 101 | let outlives: Relation<(Origin, Origin, Point)> = Vec::new().into(); 102 | let region_live_at: Relation<((Origin, Point), ())> = Vec::new().into(); 103 | 104 | // `errors` inferred as the output relation 105 | let errors = { 106 | let mut iteration = Iteration::new(); 107 | 108 | // Intensional predicates, and their indices 109 | 110 | let borrow_live_at = iteration.variable::<((Loan, Point), ())>("borrow_live_at"); 111 | let errors = iteration.variable::<(Loan, Point)>("errors"); 112 | let requires = iteration.variable::<(Origin, Loan, Point)>("requires"); 113 | 114 | // Note: `requires_lp` is an indexed version of the `requires` relation 115 | let requires_lp = iteration.variable::<((Loan, Point), Origin)>("requires_lp"); 116 | 117 | // Note: `requires_op` is an indexed version of the `requires` relation 118 | let requires_op = iteration.variable::<((Origin, Point), Loan)>("requires_op"); 119 | let requires_step_6_1 = iteration.variable("requires_step_6_1"); 120 | let requires_step_6_2 = iteration.variable("requires_step_6_2"); 121 | let subset = iteration.variable::<(Origin, Origin, Point)>("subset"); 122 | 123 | // Note: `subset_o1p` is an indexed version of the `subset` relation 124 | let subset_o1p = iteration.variable::<((Origin, Point), Origin)>("subset_o1p"); 125 | 126 | // Note: `subset_o2p` is an indexed version of the `subset` relation 127 | let subset_o2p = iteration.variable::<((Origin, Point), Origin)>("subset_o2p"); 128 | 129 | // Note: `subset_p` is an indexed version of the `subset` relation 130 | let 
subset_p = iteration.variable::<(Point, (Origin, Origin))>("subset_p"); 131 | let subset_step_3_1 = iteration.variable("subset_step_3_1"); 132 | let subset_step_3_2 = iteration.variable("subset_step_3_2"); 133 | 134 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 135 | subset.extend(outlives.iter().clone()); 136 | 137 | // R04: requires(O, L, P) :- borrow_region(O, L, P). 138 | requires.extend(borrow_region.iter().clone()); 139 | 140 | while iteration.changed() { 141 | // Index maintenance 142 | requires_op.from_map(&requires, |&(o, l, p)| ((o, p), l)); 143 | requires_lp.from_map(&requires, |&(o, l, p)| ((l, p), o)); 144 | subset_o2p.from_map(&subset, |&(o1, o2, p)| ((o2, p), o1)); 145 | subset_o1p.from_map(&subset, |&(o1, o2, p)| ((o1, p), o2)); 146 | subset_p.from_map(&subset, |&(o1, o2, p)| (p, (o1, o2))); 147 | 148 | // Rules 149 | 150 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 151 | // `outlives` is a static input, already loaded into `subset`. 152 | 153 | // R02: subset(O1, O3, P) :- subset(O1, O2, P), subset(O2, O3, P). 154 | subset.from_join(&subset_o2p, &subset_o1p, |&(_o2, p), &o1, &o3| (o1, o3, p)); 155 | 156 | // R03: subset(O1, O2, Q) :- subset(O1, O2, P), cfg_edge(P, Q), region_live_at(O1, Q), region_live_at(O2, Q). 157 | subset_step_3_1.from_join(&subset_p, &cfg_edge_p, |&_p, &(o1, o2), &q| ((o1, q), o2)); 158 | subset_step_3_2.from_join(&subset_step_3_1, &region_live_at, |&(o1, q), &o2, _| { 159 | ((o2, q), o1) 160 | }); 161 | subset.from_join(&subset_step_3_2, &region_live_at, |&(o2, q), &o1, _| { 162 | (o1, o2, q) 163 | }); 164 | 165 | // R04: requires(O, L, P) :- borrow_region(O, L, P). 166 | // `borrow_region` is a static input, already loaded into `requires`. 167 | 168 | // R05: requires(O2, L, P) :- requires(O1, L, P), subset(O1, O2, P). 169 | requires.from_join(&requires_op, &subset_o1p, |&(_o1, p), &l, &o2| (o2, l, p)); 170 | 171 | // R06: requires(O, L, Q) :- requires(O, L, P), !killed(L, P), cfg_edge(P, Q), region_live_at(O, Q). 
172 | requires_step_6_1.from_antijoin(&requires_lp, &killed, |&(l, p), &o| (p, (l, o))); 173 | requires_step_6_2.from_join(&requires_step_6_1, &cfg_edge_p, |&_p, &(l, o), &q| { 174 | ((o, q), l) 175 | }); 176 | requires.from_join(&requires_step_6_2, &region_live_at, |&(o, q), &l, _| { 177 | (o, l, q) 178 | }); 179 | 180 | // R07: borrow_live_at(L, P) :- requires(O, L, P), region_live_at(O, P). 181 | borrow_live_at.from_join(&requires_op, &region_live_at, |&(_o, p), &l, _| { 182 | ((l, p), ()) 183 | }); 184 | 185 | // R08: errors(L, P) :- borrow_live_at(L, P), invalidates(L, P). 186 | errors.from_join(&borrow_live_at, &invalidates, |&(l, p), _, _| (l, p)); 187 | } 188 | 189 | errors.complete() 190 | }; 191 | ``` 192 | 193 |
194 | 195 | 196 | More detailed examples, showing usage and output, can also be found in the [examples](./examples) and [tests](./tests) directories. 197 | 198 | Currently not supported: 199 | - purely extensional joins in rules (datafrog is mostly made for intensional predicate computations); maybe later, but we're not using those right now. 200 | - datafrog's API requires intensional predicates to be first in join steps; there should be a warning/error for datalog rules not following this pattern. The skeleton generator will generate a "correct join" (with respect to key and values tuples) but it will not compile. The 2 predicates can easily be swapped if that happens. 201 | - populating extensional indices (unlike intensional indices). 202 | - some kinds of self joins. 203 | - leapjoins: those are mostly optimisations over regular joins. Since the purpose of this generator is to easily try different rules and transformations, they can be added later (or manually after generating the skeleton). 
-------------------------------------------------------------------------------- /datapond-derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datapond-derive" 3 | version = "0.1.0" 4 | authors = ["Vytautas Astrauskas "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | proc-macro-hack = "0.5" 9 | datapond-macro = { path = "../datapond-macro" } 10 | 11 | [dev-dependencies] 12 | trybuild = "1.0" 13 | datafrog = "2" 14 | -------------------------------------------------------------------------------- /datapond-derive/src/lib.rs: -------------------------------------------------------------------------------- 1 | use proc_macro_hack::proc_macro_hack; 2 | 3 | #[proc_macro_hack] 4 | pub use datapond_macro::datapond; 5 | -------------------------------------------------------------------------------- /datapond-derive/tests/fail/arg_mismatch.rs: -------------------------------------------------------------------------------- 1 | use datapond_derive::datapond; 2 | 3 | fn main() { 4 | let inp = vec![(1, 2), (2, 3)]; 5 | let out; 6 | datapond! { 7 | input inp(x: u32, y: u32, z: u32) 8 | output out(x: u32, y: u32) 9 | out(x, y) :- inp(y, x). 10 | }; 11 | } 12 | -------------------------------------------------------------------------------- /datapond-derive/tests/fail/arg_mismatch.stderr: -------------------------------------------------------------------------------- 1 | error: Wrong number of arguments for inp: expected 2, found 3. 2 | --> $DIR/arg_mismatch.rs:9:22 3 | | 4 | 9 | out(x, y) :- inp(y, x). 5 | | ^^^ 6 | 7 | error: The predicate inp was declared here. 
8 | --> $DIR/arg_mismatch.rs:7:15 9 | | 10 | 7 | input inp(x: u32, y: u32, z: u32) 11 | | ^^^ 12 | -------------------------------------------------------------------------------- /datapond-derive/tests/fail/kwargs.rs: -------------------------------------------------------------------------------- 1 | use datapond_derive::datapond; 2 | 3 | fn test1() { 4 | let inp = vec![(1, 2, 0), (2, 3, 0)]; 5 | let out; 6 | datapond! { 7 | input inp(x: u32, y: u32, z: u32) 8 | output out(x: u32, y: u32) 9 | out(x, y) :- inp(.y=y, .y=x). 10 | }; 11 | assert_eq!(out.len(), 2); 12 | } 13 | 14 | fn test2() { 15 | let inp = vec![(1, 2, 0), (2, 3, 0)]; 16 | let out; 17 | datapond! { 18 | input inp(x: u32, y: u32, z: u32) 19 | output out(x: u32, y: u32) 20 | out(x, y) :- inp(.a=y, .y=x). 21 | }; 22 | assert_eq!(out.len(), 2); 23 | } 24 | 25 | fn main() { 26 | test1(); 27 | test2(); 28 | } -------------------------------------------------------------------------------- /datapond-derive/tests/fail/kwargs.stderr: -------------------------------------------------------------------------------- 1 | error: Parameter already bound: y 2 | --> $DIR/kwargs.rs:9:33 3 | | 4 | 9 | out(x, y) :- inp(.y=y, .y=x). 5 | | ^ 6 | 7 | error: Unknown parameter a in predicate inp. Available parameters are: x,y,z. 8 | --> $DIR/kwargs.rs:20:27 9 | | 10 | 20 | out(x, y) :- inp(.a=y, .y=x). 
11 | | ^ 12 | -------------------------------------------------------------------------------- /datapond-derive/tests/pass/example1.rs: -------------------------------------------------------------------------------- 1 | use datapond_derive::datapond; 2 | 3 | #[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Copy)] 4 | struct Origin(u64); 5 | #[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Copy)] 6 | struct Loan(u64); 7 | #[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Copy)] 8 | struct Point(u64); 9 | 10 | 11 | fn main() { 12 | let borrow_region = vec![]; 13 | let cfg_edge = vec![]; 14 | let killed = vec![]; 15 | let outlives = vec![]; 16 | let region_live_at = vec![]; 17 | let invalidates = vec![]; 18 | let errors; 19 | datapond! { 20 | input borrow_region(O: Origin, L: Loan, P: Point) 21 | input cfg_edge(P: Point, Q: Point) 22 | input killed(L: Loan, P: Point) 23 | input outlives(O1: Origin, O2: Origin, P: Point) 24 | input region_live_at(O: Origin, P: Point) 25 | input invalidates(L: Loan, P: Point) 26 | internal subset(O1: Origin, O2: Origin, P: Point) 27 | internal requires(O: Origin, L: Loan, P: Point) 28 | internal borrow_live_at(L: Loan, P: Point) 29 | internal equals(O1: Origin, O2: Origin, P: Point) 30 | output errors(L: Loan, P: Point) 31 | 32 | // R1 33 | subset(O1, O2, P) :- outlives(O1, O2, P). 34 | 35 | // R2 36 | subset(O1, O3, P) :- 37 | subset(O1, O2, P), 38 | outlives(O2, O3, P). 39 | 40 | // R3: this is the transitive relation 41 | equals(O1, O2, P) :- 42 | subset(O1, O2, P), 43 | subset(O2, O1, P). 44 | 45 | // R4 46 | equals(O1, O2, Q) :- 47 | equals(O1, O2, P), 48 | cfg_edge(P, Q). 49 | 50 | // R5 51 | requires(O2, L, P) :- 52 | requires(O1, L, P), 53 | equals(O1, O2, P). 54 | 55 | // R6 56 | requires(O, L, P) :- borrow_region(O, L, P). 57 | 58 | // R7 59 | requires(O2, L, P) :- 60 | requires(O1, L, P), 61 | subset(O1, O2, P). 
62 | 63 | // R8 64 | requires(O, L, Q) :- 65 | requires(O, L, P), 66 | !killed(L, P), 67 | cfg_edge(P, Q), 68 | region_live_at(O, Q). 69 | 70 | // R9 71 | borrow_live_at(L, P) :- 72 | requires(O, L, P), 73 | region_live_at(O, P). 74 | 75 | // R10 76 | errors(L, P) :- 77 | borrow_live_at(L, P), 78 | invalidates(L, P). 79 | }; 80 | assert!(errors.is_empty()); 81 | } 82 | -------------------------------------------------------------------------------- /datapond-derive/tests/pass/kwargs.rs: -------------------------------------------------------------------------------- 1 | use datapond_derive::datapond; 2 | 3 | fn main() { 4 | let inp = vec![(1, 2, 0), (2, 3, 0)]; 5 | let out; 6 | let out2; 7 | datapond! { 8 | input inp(x: u32, y: u32, z: u32) 9 | 10 | output out(x: u32, y: u32) 11 | out(x, y) :- inp(.y=y, .x=x). 12 | 13 | output out2(x: u32, y: u32) 14 | out2(a, b) :- inp(.y=a, .x=b). 15 | }; 16 | assert_eq!(out.len(), 2); 17 | assert_eq!(out[0], (1, 2)); 18 | assert_eq!(out[1], (2, 3)); 19 | 20 | assert_eq!(out2.len(), 2); 21 | assert_eq!(out2[0], (2, 1)); 22 | assert_eq!(out2[1], (3, 2)); 23 | } 24 | -------------------------------------------------------------------------------- /datapond-derive/tests/pass/missing_args.rs: -------------------------------------------------------------------------------- 1 | use datapond_derive::datapond; 2 | 3 | fn test1() { 4 | let inp = vec![(1, 2), (2, 3)]; 5 | let out; 6 | datapond! { 7 | input inp(x: u32, y: u32) 8 | output out(x: u32) 9 | out(x) :- inp(x, _). 10 | }; 11 | assert!(out.len() == 2); 12 | assert!(out[0] == (1,)); 13 | assert!(out[1] == (2,)); 14 | } 15 | 16 | fn test2() { 17 | let inp = vec![(1, 2), (2, 3)]; 18 | let out; 19 | datapond! { 20 | input inp(x: u32, y: u32) 21 | output out(x: u32) 22 | out(x) :- inp(x, _), inp(_, x). 
23 | }; 24 | assert!(out.len() == 1); 25 | assert!(out[0] == (2,)); 26 | } 27 | 28 | fn main() { 29 | test1(); 30 | test2(); 31 | } 32 | -------------------------------------------------------------------------------- /datapond-derive/tests/pass/negation.rs: -------------------------------------------------------------------------------- 1 | use datapond_derive::datapond; 2 | 3 | fn main() { 4 | let inp = vec![(1, 2), (2, 3)]; 5 | let kill = vec![(3,), (4,), (5,)]; 6 | let out; 7 | datapond! { 8 | input inp(x: u32, y: u32) 9 | input kill(y: u32) 10 | output out(x: u32, y: u32) 11 | out(x, y) :- inp(x, y), !kill(y). 12 | }; 13 | assert!(out.len() == 1); 14 | assert!(out[0] == (1, 2)); 15 | } 16 | -------------------------------------------------------------------------------- /datapond-derive/tests/pass/simple1.rs: -------------------------------------------------------------------------------- 1 | use datapond_derive::datapond; 2 | 3 | fn main() { 4 | let inp = vec![(1, 2), (2, 3)]; 5 | let out; 6 | datapond! { 7 | input inp(x: u32, y: u32) 8 | output out(x: u32, y: u32) 9 | out(x, y) :- inp(y, x). 10 | }; 11 | assert!(out.len() == 2); 12 | assert!(out[0] == (2, 1)); 13 | assert!(out[1] == (3, 2)); 14 | } 15 | -------------------------------------------------------------------------------- /datapond-derive/tests/pass/transitive_closure.rs: -------------------------------------------------------------------------------- 1 | use datapond_derive::datapond; 2 | 3 | fn main() { 4 | let inp = vec![(1, 2), (2, 3)]; 5 | let out; 6 | datapond! { 7 | input inp(x: u32, y: u32) 8 | output out(x: u32, y: u32) 9 | out(x, y) :- inp(x, y). 10 | out(x, y) :- out(x, z), out(z, y). 
11 | }; 12 | assert!(out.len() == 3); 13 | assert!(out[0] == (1, 2)); 14 | assert!(out[1] == (1, 3)); 15 | assert!(out[2] == (2, 3)); 16 | } 17 | -------------------------------------------------------------------------------- /datapond-derive/tests/test.rs: -------------------------------------------------------------------------------- 1 | #[test] 2 | fn tests() { 3 | let runner = trybuild::TestCases::new(); 4 | runner.pass("tests/pass/*.rs"); 5 | runner.compile_fail("tests/fail/*.rs"); 6 | } 7 | -------------------------------------------------------------------------------- /datapond-macro/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datapond-macro" 3 | version = "0.1.0" 4 | authors = ["Vytautas Astrauskas "] 5 | edition = "2018" 6 | 7 | [lib] 8 | proc-macro = true 9 | 10 | [dependencies] 11 | datapond = { path = ".." } 12 | proc-macro-hack = "0.5" 13 | -------------------------------------------------------------------------------- /datapond-macro/src/lib.rs: -------------------------------------------------------------------------------- 1 | use proc_macro::TokenStream; 2 | use proc_macro_hack::proc_macro_hack; 3 | 4 | #[proc_macro_hack] 5 | pub fn datapond(input: TokenStream) -> TokenStream { 6 | datapond::generate_datafrog(input.into()).into() 7 | } 8 | -------------------------------------------------------------------------------- /examples/generate_skeleton.rs: -------------------------------------------------------------------------------- 1 | use datapond; 2 | use std::env; 3 | 4 | fn main() { 5 | if env::var("RUST_LOG").is_ok() { 6 | start_logging().expect("Initializing logger failed"); 7 | } 8 | 9 | let text = r#" 10 | input borrow_region(O: Origin, L: Loan, P: Point) 11 | input cfg_edge(P: Point, Q: Point) 12 | input killed(L: Loan, P: Point) 13 | input outlives(O1: Origin, O2: Origin, P: Point) 14 | input region_live_at(O: Origin, P: Point) 15 | input invalidates(L: Loan, P: 
Point) 16 | internal subset(O1: Origin, O2: Origin, P: Point) 17 | internal requires(O: Origin, L: Loan, P: Point) 18 | internal borrow_live_at(L: Loan, P: Point) 19 | internal equals(O1: Origin, O2: Origin, P: Point) 20 | output errors(L: Loan, P: Point) 21 | 22 | // R1 23 | subset(O1, O2, P) :- outlives(O1, O2, P). 24 | 25 | // R2 26 | subset(O1, O3, P) :- 27 | subset(O1, O2, P), 28 | outlives(O2, O3, P). 29 | 30 | // R3: this is the transitive relation 31 | equals(O1, O2, P) :- 32 | subset(O1, O2, P), 33 | subset(O2, O1, P). 34 | 35 | // R4 36 | equals(O1, O2, Q) :- 37 | equals(O1, O2, P), 38 | cfg_edge(P, Q). 39 | 40 | // R5 41 | requires(O2, L, P) :- 42 | requires(O1, L, P), 43 | equals(O1, O2, P). 44 | 45 | // R6 46 | requires(O, L, P) :- borrow_region(O, L, P). 47 | 48 | // R7 49 | requires(O2, L, P) :- 50 | requires(O1, L, P), 51 | subset(O1, O2, P). 52 | 53 | // R8 54 | requires(O, L, Q) :- 55 | requires(O, L, P), 56 | !killed(L, P), 57 | cfg_edge(P, Q), 58 | region_live_at(O, Q). 59 | 60 | // R9 61 | borrow_live_at(L, P) :- 62 | requires(O, L, P), 63 | region_live_at(O, P). 64 | 65 | // R10 66 | errors(L, P) :- 67 | borrow_live_at(L, P), 68 | invalidates(L, P). 
69 | "#; 70 | 71 | let output = datapond::generate_skeleton_datafrog(text); 72 | println!("{}", output); 73 | } 74 | 75 | use log::{Level, LevelFilter, Metadata, Record, SetLoggerError}; 76 | 77 | struct Logger; 78 | 79 | impl log::Log for Logger { 80 | fn enabled(&self, metadata: &Metadata) -> bool { 81 | metadata.level() <= Level::Info 82 | } 83 | 84 | fn log(&self, record: &Record) { 85 | if self.enabled(record.metadata()) { 86 | eprintln!("{} {} - {}", record.level(), record.target(), record.args()); 87 | } 88 | } 89 | 90 | fn flush(&self) {} 91 | } 92 | 93 | static LOGGER: Logger = Logger; 94 | 95 | fn start_logging() -> Result<(), SetLoggerError> { 96 | log::set_logger(&LOGGER).map(|()| log::set_max_level(LevelFilter::Info)) 97 | } 98 | -------------------------------------------------------------------------------- /src/ast.rs: -------------------------------------------------------------------------------- 1 | //! This file contains the typed AST. 2 | 3 | use crate::data_structures::OrderedMap; 4 | use proc_macro2::Ident; 5 | use quote::ToTokens; 6 | use std::fmt; 7 | 8 | /// The predicate kind regarding IO. 9 | #[derive(Debug, PartialEq, Clone, Eq, Hash)] 10 | pub enum PredicateKind { 11 | /// Instances of this predicate can be provided only as input facts. 12 | Input, 13 | /// Instances of this predicate can be used for computation but cannot be output. 14 | Internal, 15 | /// Instances of this predicate can be used for computation and also can be output. 16 | Output, 17 | } 18 | 19 | impl fmt::Display for PredicateKind { 20 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 21 | match self { 22 | PredicateKind::Input => write!(f, "input"), 23 | PredicateKind::Internal => write!(f, "internal"), 24 | PredicateKind::Output => write!(f, "output"), 25 | } 26 | } 27 | } 28 | 29 | /// Parameter information of the predicate declaration. 
30 | #[derive(Clone)] 31 | pub struct ParamDecl { 32 | pub name: Ident, 33 | pub typ: syn::Type, 34 | } 35 | 36 | impl fmt::Debug for ParamDecl { 37 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 38 | write!(f, "{}: {}", self.name, self.typ.to_token_stream()) 39 | } 40 | } 41 | 42 | impl fmt::Display for ParamDecl { 43 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 44 | write!(f, "{}: {}", self.name, self.typ.to_token_stream()) 45 | } 46 | } 47 | 48 | impl PartialEq for ParamDecl { 49 | fn eq(&self, other: &Self) -> bool { 50 | self.name == other.name 51 | } 52 | } 53 | 54 | impl Eq for ParamDecl {} 55 | 56 | impl ParamDecl { 57 | pub fn typ_as_string(&self) -> String { 58 | self.typ.to_token_stream().to_string() 59 | } 60 | } 61 | 62 | /// A declaration of the predicate. 63 | /// 64 | /// ```plain 65 | /// input Input(x: u32, y: u32) 66 | /// internal Internal(x: u32, y: u32) 67 | /// output Output(x: u32, y: u32) 68 | /// ``` 69 | #[derive(Debug, Clone, PartialEq, Eq)] 70 | pub struct PredicateDecl { 71 | pub kind: PredicateKind, 72 | pub name: Ident, 73 | pub parameters: Vec, 74 | } 75 | 76 | impl fmt::Display for PredicateDecl { 77 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 78 | write!(f, "{} {}(", self.kind, self.name)?; 79 | let mut first = true; 80 | for parameter in &self.parameters { 81 | if first { 82 | first = false; 83 | } else { 84 | write!(f, ", ")?; 85 | } 86 | write!(f, "{}", parameter)?; 87 | } 88 | write!(f, ")") 89 | } 90 | } 91 | 92 | /// An argument. 93 | #[derive(Debug, Clone, PartialEq, Eq)] 94 | pub enum Arg { 95 | /// Identifier `arg`. 96 | Ident(Ident), 97 | /// Wildcard argument. 
98 | Wildcard, 99 | } 100 | 101 | impl Arg { 102 | pub fn to_ident(&self) -> syn::Ident { 103 | match self { 104 | Arg::Ident(ident) => ident.clone(), 105 | Arg::Wildcard => syn::Ident::new("_", proc_macro2::Span::call_site()), 106 | } 107 | } 108 | pub fn is_wildcard(&self) -> bool { 109 | match self { 110 | Arg::Ident(_) => false, 111 | Arg::Wildcard => true, 112 | } 113 | } 114 | } 115 | 116 | impl fmt::Display for Arg { 117 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 118 | match self { 119 | Arg::Ident(ident) => write!(f, "{}", ident), 120 | Arg::Wildcard => write!(f, "_"), 121 | } 122 | } 123 | } 124 | 125 | /// A richer type of atom, which can be negated, and used as 126 | /// premises/hypotheses in rules. 127 | #[derive(Debug, Clone, PartialEq, Eq)] 128 | pub struct Literal { 129 | pub is_negated: bool, 130 | pub predicate: Ident, 131 | pub args: Vec, 132 | } 133 | 134 | impl fmt::Display for Literal { 135 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 136 | if self.is_negated { 137 | write!(f, "!")?; 138 | } 139 | write!(f, "{}(", self.predicate)?; 140 | let mut first = true; 141 | for arg in &self.args { 142 | if first { 143 | first = false; 144 | } else { 145 | write!(f, ", ")?; 146 | } 147 | write!(f, "{}", arg)?; 148 | } 149 | write!(f, ")") 150 | } 151 | } 152 | 153 | /// A head of a rule. 154 | #[derive(Debug, Clone, PartialEq, Eq)] 155 | pub struct RuleHead { 156 | pub predicate: Ident, 157 | pub args: Vec, 158 | } 159 | 160 | impl fmt::Display for RuleHead { 161 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 162 | write!(f, "{}(", self.predicate)?; 163 | let mut first = true; 164 | for arg in &self.args { 165 | if first { 166 | first = false; 167 | } else { 168 | write!(f, ", ")?; 169 | } 170 | write!(f, "{}", arg)?; 171 | } 172 | write!(f, ")") 173 | } 174 | } 175 | 176 | /// A rule describing how to derive new facts. 177 | /// 178 | /// ```plain 179 | /// Internal(x, y) :- Input(x, y). 
180 | /// ``` 181 | #[derive(Debug, Clone, PartialEq, Eq)] 182 | pub struct Rule { 183 | pub head: RuleHead, 184 | pub body: Vec, 185 | } 186 | 187 | impl fmt::Display for Rule { 188 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 189 | write!(f, "{} :- ", self.head)?; 190 | let mut first = true; 191 | for literal in &self.body { 192 | if first { 193 | first = false; 194 | } else { 195 | write!(f, ", ")?; 196 | } 197 | write!(f, "{}", literal)?; 198 | } 199 | write!(f, ".") 200 | } 201 | } 202 | 203 | /// A Datalog program. 204 | #[derive(Debug, Clone)] 205 | pub struct Program { 206 | pub decls: OrderedMap, 207 | pub rules: Vec, 208 | } 209 | -------------------------------------------------------------------------------- /src/data_structures.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | /// A map that tracks insertion order. 4 | #[derive(Debug, Clone)] 5 | pub struct OrderedMap 6 | where 7 | K: Eq + std::hash::Hash, 8 | { 9 | key_order: Vec, 10 | map: HashMap, 11 | } 12 | 13 | impl OrderedMap { 14 | pub fn new() -> Self { 15 | Self { 16 | key_order: Vec::new(), 17 | map: HashMap::new(), 18 | } 19 | } 20 | pub fn len(&self) -> usize { 21 | self.map.len() 22 | } 23 | pub fn insert(&mut self, k: K, v: V) { 24 | assert!(self.map.insert(k.clone(), v).is_none()); 25 | self.key_order.push(k); 26 | } 27 | pub fn get(&self, k: &K) -> Option<&V> { 28 | self.map.get(k) 29 | } 30 | pub fn values<'a>(&'a self) -> Vec<&'a V> { 31 | self.key_order.iter().map(|k| &self.map[k]).collect() 32 | } 33 | } 34 | 35 | impl std::iter::FromIterator<(K, V)> for OrderedMap { 36 | fn from_iter>(iter: I) -> Self { 37 | let mut s = Self { 38 | key_order: Vec::new(), 39 | map: HashMap::new(), 40 | }; 41 | for (k, v) in iter { 42 | s.insert(k, v); 43 | } 44 | s 45 | } 46 | } 47 | 48 | impl std::ops::Index<&K> for OrderedMap { 49 | type Output = V; 50 | 51 | fn index(&self, key: &K) -> &Self::Output { 52 | 
&self.map[key] 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/generator.rs: -------------------------------------------------------------------------------- 1 | use crate::data_structures::OrderedMap; 2 | use crate::{ast, parser, typechecker}; 3 | use quote::ToTokens; 4 | use rustc_hash::{FxHashMap, FxHashSet}; 5 | use std::fmt::{self, Write}; 6 | 7 | type HashMap = OrderedMap; 8 | 9 | /// The representation of what a datalog rule does in datafrog terms 10 | enum Operation { 11 | StaticMap(), 12 | DynamicMap(MapStep), 13 | Join(Vec), 14 | } 15 | 16 | impl ast::Arg { 17 | fn ident(&self) -> &syn::Ident { 18 | match self { 19 | ast::Arg::Ident(ident) => ident, 20 | _ => unimplemented!("Generator does not support wildcards yet"), 21 | } 22 | } 23 | fn to_string(&self) -> String { 24 | self.ident().to_string() 25 | } 26 | } 27 | 28 | /// The representation of a join, with the data required to serialize it as Rust code 29 | #[derive(Debug)] 30 | struct JoinStep { 31 | src_a: String, 32 | src_b: String, 33 | 34 | is_antijoin: bool, 35 | 36 | key: Vec, 37 | args: Vec, 38 | 39 | remaining_args_a: Vec, 40 | remaining_args_b: Vec, 41 | 42 | dest_predicate: String, 43 | dest_key: Vec, 44 | dest_args: Vec, 45 | } 46 | 47 | #[derive(Debug)] 48 | struct MapStep { 49 | src_args: String, 50 | dest_args: String, 51 | } 52 | 53 | pub fn generate_skeleton_datafrog(text: &str) -> String { 54 | let mut output = String::new(); 55 | generate_skeleton_into(text, &mut output); 56 | 57 | // tidy up: filter multiple empty lines in a row 58 | let filtered = output.replace("\n\n\n", "\n\n"); 59 | filtered 60 | } 61 | 62 | fn generate_skeleton_into(text: &str, output: &mut String) { 63 | // Step 0: parse everything. 
64 | let program = parser::parse(text); 65 | let program = match typechecker::typecheck(program) { 66 | Ok(program) => program, 67 | Err(err) => panic!("Error: {:?} (at {:?})", err, err.span.start()), 68 | }; 69 | let decls = program.decls; 70 | let rules = program.rules; 71 | 72 | // Step 1: analyze rules to separate extensional and intensional predicates. 73 | // These will end up being emitted as datafrog `Relation`s and `Variable`s, respectively. 74 | let mut intensional = FxHashSet::default(); 75 | let mut extensional = FxHashSet::default(); 76 | for decl in decls.values() { 77 | if decl.kind != ast::PredicateKind::Input { 78 | intensional.insert(decl.name.to_string()); 79 | } else { 80 | extensional.insert(decl.name.to_string()); 81 | } 82 | } 83 | 84 | // Step 2: visit rules and emit a datafrog "query plan". 85 | 86 | // Actually used predicates and indices 87 | let mut extensional_inputs = FxHashSet::default(); 88 | let mut intensional_inputs = FxHashSet::default(); 89 | 90 | let mut extensional_indices = FxHashMap::default(); 91 | let mut intensional_indices = FxHashMap::default(); 92 | 93 | // All relations used as keys need to be encoded as `((K, V), ())` tuples, 94 | // as the joins are done on the keys. 95 | let mut predicates_consumed_as_keys = FxHashSet::default(); 96 | 97 | // The skeleton code: 98 | // - the inital data loading, before the loop, to fill the `Variable`s with 99 | // data from the `Relation`s they join with in the rules. 100 | // - the dynamic computation data: the loop itself, executing the joins 101 | let mut generated_code_static_input: Vec = Vec::new(); 102 | let mut generated_code_dynamic_computation: Vec = Vec::new(); 103 | 104 | let mut operations = Vec::new(); 105 | 106 | // Generate an `Operation` per rule, describing what the rule does, and 107 | // the data required to serialize it as rust code later. 
This is done in 2 steps 108 | // because we need to know which predicates are used as complete keys _before_ 109 | // serializing them to code: the tuple produced by each rule would be different 110 | // depending on the join key of later rules. 111 | for (rule_idx, rule) in rules.iter().enumerate() { 112 | let body: Vec<_> = rule.body.iter().collect(); 113 | 114 | let operation = match body.len() { 115 | 0 => unreachable!(), 116 | 117 | 1 => { 118 | // This a `map` operation. 119 | 120 | // Record used inputs to filter code generation later 121 | if !intensional.contains(&body[0].predicate.to_string()) { 122 | extensional_inputs.insert(body[0].predicate.to_string()); 123 | } 124 | 125 | let operation = { 126 | // If this is mapping over an extensional predicate, we can emit 127 | // this outside of the datalog computation loop, since the input is static. 128 | if extensional.contains(&body[0].predicate.to_string()) { 129 | Operation::StaticMap() 130 | } else { 131 | // otherwise, it's a map during computation 132 | 133 | let args_a: Vec<_> = rule.head.args.clone(); 134 | let args_b: Vec<_> = body[0].args.clone(); 135 | 136 | let src_args = args_b 137 | .iter() 138 | .map(|arg: &ast::Arg| { 139 | let arg = arg.ident(); 140 | if args_a.contains(arg) { 141 | arg.to_string().to_lowercase() 142 | } else { 143 | format!("_{}", arg.to_string().to_lowercase()) 144 | } 145 | }) 146 | .collect::>() 147 | .join(", "); 148 | 149 | let mut dest_args = args_a 150 | .iter() 151 | .map(|arg| { 152 | if args_a.contains(arg) { 153 | arg.to_string().to_lowercase() 154 | } else { 155 | format!("_{}", arg.to_string().to_lowercase()) 156 | } 157 | }) 158 | .collect::>() 159 | .join(", "); 160 | 161 | if args_a.len() == 1 { 162 | dest_args = format!("{}, ()", dest_args); 163 | } 164 | 165 | let operation = MapStep { 166 | src_args, 167 | dest_args, 168 | }; 169 | Operation::DynamicMap(operation) 170 | } 171 | }; 172 | operation 173 | } 174 | 175 | _ => { 176 | // This is a `join` 
operation 177 | 178 | // TODO: check if there is only one intensional predicate and the rest are extensional 179 | // so that we can output a leapjoin instead of a regular join 180 | 181 | let mut steps: Vec = Vec::new(); 182 | 183 | for (literal_idx, literal) in body.iter().enumerate().skip(1) { 184 | // We're joining 2 literals, but a step at a time (1 literal at a time), 185 | // using the previous join output with the current step's literal. 186 | // So the first `step_idx` of the join will start at `literal_idx` 1, 187 | // joining `body[0]` and `body[1]`. 188 | let step_idx = literal_idx - 1; 189 | 190 | let is_first_step = step_idx == 0; 191 | let is_last_step = literal_idx == body.len() - 1; 192 | 193 | // TODO: datafrog has requirements that the joined Variable is the first 194 | // argument, so if this is the first (or only) step of a join, the second literal 195 | // should be a Variable (or emit an error), and swap the order here, or maybe also 196 | // emit an error asking to swap in the source directly. 197 | 198 | // When we're at the first step, there is no previous step result with which 199 | // to join. But when we're at `literal_idx` of at least 2, we've joined 2 200 | // literals already and continue to join that result, with the current step's literal. 
201 | let mut previous_step = if is_first_step { 202 | None 203 | } else { 204 | Some(&mut steps[step_idx - 1]) 205 | }; 206 | 207 | // The destination where we produce our join's tuples can be: 208 | // - a temporary relation, when we're at an intermediary of 209 | // the multiple-step join 210 | // - the rule's conclusion when we're on the last step 211 | let dest_predicate = if is_last_step { 212 | rule.head.predicate.to_string() 213 | } else { 214 | format!( 215 | "{}_step_{}_{}", 216 | rule.head.predicate, 217 | rule_idx + 1, 218 | step_idx + 1 219 | ) 220 | }; 221 | 222 | // Record used inputs to filter code generation later 223 | intensional.insert(dest_predicate.clone()); 224 | 225 | // The arguments to the source literals can either come from the 226 | // first 2 literals in the body (at the firs step of the join), 227 | // or from the previous step's result and current step's literal. 228 | let args_a: Vec = if let Some(ref mut previous_step) = previous_step { 229 | previous_step 230 | .key 231 | .iter() 232 | .chain(previous_step.args.iter()) 233 | .map(|v| v.to_string()) 234 | .collect() 235 | } else { 236 | body[0].args.iter().map(|a| a.to_string()).collect() 237 | }; 238 | 239 | let args_b: Vec<_> = literal.args.iter().map(|a| a.to_string()).collect(); 240 | 241 | // The join key is the shared variables between the 2 relations 242 | let mut key: Vec<_> = args_b 243 | .iter() 244 | .map(|v| v.to_string()) 245 | .filter(|v| args_a.contains(v)) 246 | .collect(); 247 | 248 | // We now need to know which arguments were not used in the key: they will be the 249 | // arguments that the datafrog closure producing the tuples of the join 250 | // will _receive_. 
251 | let is_arg_used_later = |arg: &str, skip| { 252 | // if the argument is used in later steps, we need to retain it 253 | for literal in body.iter().skip(skip) { 254 | if literal.args.iter().any(|a| &a.to_string() == arg) { 255 | return true; 256 | } 257 | } 258 | 259 | // similarly, if the variable is produced by the join process itself 260 | if rule.head.args.iter().any(|a| &a.to_string() == arg) { 261 | return true; 262 | } 263 | 264 | // else, the argument is unused, and we can avoid producing it at this join step 265 | false 266 | }; 267 | 268 | let remaining_args_a: Vec = args_a 269 | .iter() 270 | .filter(|v| !key.contains(v)) 271 | .filter(|v| is_arg_used_later(v, step_idx + 1)) 272 | .map(|v| v.to_string()) 273 | .collect(); 274 | let remaining_args_b: Vec = args_b 275 | .iter() 276 | .filter(|v| !key.contains(v)) 277 | .filter(|v| is_arg_used_later(v, step_idx + 1)) 278 | .map(|v| v.to_string()) 279 | .collect(); 280 | 281 | // This step's arguments, which will be used by the next step when computing 282 | // its join key. 283 | let mut args = Vec::new(); 284 | for arg in remaining_args_a.iter().chain(remaining_args_b.iter()) { 285 | args.push(arg.clone()); 286 | } 287 | 288 | // Compute the source predicates: 289 | // - if we're at the first step it'll be the first 2 literals 290 | // - if we're at a later step, it'll be the previous step result, and the 291 | // current literal 292 | // 293 | // In both cases, predicates can be joined via some index, when only some of 294 | // the arguments in the key are used. In this case, either source index can be 295 | // used instead of the relation with full tuples. 296 | // 297 | // The "left" relation in the join could come from the previous step, 298 | // in that case, there is no specific index to lookup. 
299 | // 300 | let src_a = if let Some(ref mut previous_step) = previous_step { 301 | previous_step.dest_predicate.clone() 302 | } else { 303 | if remaining_args_a.is_empty() { 304 | body[0].predicate.to_string() 305 | } else { 306 | generate_index_relation( 307 | &decls, 308 | &body[0], 309 | &args_a, 310 | &key, 311 | &remaining_args_a, 312 | &mut extensional, 313 | &mut extensional_indices, 314 | &mut intensional, 315 | &mut intensional_inputs, 316 | &mut intensional_indices, 317 | ) 318 | } 319 | }; 320 | 321 | let mut src_b = if remaining_args_b.is_empty() { 322 | literal.predicate.to_string() 323 | } else { 324 | generate_index_relation( 325 | &decls, 326 | &literal, 327 | &args_b, 328 | &key, 329 | &remaining_args_b, 330 | &mut extensional, 331 | &mut extensional_indices, 332 | &mut intensional, 333 | &mut intensional_inputs, 334 | &mut intensional_indices, 335 | ) 336 | }; 337 | 338 | // Self-joins with intensional predicates consumed as keys need to be 339 | // special-cased in datafrog: the same arguments are used, but in their own order, 340 | // so we need an index with that order for the tuple-keys. The join key is also unused, 341 | // as the whole tuple is the key in its canonical order (I think ?) or the order of the 342 | // left element. Those cases are rare, so let's handle it the way I know 343 | // works in our Polonius use-cases. 
344 | if src_a == src_b && remaining_args_a.is_empty() && remaining_args_b.is_empty() 345 | { 346 | // A self join with the same ordering would be a no-op, so look for the element 347 | // which has the canonical ordering 348 | 349 | let canonical_args = decls[&src_a] 350 | .parameters 351 | .iter() 352 | .map(|decl| decl.name.to_string().to_uppercase()); 353 | let canonicalized_args_a = args_a.iter().map(|arg| arg.to_string()); 354 | if canonicalized_args_a.eq(canonical_args) { 355 | // The left element has the canonical ordering, the right element needs to be indexed 356 | // in a new wrapped-tuple relation 357 | 358 | let relation_args = &args_b; 359 | let value_args = Vec::new(); 360 | 361 | let index_relation = generate_index_relation_name( 362 | &decls, 363 | &literal.predicate, 364 | &args_a, 365 | relation_args, 366 | ); 367 | 368 | // Index maintenance 369 | if extensional.contains(&literal.predicate.to_string()) { 370 | record_extensional_index_use( 371 | &decls, 372 | &literal.predicate, 373 | &index_relation, 374 | relation_args, 375 | &args_a, 376 | &value_args, 377 | &mut extensional, 378 | &mut extensional_indices, 379 | ); 380 | } else { 381 | record_intensional_index_use( 382 | &literal, 383 | &args_b, 384 | &value_args, 385 | &index_relation, 386 | &mut intensional, 387 | &mut intensional_inputs, 388 | &mut intensional_indices, 389 | ); 390 | } 391 | 392 | src_b = index_relation; 393 | key = args_a; 394 | } else { 395 | // The right element has the canonical ordering, the left element needs to be indexed 396 | // in a new wrapped-tuple relation 397 | unimplemented!("no case currently hits this ?"); 398 | } 399 | } 400 | 401 | // The arguments that the datafrog closure will need to _produce_. 402 | // Since these are only known in the next step, the next loop iteration 403 | // will fill them. When we're at the last step, we produce what the rule 404 | // asked us to produce in the first place. 
405 | let dest_args = if is_last_step { 406 | rule.head.args.iter().map(|a| a.to_string()).collect() 407 | } else { 408 | Vec::new() 409 | }; 410 | 411 | // Now that we have computed what this join step requires from the previous step, 412 | // we can back patch the previous one, to tell it what key-value tuples to produce. 413 | if let Some(ref mut previous_step) = previous_step { 414 | previous_step.dest_key = key.clone(); 415 | previous_step.dest_args = remaining_args_a.clone(); 416 | } 417 | 418 | let is_antijoin = literal.is_negated; 419 | 420 | if !is_antijoin { 421 | if remaining_args_a.is_empty() { 422 | predicates_consumed_as_keys.insert(src_a.clone()); 423 | } 424 | 425 | if remaining_args_b.is_empty() { 426 | predicates_consumed_as_keys.insert(src_b.clone()); 427 | } 428 | } 429 | 430 | let step = JoinStep { 431 | src_a, 432 | src_b, 433 | is_antijoin, 434 | key, 435 | args, 436 | remaining_args_a, 437 | remaining_args_b, 438 | dest_predicate, 439 | dest_key: Vec::new(), 440 | dest_args, 441 | }; 442 | 443 | steps.push(step); 444 | } 445 | 446 | Operation::Join(steps) 447 | } 448 | }; 449 | 450 | operations.push(operation); 451 | } 452 | 453 | // Serialize rule operations as string to generate the skeleton code 454 | for (rule_idx, (rule, operation)) in rules.iter().zip(operations.into_iter()).enumerate() { 455 | let rule_id = format!("R{:02}", rule_idx + 1); 456 | let rule_comment = format!("// {}: {}", rule_id, rule); 457 | 458 | generated_code_dynamic_computation.push(rule_comment.clone()); 459 | 460 | match operation { 461 | Operation::StaticMap() => { 462 | // A `map` operation depends on: 463 | // - whether the predicate was consumed as a key, where we can simply 464 | // wrap the tuple 465 | // - whether body already projects to what the rule expects, where we can simply 466 | // clone the input 467 | // - all the other cases, where we'll do the projection of the input relation 468 | 469 | // static `map` operations are composed of a projection of 
a single relation 470 | assert!(rule.body.len() == 1); 471 | 472 | let projection_matches_head = rule 473 | .head 474 | .args 475 | .iter() 476 | .eq(rule.body[0].args.iter().map(|x| x.ident())); 477 | 478 | // The encoding of predicates consumed as keys requires to 479 | // wrap the key-value tuple as a key in another tuple, and a unit value. 480 | let produced_tuple = if predicates_consumed_as_keys 481 | .contains(&rule.head.predicate.to_string()) 482 | { 483 | "map(|&tuple| (tuple, ()))".to_string() 484 | } else if projection_matches_head { 485 | // If the projection matches the head of the rule, we can simply clone the input 486 | // (as the arguments are the same). 487 | "clone()".to_string() 488 | } else { 489 | // If the body arguments do not match the head of the rule, we need to `map` over 490 | // the input to project what the rule expects to output. 491 | let source_args: Vec<_> = 492 | rule.body[0].args.iter().map(|a| a.to_string()).collect(); 493 | let target_args: Vec<_> = 494 | rule.head.args.iter().map(|a| a.to_string()).collect(); 495 | 496 | let tupled_src = join_args_as_tuple(&source_args, &target_args, &target_args); 497 | let tupled_target = 498 | join_args_as_tuple(&target_args, &target_args, &target_args); 499 | format!("map(|&{}| {})", tupled_src, tupled_target) 500 | }; 501 | 502 | let operation = format!( 503 | "{}.extend({}.iter().{});", 504 | rule.head.predicate, rule.body[0].predicate, produced_tuple 505 | ); 506 | 507 | generated_code_static_input.push(rule_comment); 508 | generated_code_static_input.push(operation); 509 | 510 | generated_code_dynamic_computation.push(format!( 511 | "// `{}` is a static input, already loaded into `{}`.", 512 | rule.body[0].predicate, rule.head.predicate, 513 | )); 514 | } 515 | Operation::DynamicMap(step) => { 516 | warn!( 517 | "warning: untested code generation! 
dynamic map step: {:?}", 518 | step 519 | ); 520 | 521 | // The encoding of these predicates consumed as keys requires to 522 | // wrap the key-value tuple as a key in another tuple, and a unit value. 523 | let src_args = 524 | if predicates_consumed_as_keys.contains(&rule.head.predicate.to_string()) { 525 | format!("({}), _", step.src_args) 526 | } else { 527 | step.src_args 528 | }; 529 | 530 | let operation = format!( 531 | "{dest}.from_map(&{src}, |&({src_args})| ({dest_args}));", 532 | dest = rule.head.predicate, 533 | src = rule.body[0].predicate, 534 | src_args = src_args, 535 | dest_args = step.dest_args, 536 | ); 537 | generated_code_dynamic_computation.push(operation); 538 | 539 | unimplemented!("no case currently hits this ?"); 540 | } 541 | Operation::Join(steps) => { 542 | for (step_idx, step) in steps.iter().enumerate() { 543 | let is_last_step = step_idx == steps.len() - 1; 544 | 545 | // Stringify the datafrog join closure arguments: 546 | // - the key 547 | // - the unused arguments from the first relation 548 | // - the unused arguments from the second relation 549 | let tupled_src_key = 550 | join_args_as_tuple(&step.key, &step.dest_key, &step.dest_args); 551 | 552 | let tupled_args_a = match step.remaining_args_a.len() { 553 | 0 => "_".to_string(), 554 | _ => format!( 555 | "&{}", 556 | join_args_as_tuple( 557 | &step.remaining_args_a, 558 | &step.dest_key, 559 | &step.dest_args 560 | ) 561 | ), 562 | }; 563 | 564 | let tupled_args_b = match step.remaining_args_b.len() { 565 | 0 => "_".to_string(), 566 | _ => format!( 567 | "&{}", 568 | join_args_as_tuple( 569 | &step.remaining_args_b, 570 | &step.dest_key, 571 | &step.dest_args 572 | ) 573 | ), 574 | }; 575 | 576 | // Stringify the datafrog closure body: the value it will produce, and which can be 577 | // a simple value, or a key-value tuple, depending on the join step, and the destination 578 | // relation layout. 
579 | let mut produced_tuple = { 580 | if is_last_step { 581 | // we're on the last step, so we must produce what the rule's conclusion expects 582 | step.dest_args.join(", ").to_lowercase() 583 | } else { 584 | // we're at an intermediary step of the multi-step join, so we must produce 585 | // what the next step expects 586 | let tupled_dest_key = 587 | join_args_as_tuple(&step.dest_key, &step.dest_key, &step.dest_args); 588 | let tupled_dest_args = join_args_as_tuple( 589 | &step.dest_args, 590 | &step.dest_key, 591 | &step.dest_args, 592 | ); 593 | format!("{}, {}", tupled_dest_key, tupled_dest_args) 594 | } 595 | }; 596 | 597 | // The encoding of these predicates consumed as keys requires to 598 | // wrap the key-value tuple as a key in another tuple, and a unit value. 599 | if predicates_consumed_as_keys.contains(&step.dest_predicate) { 600 | produced_tuple = format!("({}), ()", produced_tuple); 601 | } 602 | 603 | let operation = if step.is_antijoin { "antijoin" } else { "join" }; 604 | 605 | // Adapt the closure signature to the specific join, we're doing. Antijoins 606 | // consume all arguments, there will be no unused arguments for the join closure 607 | // to receive. 608 | let args = if step.is_antijoin { 609 | tupled_args_a 610 | } else { 611 | format!( 612 | "{args_a}, {args_b}", 613 | args_a = tupled_args_a, 614 | args_b = tupled_args_b, 615 | ) 616 | }; 617 | 618 | // If either predicates is not intensional: it's either a declared extensional 619 | // predicate, or one which was generated as an index of a declared relation, 620 | // we'll record its use to only emit actually used `Relation`s. 621 | // Technically, extensional predicates can only appear in the right element 622 | // of a regular join; but we can reorder, and have to handle leapjoins, so let's 623 | // check both right and left elements. 
624 | record_predicate_use( 625 | &step.src_a, 626 | &intensional, 627 | &mut extensional_inputs, 628 | &mut intensional_inputs, 629 | ); 630 | record_predicate_use( 631 | &step.src_b, 632 | &intensional, 633 | &mut extensional_inputs, 634 | &mut intensional_inputs, 635 | ); 636 | 637 | let operation = format!( 638 | "{dest}.from_{operation}(&{src_a}, &{src_b}, |&{key}, {args}| ({tuple}));", 639 | dest = step.dest_predicate, 640 | operation = operation, 641 | src_a = step.src_a, 642 | src_b = step.src_b, 643 | key = tupled_src_key, 644 | args = args, 645 | tuple = produced_tuple, 646 | ); 647 | generated_code_dynamic_computation.push(operation); 648 | } 649 | } 650 | } 651 | 652 | // Add an empty line after every datalog rule conversion 653 | if rule_idx < rules.len() - 1 { 654 | generated_code_dynamic_computation.push("".to_string()); 655 | } 656 | } 657 | 658 | // Infer the output of the computation: the difference between all the intensional 659 | // predicates and the ones used as inputs. 
660 | let main_relation_candidates: Vec<_> = intensional 661 | .difference(&intensional_inputs) 662 | .cloned() 663 | .collect(); 664 | 665 | println!( 666 | "{} extensional predicates/indices used (out of {}) and which can be a datafrog `Relation`:", 667 | extensional_inputs.len(), 668 | extensional.len(), 669 | ); 670 | let mut extensional: Vec<_> = extensional_inputs.into_iter().collect(); 671 | extensional.sort(); 672 | for (idx, relation) in extensional.iter().enumerate() { 673 | let is_index = match extensional_indices.get(relation) { 674 | Some((original_predicate, ..)) => format!(" (index on `{}`)", original_predicate), 675 | None => "".to_string(), 676 | }; 677 | 678 | println!("{:02}: `{}`{}", idx + 1, relation, is_index); 679 | } 680 | 681 | println!( 682 | "\n{} intensional predicates (including {} indices) requiring a datafrog `Variable`:", 683 | intensional.len(), 684 | intensional_indices.len(), 685 | ); 686 | 687 | let mut intensional: Vec<_> = intensional.into_iter().collect(); 688 | intensional.sort(); 689 | for (idx, variable) in intensional.iter().enumerate() { 690 | let is_index = match intensional_indices.get(variable) { 691 | Some((original_literal, ..)) => format!(" (index on `{}`)", original_literal.predicate), 692 | None => "".to_string(), 693 | }; 694 | 695 | println!("{:02}: `{}`{}", idx + 1, variable, is_index); 696 | } 697 | 698 | generate_skeleton_code( 699 | output, 700 | decls, 701 | extensional, 702 | extensional_indices, 703 | intensional, 704 | intensional_indices, 705 | predicates_consumed_as_keys, 706 | main_relation_candidates, 707 | generated_code_static_input, 708 | generated_code_dynamic_computation, 709 | ) 710 | .expect("Skeleton code generation failed"); 711 | } 712 | 713 | fn generate_skeleton_code( 714 | output: &mut String, 715 | decls: HashMap, 716 | extensional_predicates: Vec, 717 | extensional_indices: FxHashMap, 718 | intensional_predicates: Vec, 719 | intensional_indices: FxHashMap, Vec)>, 720 | 
predicates_consumed_as_keys: FxHashSet, 721 | main_relation_candidates: Vec, 722 | generated_code_static_input: Vec, 723 | generated_code_dynamic_computation: Vec, 724 | ) -> fmt::Result { 725 | writeln!(output, "")?; 726 | writeln!(output, "// Extensional predicates, and their indices")?; 727 | writeln!(output, "")?; 728 | 729 | for relation in extensional_predicates.iter() { 730 | if let Some(arg_decls) = decls.get(relation) { 731 | // This is one the initial extensional predicates 732 | let arg_types: Vec<_> = arg_decls 733 | .parameters 734 | .iter() 735 | .map(|decl| decl.typ_as_string()) 736 | .collect(); 737 | 738 | let arg_types = if predicates_consumed_as_keys.contains(relation) { 739 | format!("({}), ()", arg_types.join(", ")) 740 | } else { 741 | arg_types.join(", ") 742 | }; 743 | 744 | writeln!( 745 | output, 746 | "let {relation}: Relation<({arg_types})> = Vec::new().into();", 747 | relation = relation, 748 | arg_types = arg_types, 749 | )?; 750 | } else { 751 | // This is an index over an extensional predicate 752 | let (original_predicate, arg_types) = &extensional_indices[relation]; 753 | 754 | let arg_types = if predicates_consumed_as_keys.contains(relation) { 755 | format!("({}), ()", arg_types) 756 | } else { 757 | arg_types.clone() 758 | }; 759 | 760 | writeln!(output, "")?; 761 | writeln!( 762 | output, 763 | "// Note: `{relation}` is an indexed version of the input facts `{original_predicate}`", 764 | relation = relation, 765 | original_predicate = original_predicate, 766 | )?; 767 | writeln!( 768 | output, 769 | "let {relation}: Relation<({arg_types})> = Vec::new().into();", 770 | relation = relation, 771 | arg_types = arg_types, 772 | )?; 773 | writeln!(output, "")?; 774 | } 775 | } 776 | 777 | writeln!(output, "")?; 778 | 779 | // There can be only one 'main' intensional predicate 780 | if main_relation_candidates.len() == 1 { 781 | let main = &main_relation_candidates[0]; 782 | writeln!(output, "// `{}` inferred as the output relation", 
main)?; 783 | writeln!(output, "let {} = {{", main)?; 784 | } else { 785 | writeln!( 786 | output, 787 | "// Note: couldn't infer output relation automatically" 788 | )?; 789 | } 790 | 791 | writeln!(output, "")?; 792 | writeln!(output, "let mut iteration = Iteration::new();")?; 793 | 794 | writeln!(output, "")?; 795 | writeln!(output, "// Intensional predicates, and their indices")?; 796 | writeln!(output, "")?; 797 | 798 | for variable in intensional_predicates.iter() { 799 | if let Some(arg_decls) = decls.get(variable) { 800 | // This is one of the initial intensional predicates 801 | let arg_types: Vec<_> = arg_decls 802 | .parameters 803 | .iter() 804 | .map(|decl| decl.typ_as_string()) 805 | .collect(); 806 | 807 | // The encoding of these predicates consumed as keys requires to 808 | // wrap the key-value tuple as a key in another tuple, and a unit value. 809 | let arg_types = if predicates_consumed_as_keys.contains(variable) { 810 | format!("({}), ()", arg_types.join(", ")) 811 | } else { 812 | arg_types.join(", ") 813 | }; 814 | 815 | writeln!( 816 | output, 817 | "let {variable} = iteration.variable::<({arg_types})>({variable:?});", 818 | variable = variable, 819 | arg_types = arg_types, 820 | )?; 821 | } else if let Some((original_literal, key, args)) = intensional_indices.get(variable) { 822 | // This is an index over an intensional predicate 823 | let original_predicate = &original_literal.predicate; 824 | 825 | writeln!(output, "")?; 826 | writeln!(output, 827 | "// Note: `{variable}` is an indexed version of the `{original_predicate}` relation", 828 | variable = variable, 829 | original_predicate = original_predicate, 830 | )?; 831 | 832 | let arg_names = original_literal 833 | .args 834 | .iter() 835 | .map(|a| a.to_string()) 836 | .collect(); 837 | let key_types: Vec<_> = key 838 | .iter() 839 | .map(|v| { 840 | canonicalize_arg_type(&decls, original_predicate, &arg_names, v).to_string() 841 | }) 842 | .collect(); 843 | let args_types: Vec<_> = 
args 844 | .iter() 845 | .map(|v| { 846 | canonicalize_arg_type(&decls, original_predicate, &arg_names, v).to_string() 847 | }) 848 | .collect(); 849 | 850 | // The encoding of these predicates consumed as keys requires to 851 | // wrap the key-value tuple as a key in another tuple, and a unit value. 852 | let variable_type = join_types_as_tuple(key_types, args_types); 853 | let variable_type = if predicates_consumed_as_keys.contains(variable) { 854 | format!("{}, ()", variable_type) 855 | } else { 856 | variable_type 857 | }; 858 | 859 | writeln!( 860 | output, 861 | "let {variable} = iteration.variable::<({variable_type})>({variable:?});", 862 | variable = variable, 863 | variable_type = variable_type, 864 | )?; 865 | } else { 866 | // This is an intermediary step variable used in joins 867 | writeln!( 868 | output, 869 | "let {variable} = iteration.variable({variable:?});", 870 | variable = variable 871 | )?; 872 | } 873 | } 874 | 875 | // Initial data loading 876 | writeln!(output, "")?; 877 | for data_loading_operation in generated_code_static_input.chunks(2) { 878 | // Static data-loading generates pairs of lines: 879 | // - a comment describing the rule the data-loading operation is for 880 | // - the data-loading operation itself: the code creating a datafrog `Relation` 881 | let rule_comment = &data_loading_operation[0]; 882 | writeln!(output, "{}", rule_comment)?; 883 | 884 | let code = &data_loading_operation[1]; 885 | writeln!(output, "{}\n", code)?; 886 | } 887 | 888 | writeln!(output, "while iteration.changed() {{")?; 889 | 890 | // Intensional indices maintenance 891 | writeln!(output, "")?; 892 | writeln!(output, " // Index maintenance")?; 893 | for (index_relation, (indexed_literal, key, args)) in intensional_indices.iter() { 894 | let original_relation = &indexed_literal.predicate; 895 | 896 | let mut produced_key = join_args_as_tuple(&key, &key, &args); 897 | let mut produced_args = join_args_as_tuple(&args, &key, &args); 898 | 899 | let 
arg_decls = &decls[&original_relation.to_string()]; 900 | let declared_args: Vec<_> = arg_decls 901 | .parameters 902 | .iter() 903 | .map(|decl| decl.name.to_string().to_lowercase()) 904 | .collect(); 905 | 906 | // The encoding of these predicates consumed as keys is a tuple 907 | // wrapping the key-value tuple as a key in another tuple, and a unit value, so we need to 908 | // take care of the unit value to correctly read the source tuples. 909 | let relation_args = if predicates_consumed_as_keys.contains(&original_relation.to_string()) 910 | { 911 | let relation_args = join_args_as_tuple(&declared_args, &key, &args); 912 | format!("({}, _)", relation_args) 913 | } else { 914 | let arg_names: Vec<_> = indexed_literal.args.iter().map(|v| v.to_string()).collect(); 915 | 916 | let canonicalized_key: Vec<_> = key 917 | .iter() 918 | .map(|v| canonicalize_arg_name(&decls, &indexed_literal.predicate, &arg_names, v)) 919 | .collect(); 920 | 921 | let canonicalized_args: Vec<_> = args 922 | .iter() 923 | .map(|v| canonicalize_arg_name(&decls, &indexed_literal.predicate, &arg_names, v)) 924 | .collect(); 925 | 926 | produced_key = 927 | join_args_as_tuple(&canonicalized_key, &canonicalized_key, &canonicalized_args); 928 | produced_args = 929 | join_args_as_tuple(&canonicalized_args, &canonicalized_key, &canonicalized_args); 930 | 931 | let relation_args = 932 | join_args_as_tuple(&declared_args, &canonicalized_key, &canonicalized_args); 933 | 934 | relation_args 935 | }; 936 | 937 | writeln!(output, 938 | " {index_relation}.from_map(&{original_relation}, |&{relation_args}| ({produced_key}, {produced_args}));", 939 | index_relation = index_relation, 940 | original_relation = original_relation, 941 | relation_args = relation_args, 942 | produced_key = produced_key, 943 | produced_args = produced_args, 944 | )?; 945 | } 946 | 947 | // Finally, output the computation rules 948 | writeln!(output, "")?; 949 | writeln!(output, " // Rules")?; 950 | writeln!(output, "")?; 951 | 
for line in generated_code_dynamic_computation { 952 | if line.is_empty() { 953 | writeln!(output, "")?; 954 | } else { 955 | writeln!(output, " {}", line)?; 956 | } 957 | } 958 | 959 | writeln!(output, "}}")?; 960 | 961 | if main_relation_candidates.len() == 1 { 962 | writeln!(output, "")?; 963 | writeln!(output, "{}.complete()", main_relation_candidates[0])?; 964 | writeln!(output, "}};")?; 965 | } 966 | 967 | Ok(()) 968 | } 969 | 970 | fn generate_index_relation<'a>( 971 | decls: &HashMap, 972 | literal: &'a ast::Literal, // the literal being indexed 973 | relation_args: &Vec, // the order and names of the arguments of the index 974 | key_args: &Vec, // the arguments used in the index key 975 | value_args: &Vec, // the arguments used in the index value 976 | extensional_predicates: &mut FxHashSet, 977 | extensional_indices: &mut FxHashMap, 978 | intensional_predicates: &mut FxHashSet, 979 | intensional_inputs: &mut FxHashSet, 980 | intensional_indices: &mut FxHashMap, Vec)>, 981 | ) -> String { 982 | let index_relation = 983 | generate_index_relation_name(&decls, &literal.predicate, &key_args, &relation_args); 984 | 985 | // Index maintenance 986 | if extensional_predicates.contains(&literal.predicate.to_string()) { 987 | record_extensional_index_use( 988 | decls, 989 | &literal.predicate, 990 | &index_relation, 991 | &relation_args, 992 | &key_args, 993 | &value_args, 994 | extensional_predicates, 995 | extensional_indices, 996 | ); 997 | } else { 998 | record_intensional_index_use( 999 | &literal, 1000 | &key_args, 1001 | &value_args, 1002 | &index_relation, 1003 | intensional_predicates, 1004 | intensional_inputs, 1005 | intensional_indices, 1006 | ); 1007 | } 1008 | 1009 | index_relation 1010 | } 1011 | 1012 | fn record_predicate_use( 1013 | predicate: &str, 1014 | intensional_predicates: &FxHashSet, 1015 | extensional_inputs: &mut FxHashSet, 1016 | intensional_inputs: &mut FxHashSet, 1017 | ) { 1018 | if !intensional_predicates.contains(predicate) { 1019 | 
extensional_inputs.insert(predicate.to_string());
    } else {
        intensional_inputs.insert(predicate.to_string());
    }
}

/// Records that an index over the extensional relation `origin_predicate` is used.
///
/// `index_predicate` is added to `extensional_predicates`, and `extensional_indices`
/// maps it to the indexed relation together with the textual tuple type built by
/// `join_types_as_tuple` from the canonical types of `key_args` and `value_args`.
///
/// # Panics
///
/// Panics (through `canonicalize_arg_type`) when a key or value argument is not
/// listed in `index_args`, or when `origin_predicate` has no declaration in `decls`.
fn record_extensional_index_use<'a>(
    decls: &HashMap<String, ast::PredicateDecl>,
    origin_predicate: &'a syn::Ident, // the relation over which the index relation maps
    index_predicate: &str,            // the index relation
    index_args: &Vec<String>,         // the index arguments name and order
    key_args: &Vec<String>,           // the indexed arguments used in the "key"
    value_args: &Vec<String>,         // the indexed arguments used in the "value"
    extensional_predicates: &mut FxHashSet<String>,
    extensional_indices: &mut FxHashMap<String, (&'a syn::Ident, String)>,
) {
    // Canonicalize the types of the key and value arguments; both lists go
    // through the same lookup, so it is written once.
    let canonical_types = |vars: &Vec<String>| -> Vec<String> {
        vars.iter()
            .map(|v| canonicalize_arg_type(decls, origin_predicate, index_args, v))
            .collect()
    };
    let key_types = canonical_types(key_args);
    let arg_types = canonical_types(value_args);

    extensional_predicates.insert(index_predicate.to_string());
    extensional_indices.insert(
        index_predicate.to_string(),
        (origin_predicate, join_types_as_tuple(key_types, arg_types)),
    );
}

/// Records that an index over the intensional relation named by `literal` is used.
///
/// `index_relation` is added to `intensional_predicates`, the literal's own predicate
/// is added to `intensional_inputs`, and `intensional_indices` maps the index name to
/// the literal and to copies of its key and value argument lists.
fn record_intensional_index_use<'a>(
    literal: &'a ast::Literal,
    key_args: &Vec<String>,
    value_args: &Vec<String>,
    index_relation: &str,
    intensional_predicates: &mut FxHashSet<String>,
    intensional_inputs: &mut FxHashSet<String>,
    intensional_indices: &mut FxHashMap<String, (&'a ast::Literal, Vec<String>, Vec<String>)>,
) {
    // When using an index, we're effectively using both `Variables`
    intensional_predicates.insert(index_relation.to_string());
    intensional_inputs.insert(literal.predicate.to_string());

    intensional_indices.insert(
        index_relation.to_string(),
        (literal, key_args.clone(), value_args.clone()),
    );
}

/// Returns the declaration of the parameter of `predicate` that sits at the
/// position `variable` occupies in `args`.
///
/// # Panics
///
/// Panics when `variable` is not in `args`, when `predicate` is not declared in
/// `global_decls`, or when the declaration has fewer parameters than the position
/// found in `args`.
fn find_arg_decl<'a>(
    global_decls: &'a HashMap<String, ast::PredicateDecl>,
    predicate: &syn::Ident,
    args: &Vec<String>,
    variable: &str,
) -> &'a ast::ParamDecl {
    // The message is only formatted on the failure path.
    let idx = args
        .iter()
        .position(|arg| arg == variable)
        .unwrap_or_else(|| {
            panic!(
                "Couldn't find variable {:?} in the specified args: {:?}",
                variable, args
            )
        });

    let predicate_arg_decls = global_decls
        .get(&predicate.to_string())
        .unwrap_or_else(|| panic!("Couldn't find a declaration for predicate `{}`", predicate));
    &predicate_arg_decls.parameters[idx]
}

// Find the canonical names of arguments via their usage in the indexed relation.
// For example, when indexing `outlives(o1, o2, p)` via `outlives(o2, o3, p)` in a join,
// we need to map the local names in the join to their original relation names and order,
// to find that the index's `(o2, o3, p)` is mapping over `(o1, o2, p)`.
fn canonicalize_arg_name(
    global_decls: &HashMap<String, ast::PredicateDecl>,
    predicate: &syn::Ident,
    args: &Vec<String>,
    variable: &str,
) -> String {
    let declared_name = find_arg_decl(global_decls, predicate, args, variable)
        .name
        .to_string();
    declared_name.to_lowercase()
}

// Find the canonical types of arguments via their usage in the indexed relation.
// For example, when indexing `outlives(o1, o2, p)` via `outlives(o2, o3, p)` in a join,
// we need to map the local names in the join to their original relation names and order,
// to find that the index's `(o2, o3, p)` is mapping over `(o1, o2, p)`, to finally find
// these canonical arguments' types and generate a `Relation<(Origin, Origin, Point)>`.
1108 | fn canonicalize_arg_type<'a>( 1109 | global_decls: &'a HashMap, 1110 | predicate: &syn::Ident, 1111 | args: &Vec, 1112 | variable: &str, 1113 | ) -> String { 1114 | find_arg_decl(global_decls, predicate, args, variable) 1115 | .typ 1116 | .to_token_stream() 1117 | .to_string() 1118 | } 1119 | 1120 | fn generate_index_relation_name( 1121 | decls: &HashMap, 1122 | predicate: &syn::Ident, 1123 | key: &Vec, 1124 | args: &Vec, 1125 | ) -> String { 1126 | let mut index_args = String::new(); 1127 | for v in key { 1128 | let idx_key = canonicalize_arg_name(&decls, predicate, &args, v); 1129 | index_args.push_str(&idx_key.to_string()); 1130 | } 1131 | 1132 | format!("{}_{}", predicate, index_args) 1133 | } 1134 | 1135 | /// Generate tupled rust names for the datalog arguments, potentially prefixed 1136 | /// with _ to avoid generating a warning when it's not actually used 1137 | /// to produce the tuple, and potentially "untupled" if there's only one. 1138 | fn join_args_as_tuple( 1139 | variables: &Vec, 1140 | uses_key: &Vec, 1141 | uses_args: &Vec, 1142 | ) -> String { 1143 | let name_arg = |arg| { 1144 | if uses_key.contains(arg) 1145 | || uses_key.contains(&arg.to_uppercase()) 1146 | || uses_args.contains(arg) 1147 | || uses_args.contains(&arg.to_uppercase()) 1148 | { 1149 | arg.to_string().to_lowercase() 1150 | } else { 1151 | format!("_{}", arg.to_string().to_lowercase()) 1152 | } 1153 | }; 1154 | 1155 | if variables.len() == 1 { 1156 | name_arg(&variables[0]) 1157 | } else { 1158 | format!( 1159 | "({})", 1160 | variables 1161 | .iter() 1162 | .map(name_arg) 1163 | .collect::>() 1164 | .join(", ") 1165 | ) 1166 | } 1167 | } 1168 | 1169 | fn join_types_as_tuple(key_types: Vec, args_types: Vec) -> String { 1170 | let join_as_tuple = |types: Vec| { 1171 | if types.len() == 1 { 1172 | types[0].to_string() 1173 | } else { 1174 | format!("({})", types.into_iter().collect::>().join(", ")) 1175 | } 1176 | }; 1177 | 1178 | let tupled_key_types = 
join_as_tuple(key_types); 1179 | 1180 | if args_types.is_empty() { 1181 | format!("{}", tupled_key_types) 1182 | } else { 1183 | let tupled_args_types = join_as_tuple(args_types); 1184 | format!("{}, {}", tupled_key_types, tupled_args_types) 1185 | } 1186 | } 1187 | -------------------------------------------------------------------------------- /src/generator_new/ast.rs: -------------------------------------------------------------------------------- 1 | //! # Examples 2 | //! 3 | //! ## Example 1 4 | //! 5 | //! ```Datalog 6 | //! input in(x: u32, y: u32); 7 | //! output r(x: u32, y: u32); 8 | //! r(x, y) = in(y, x); 9 | //! ``` 10 | //! ``in`` is assumed to be a variable of type ``&Vec<(u32, u32)>``. 11 | //! ```ignore 12 | //! let r = in.iter().map(|(y, x)| {(x, y)}); 13 | //! ``` 14 | //! 15 | //! ## Example 2 16 | //! 17 | //! ```Datalog 18 | //! input in(x: u32, y: u32); 19 | //! output r(x: u32, y: u32); 20 | //! r(x, y) = in(y, x); 21 | //! r(x, y) = r(x, z), r(z, y); 22 | //! ``` 23 | //! ``in`` is assumed to be a variable of type ``&Vec<(u32, u32)>``. 24 | //! ```ignore 25 | //! let mut iteration = Iteration::new(); 26 | //! let r = iteration.variable::<(u32, u32)>("r"); 27 | //! let r_1 = iteration.variable::<(u32, u32)>("r_1"); 28 | //! let r_2 = iteration.variable::<(u32, u32)>("r_2"); 29 | //! while iteration.changed() { 30 | //! r_1.from_map(&r, |(x, z)| {(z, x)}); 31 | //! r_2.from_map(&r, |(z, y)| {(z, y)}); 32 | //! r.from_join(&r_1, &r_2, |(z, x, y)| {z, x, y}); 33 | //! } 34 | //! let r = in.iter().map(|(y, x)| {(x, y)}); 35 | //! ``` 36 | 37 | use crate::data_structures::OrderedMap; 38 | use std::collections::HashMap; 39 | 40 | /// A Datalog variable. 
41 | /// 42 | /// For example, `x` in the following: 43 | /// ```ignore 44 | /// r_1.from_map(&r, |(x, z)| {(z, x)}); 45 | /// ``` 46 | #[derive(Debug, Clone)] 47 | pub(crate) struct DVar { 48 | pub name: syn::Ident, 49 | } 50 | 51 | impl DVar { 52 | pub fn new(name: syn::Ident) -> Self { 53 | Self { name: name } 54 | } 55 | } 56 | 57 | /// A flat tuple of `DVar`s. Typically used to represent the user defined types. 58 | #[derive(Debug)] 59 | pub(crate) struct DVarTuple { 60 | pub vars: Vec, 61 | } 62 | 63 | impl DVarTuple { 64 | pub fn new(args: Vec) -> Self { 65 | Self { 66 | vars: args.into_iter().map(|ident| DVar::new(ident)).collect(), 67 | } 68 | } 69 | } 70 | 71 | /// A (key, value) representation of `DVar`s. It is used for joins. 72 | #[derive(Debug)] 73 | pub(crate) struct DVarKeyVal { 74 | pub key: Vec, 75 | pub value: Vec, 76 | } 77 | 78 | /// An ordered set of `DVar`s. 79 | #[derive(Debug)] 80 | pub(crate) enum DVars { 81 | Tuple(DVarTuple), 82 | KeyVal(DVarKeyVal), 83 | } 84 | 85 | impl DVars { 86 | pub fn new_tuple(args: Vec) -> Self { 87 | DVars::Tuple(DVarTuple::new(args)) 88 | } 89 | pub fn new_key_val(key: Vec, value: Vec) -> Self { 90 | DVars::KeyVal(DVarKeyVal { 91 | key: key.into_iter().map(|ident| DVar::new(ident)).collect(), 92 | value: value.into_iter().map(|ident| DVar::new(ident)).collect(), 93 | }) 94 | } 95 | } 96 | 97 | /// A type that matches some `DVars`. 98 | #[derive(Debug)] 99 | pub(crate) enum DVarTypes { 100 | Tuple(Vec), 101 | KeyVal { 102 | key: Vec, 103 | value: Vec, 104 | }, 105 | } 106 | 107 | impl std::convert::From> for DVarTypes { 108 | fn from(types: Vec) -> Self { 109 | DVarTypes::Tuple(types) 110 | } 111 | } 112 | 113 | /// A Datafrog relation. 114 | #[derive(Debug)] 115 | pub(crate) struct RelationDecl { 116 | pub var: Variable, 117 | pub typ: Vec, 118 | } 119 | 120 | /// A Datafrog variable. 
121 | /// 122 | /// For example, `rule` in the following: 123 | /// ```ignore 124 | /// let rule = iteration.variable::<(u32, u32)>("rule"); 125 | /// ``` 126 | #[derive(Debug)] 127 | pub(crate) struct VariableDecl { 128 | pub var: Variable, 129 | /// The type by shape must match `DVarKeyVal`. 130 | pub typ: DVarTypes, 131 | pub is_output: bool, 132 | } 133 | 134 | /// A reference to a Datafrog relation or variable. 135 | #[derive(Debug, Clone)] 136 | pub(crate) struct Variable { 137 | pub name: syn::Ident, 138 | } 139 | 140 | impl Variable { 141 | pub fn with_counter(&self, counter: usize) -> Self { 142 | Self { 143 | name: syn::Ident::new( 144 | &format!("{}_{}", self.name, counter), 145 | proc_macro2::Span::call_site(), 146 | ), 147 | } 148 | } 149 | } 150 | 151 | /// An operation that reorders and potentially drops Datalog variables. 152 | /// 153 | /// It is encoded as a Datafrog `from_map`. 154 | #[derive(Debug)] 155 | pub(crate) struct ReorderOp { 156 | /// A variable into which we write the result. 157 | pub output: Variable, 158 | /// A variable from which we read the input. 159 | pub input: Variable, 160 | pub input_vars: DVars, 161 | pub output_vars: DVars, 162 | } 163 | 164 | /// An operation that evaluates the given expression and adds it as a last output variable. 165 | #[derive(Debug)] 166 | pub(crate) struct BindVarOp { 167 | /// A variable into which we write the result. 168 | pub output: Variable, 169 | /// A variable from which we read the input. 170 | pub input: Variable, 171 | /// Input variables that are copied to output and potentially used for evaluating `expr`. 172 | pub vars: DVarTuple, 173 | /// The expression whose result is bound to a new variable. 174 | pub expr: syn::Expr, 175 | } 176 | 177 | /// An operation that joins two variables. 178 | #[derive(Debug)] 179 | pub(crate) struct JoinOp { 180 | /// A variable into which we write the result. 181 | pub output: Variable, 182 | /// The first variable, which we use in join. 
183 | pub input_first: Variable, 184 | /// The second variable, which we use in join. 185 | pub input_second: Variable, 186 | /// Datalog variables used for joining. 187 | pub key: DVarTuple, 188 | /// Datalog value variables from the first variable. 189 | pub value_first: DVarTuple, 190 | /// Datalog value variables from the second variable. 191 | pub value_second: DVarTuple, 192 | } 193 | 194 | /// An operation that removes facts from the variable that belong to the relation. 195 | #[derive(Debug)] 196 | pub(crate) struct AntiJoinOp { 197 | /// The variable into which we write the result. 198 | pub output: Variable, 199 | /// The variable from which we take facts. 200 | pub input_variable: Variable, 201 | /// The relation in which we check facts. 202 | pub input_relation: Variable, 203 | /// Datalog variables used for joining. 204 | pub key: DVarTuple, 205 | /// Datalog value variables from the variable. 206 | pub value: DVarTuple, 207 | } 208 | 209 | /// An operation that filters out facts. 210 | #[derive(Debug)] 211 | pub(crate) struct FilterOp { 212 | /// A variable which we want to filter. 213 | pub variable: Variable, 214 | pub vars: DVars, 215 | /// A boolean expression used for filtering. 216 | pub expr: syn::Expr, 217 | } 218 | 219 | /// An operation that inserts the relation into a variable. 220 | #[derive(Debug)] 221 | pub(crate) struct InsertOp { 222 | /// The variable into which we want to insert the relation. 223 | pub variable: Variable, 224 | /// The relation to be inserted. 225 | pub relation: Variable, 226 | } 227 | 228 | #[derive(Debug)] 229 | pub(crate) enum Operation { 230 | Reorder(ReorderOp), 231 | // BindVar(BindVarOp), 232 | Join(JoinOp), 233 | AntiJoin(AntiJoinOp), 234 | // Filter(FilterOp), 235 | Insert(InsertOp), 236 | } 237 | 238 | /// A Datafrog iteration. 239 | #[derive(Debug)] 240 | pub(crate) struct Iteration { 241 | /// Variables that are converted relations. 
242 | relation_variables: HashMap, 243 | pub relations: OrderedMap, 244 | pub variables: OrderedMap, 245 | /// Operations performed before entering the iteration. 246 | pub pre_operations: Vec, 247 | /// Operations performed in the body of the iteration. 248 | pub body_operations: Vec, 249 | /// Operations performed after exiting the iteration. 250 | pub post_operations: Vec, 251 | } 252 | 253 | impl Iteration { 254 | pub fn new(relations: Vec, variables: Vec) -> Self { 255 | Self { 256 | relation_variables: HashMap::new(), 257 | relations: relations 258 | .into_iter() 259 | .map(|decl| (decl.var.name.clone(), decl)) 260 | .collect(), 261 | variables: variables 262 | .into_iter() 263 | .map(|decl| (decl.var.name.clone(), decl)) 264 | .collect(), 265 | pre_operations: Vec::new(), 266 | body_operations: Vec::new(), 267 | post_operations: Vec::new(), 268 | } 269 | } 270 | /// Convert a Datafrog relation to a Datafrog variable and return its identifier. 271 | pub fn convert_relation_to_variable(&mut self, variable: &Variable) -> Variable { 272 | if let Some(name) = self.relation_variables.get(&variable.name) { 273 | return self.variables[name].var.clone(); 274 | } 275 | let decl = &self.relations[&variable.name]; 276 | let variable_decl = VariableDecl { 277 | var: decl.var.with_counter(self.variables.len()), 278 | typ: decl.typ.clone().into(), 279 | is_output: false, 280 | }; 281 | let new_variable = variable_decl.var.clone(); 282 | self.relation_variables 283 | .insert(variable.name.clone(), new_variable.name.clone()); 284 | self.variables 285 | .insert(new_variable.name.clone(), variable_decl); 286 | self.pre_operations.push(Operation::Insert(InsertOp { 287 | variable: new_variable.clone(), 288 | relation: decl.var.clone(), 289 | })); 290 | new_variable 291 | } 292 | /// Get Datafrog variable that corresponds to the given predicate name. If 293 | /// we have only a relation, then convert it into a variable. 
294 | pub fn get_or_convert_variable(&mut self, predicate: &syn::Ident) -> Variable { 295 | if let Some(variable) = self.get_relation_var(predicate) { 296 | // TODO: Avoid converting the same relation multiple times. 297 | self.convert_relation_to_variable(&variable) 298 | } else { 299 | self.get_variable(predicate) 300 | } 301 | } 302 | pub fn get_relation_var(&self, variable_name: &syn::Ident) -> Option { 303 | self.relations 304 | .get(variable_name) 305 | .map(|decl| decl.var.clone()) 306 | } 307 | pub fn get_variable(&self, variable_name: &syn::Ident) -> Variable { 308 | self.variables[variable_name].var.clone() 309 | } 310 | pub fn add_operation(&mut self, operation: Operation) { 311 | self.body_operations.push(operation); 312 | } 313 | pub fn get_variable_tuple_types(&self, variable: &Variable) -> Vec { 314 | let decl = &self.variables[&variable.name]; 315 | match &decl.typ { 316 | DVarTypes::Tuple(types) => types.clone(), 317 | DVarTypes::KeyVal { .. } => unreachable!(), 318 | } 319 | } 320 | pub fn create_key_val_variable( 321 | &mut self, 322 | variable: &Variable, 323 | key: Vec, 324 | value: Vec, 325 | ) -> Variable { 326 | self.create_variable(variable, DVarTypes::KeyVal { key, value }) 327 | } 328 | pub fn create_tuple_variable( 329 | &mut self, 330 | variable: &Variable, 331 | types: Vec, 332 | ) -> Variable { 333 | self.create_variable(variable, DVarTypes::Tuple(types)) 334 | } 335 | pub fn create_variable(&mut self, variable: &Variable, typ: DVarTypes) -> Variable { 336 | let variable_decl = VariableDecl { 337 | var: variable.with_counter(self.variables.len()), 338 | typ: typ, 339 | is_output: false, 340 | }; 341 | let new_variable = variable_decl.var.clone(); 342 | self.variables 343 | .insert(new_variable.name.clone(), variable_decl); 344 | new_variable 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /src/generator_new/encode.rs: 
-------------------------------------------------------------------------------- 1 | use crate::ast; 2 | use crate::generator_new::ast as gen; 3 | 4 | /// Divide the arguments into three sets: 5 | /// 6 | /// 1. `key` – the arguments that are common in `first` and `second`. 7 | /// 2. `first_remainder` – the arguments that are unique in `first`. 8 | /// 3. `second_remainder` – the arguments that are unique in `second`. 9 | fn common_args( 10 | first: &Vec, 11 | first_types: &Vec, 12 | second: &Vec, 13 | second_types: &Vec, 14 | ) -> ( 15 | (Vec, Vec), 16 | (Vec, Vec), 17 | (Vec, Vec), 18 | ) { 19 | assert!(first.len() == first_types.len()); 20 | assert!(second.len() == second_types.len()); 21 | 22 | let mut key = Vec::new(); 23 | let mut key_types = Vec::new(); 24 | let mut first_remainder = Vec::new(); 25 | let mut first_remainder_types = Vec::new(); 26 | 27 | for (arg1, arg1_type) in first.iter().zip(first_types) { 28 | if arg1.is_wildcard() { 29 | continue; 30 | } 31 | let mut found = false; 32 | for arg2 in second { 33 | if arg1 == arg2 { 34 | key.push(arg1.clone()); 35 | key_types.push(arg1_type.clone()); 36 | found = true; 37 | break; 38 | } 39 | } 40 | if !found { 41 | first_remainder.push(arg1.clone()); 42 | first_remainder_types.push(arg1_type.clone()); 43 | } 44 | } 45 | let mut second_remainder = Vec::new(); 46 | let mut second_remainder_types = Vec::new(); 47 | for (arg2, arg2_type) in second.iter().zip(second_types) { 48 | if arg2.is_wildcard() { 49 | continue; 50 | } 51 | if !key.contains(arg2) { 52 | second_remainder.push(arg2.clone()); 53 | second_remainder_types.push(arg2_type.clone()); 54 | } 55 | } 56 | 57 | ( 58 | (key, key_types), 59 | (first_remainder, first_remainder_types), 60 | (second_remainder, second_remainder_types), 61 | ) 62 | } 63 | 64 | pub(crate) fn encode(program: ast::Program) -> gen::Iteration { 65 | let mut relations = Vec::new(); 66 | let mut variables = Vec::new(); 67 | for decl in program.decls.values() { 68 | let var = 
gen::Variable { 69 | name: decl.name.clone(), 70 | }; 71 | let typ = decl 72 | .parameters 73 | .iter() 74 | .map(|param| param.typ.clone()) 75 | .collect(); 76 | match decl.kind { 77 | ast::PredicateKind::Input => { 78 | relations.push(gen::RelationDecl { var: var, typ: typ }); 79 | } 80 | ast::PredicateKind::Internal => { 81 | variables.push(gen::VariableDecl { 82 | var: var, 83 | typ: gen::DVarTypes::Tuple(typ), 84 | is_output: false, 85 | }); 86 | } 87 | ast::PredicateKind::Output => { 88 | variables.push(gen::VariableDecl { 89 | var: var, 90 | typ: gen::DVarTypes::Tuple(typ), 91 | is_output: true, 92 | }); 93 | } 94 | } 95 | } 96 | let mut iteration = gen::Iteration::new(relations, variables); 97 | for rule in &program.rules { 98 | let head_variable = iteration.get_variable(&rule.head.predicate); 99 | let mut iter = rule.body.iter(); 100 | let literal1 = iter.next().unwrap(); 101 | assert!(!literal1.is_negated); 102 | let mut variable = iteration.get_or_convert_variable(&literal1.predicate); 103 | let mut args = literal1.args.clone(); 104 | 105 | while let Some(literal) = iter.next() { 106 | // TODO: Check during the typechecking phase that no literal has two 107 | // arguments with the same name. 
108 | let (new_variable, new_args) = if literal.is_negated { 109 | encode_antijoin(&mut iteration, &head_variable, variable, args, literal) 110 | } else { 111 | encode_join(&mut iteration, &head_variable, variable, args, literal) 112 | }; 113 | variable = new_variable; 114 | args = new_args; 115 | } 116 | let reorder_op = gen::ReorderOp { 117 | output: head_variable, 118 | input: variable, 119 | input_vars: args.into(), 120 | output_vars: rule.head.args.clone().into(), 121 | }; 122 | iteration.add_operation(gen::Operation::Reorder(reorder_op)); 123 | } 124 | iteration 125 | } 126 | 127 | fn encode_antijoin( 128 | iteration: &mut gen::Iteration, 129 | head_variable: &gen::Variable, 130 | variable: gen::Variable, 131 | args: Vec, 132 | literal: &ast::Literal, 133 | ) -> (gen::Variable, Vec) { 134 | let relation_variable = iteration 135 | .get_relation_var(&literal.predicate) 136 | .expect("Negations are currently supported only on relations."); 137 | let arg_types = iteration.get_variable_tuple_types(&variable); 138 | let literal_arg_types = iteration.relations[&relation_variable.name].typ.clone(); 139 | 140 | // TODO: Lift this limitation. 
141 | for arg in &literal.args { 142 | if !args.contains(arg) { 143 | unimplemented!("Currently all variables from the negated relation must be used."); 144 | } 145 | } 146 | let mut remainder = Vec::new(); 147 | let mut remainder_types = Vec::new(); 148 | for (arg, arg_type) in args.iter().zip(&arg_types) { 149 | if !literal.args.contains(arg) { 150 | remainder.push(arg.clone()); 151 | remainder_types.push(arg_type.clone()); 152 | } 153 | } 154 | 155 | let first_variable = iteration.create_key_val_variable( 156 | &variable, 157 | literal_arg_types.clone(), 158 | remainder_types.clone(), 159 | ); 160 | let reorder_first_op = gen::ReorderOp { 161 | output: first_variable.clone(), 162 | input: variable, 163 | input_vars: args.into(), 164 | output_vars: (literal.args.clone(), remainder.clone()).into(), 165 | }; 166 | iteration.add_operation(gen::Operation::Reorder(reorder_first_op)); 167 | 168 | let result_types = literal_arg_types 169 | .into_iter() 170 | .chain(remainder_types) 171 | .collect(); 172 | let args = literal 173 | .args 174 | .clone() 175 | .into_iter() 176 | .chain(remainder.clone()) 177 | .collect(); 178 | let variable = iteration.create_tuple_variable(&head_variable, result_types); 179 | let join_op = gen::AntiJoinOp { 180 | output: variable.clone(), 181 | input_variable: first_variable, 182 | input_relation: relation_variable, 183 | key: literal.args.clone().into(), 184 | value: remainder.into(), 185 | }; 186 | iteration.add_operation(gen::Operation::AntiJoin(join_op)); 187 | (variable, args) 188 | } 189 | 190 | fn encode_join( 191 | iteration: &mut gen::Iteration, 192 | head_variable: &gen::Variable, 193 | variable: gen::Variable, 194 | args: Vec, 195 | literal: &ast::Literal, 196 | ) -> (gen::Variable, Vec) { 197 | let joined_variable = iteration.get_or_convert_variable(&literal.predicate); 198 | let arg_types = iteration.get_variable_tuple_types(&variable); 199 | let literal_arg_types = iteration.get_variable_tuple_types(&joined_variable); 200 | 
let ((key, key_types), (remainder1, remainder1_types), (remainder2, remainder2_types)) = 201 | common_args(&args, &arg_types, &literal.args, &literal_arg_types); 202 | let first_variable = 203 | iteration.create_key_val_variable(&variable, key_types.clone(), remainder1_types.clone()); 204 | let reorder_first_op = gen::ReorderOp { 205 | output: first_variable.clone(), 206 | input: variable, 207 | input_vars: args.into(), 208 | output_vars: (key.clone(), remainder1.clone()).into(), 209 | }; 210 | iteration.add_operation(gen::Operation::Reorder(reorder_first_op)); 211 | let second_variable = iteration.create_key_val_variable( 212 | &joined_variable, 213 | key_types.clone(), 214 | remainder2_types.clone(), 215 | ); 216 | let reorder_second_op = gen::ReorderOp { 217 | output: second_variable.clone(), 218 | input: joined_variable, 219 | input_vars: literal.args.clone().into(), 220 | output_vars: (key.clone(), remainder2.clone()).into(), 221 | }; 222 | iteration.add_operation(gen::Operation::Reorder(reorder_second_op)); 223 | let result_types = key_types 224 | .into_iter() 225 | .chain(remainder1_types) 226 | .chain(remainder2_types) 227 | .collect(); 228 | let args = key 229 | .clone() 230 | .into_iter() 231 | .chain(remainder1.clone()) 232 | .chain(remainder2.clone()) 233 | .collect(); 234 | let variable = iteration.create_tuple_variable(&head_variable, result_types); 235 | let join_op = gen::JoinOp { 236 | output: variable.clone(), 237 | input_first: first_variable, 238 | input_second: second_variable, 239 | key: key.into(), 240 | value_first: remainder1.into(), 241 | value_second: remainder2.into(), 242 | }; 243 | iteration.add_operation(gen::Operation::Join(join_op)); 244 | (variable, args) 245 | } 246 | 247 | impl std::convert::From> for gen::DVars { 248 | fn from(args: Vec) -> Self { 249 | gen::DVars::new_tuple(args.into_iter().map(|arg| arg.to_ident()).collect()) 250 | } 251 | } 252 | 253 | impl std::convert::From> for gen::DVarTuple { 254 | fn from(args: Vec) -> 
Self { 255 | gen::DVarTuple::new(args.into_iter().map(|arg| arg.to_ident()).collect()) 256 | } 257 | } 258 | 259 | impl std::convert::From<(Vec, Vec)> for gen::DVars { 260 | fn from((key, value): (Vec, Vec)) -> Self { 261 | gen::DVars::new_key_val( 262 | key.into_iter().map(|arg| arg.to_ident()).collect(), 263 | value.into_iter().map(|arg| arg.to_ident()).collect(), 264 | ) 265 | } 266 | } 267 | 268 | impl std::convert::From> for gen::DVars { 269 | fn from(args: Vec) -> Self { 270 | gen::DVars::new_tuple(args) 271 | } 272 | } 273 | 274 | #[cfg(test)] 275 | mod tests { 276 | use super::*; 277 | use crate::parser::parse; 278 | use crate::typechecker::typecheck; 279 | use proc_macro2::TokenStream; 280 | use quote::ToTokens; 281 | use std::str::FromStr; 282 | 283 | fn compare(datalog_source: &str, exptected_encoding: &str) { 284 | let parsed_program = parse(datalog_source); 285 | let typechecked_program = typecheck(parsed_program).unwrap(); 286 | let iteration = encode(typechecked_program); 287 | let tokens = iteration.to_token_stream().to_string(); 288 | eprintln!("{}", tokens); 289 | let expected_tokens = TokenStream::from_str(exptected_encoding).unwrap(); 290 | assert_eq!(tokens.to_string(), expected_tokens.to_string()); 291 | } 292 | 293 | #[test] 294 | fn encode_simple1() { 295 | compare( 296 | " 297 | input inp(x: u32, y: u32) 298 | output out(x: u32, y: u32) 299 | out(x, y) :- inp(y, x). 
300 | ", 301 | r##" 302 | { 303 | let mut iteration = datafrog::Iteration::new(); 304 | let var_inp = datafrog::Relation:: <(u32, u32,)> ::from_vec(inp); 305 | let var_out = iteration.variable:: <(u32, u32,)>("out"); 306 | let var_inp_1 = iteration.variable:: <(u32, u32,)>("inp_1"); 307 | var_inp_1.insert(var_inp); 308 | while iteration.changed() { 309 | var_out.from_map(&var_inp_1, | &(y, x,)| (x, y,)); 310 | } 311 | out = var_out.complete(); 312 | } 313 | "##, 314 | ); 315 | } 316 | #[test] 317 | fn encode_transitive_closure() { 318 | compare( 319 | " 320 | input inp(x: u32, y: u32) 321 | output out(x: u32, y: u32) 322 | out(x, y) :- inp(x, y). 323 | out(x, y) :- out(x, z), out(z, y). 324 | ", 325 | r##" 326 | { 327 | let mut iteration = datafrog::Iteration::new(); 328 | let var_inp = datafrog::Relation:: <(u32, u32,)> ::from_vec(inp); 329 | let var_out = iteration.variable:: <(u32, u32,)>("out"); 330 | let var_inp_1 = iteration.variable:: <(u32, u32,)>("inp_1"); 331 | let var_out_2 = iteration.variable:: <((u32,), (u32,))>("out_2"); 332 | let var_out_3 = iteration.variable:: <((u32,), (u32,))>("out_3"); 333 | let var_out_4 = iteration.variable:: <(u32, u32, u32,)>("out_4"); 334 | var_inp_1.insert(var_inp); 335 | while iteration.changed() { 336 | var_out.from_map(&var_inp_1, | &(x, y,)| (x, y,)); 337 | var_out_2.from_map(&var_out, | &(x, z,)| ((z,), (x,))); 338 | var_out_3.from_map(&var_out, | &(z, y,)| ((z,), (y,))); 339 | var_out_4.from_join(&var_out_2, &var_out_3, | &(z,), &(x,), &(y,)| (z, x, y,)); 340 | var_out.from_map(&var_out_4, | &(z, x, y,)| (x, y,)); 341 | } 342 | out = var_out.complete(); 343 | } 344 | "##, 345 | ); 346 | } 347 | #[test] 348 | fn encode_rule_with_wildcards() { 349 | compare( 350 | " 351 | input inp(x: u32, y: u32) 352 | output out(x: u32) 353 | out(x) :- inp(x, _), inp(_, x). 
354 | ", 355 | r##" 356 | { 357 | let mut iteration = datafrog::Iteration::new(); 358 | let var_inp = datafrog::Relation:: <(u32, u32,)> ::from_vec(inp); 359 | let var_out = iteration.variable:: <(u32,)>("out"); 360 | let var_inp_1 = iteration.variable:: <(u32, u32,)>("inp_1"); 361 | let var_inp_1_2 = iteration.variable:: <((u32,), ())>("inp_1_2"); 362 | let var_inp_1_3 = iteration.variable:: <((u32,), ())>("inp_1_3"); 363 | let var_out_4 = iteration.variable:: <(u32,)>("out_4"); 364 | var_inp_1.insert(var_inp); 365 | while iteration.changed() { 366 | var_inp_1_2.from_map(&var_inp_1, | &(x, _,)| ((x,), ())); 367 | var_inp_1_3.from_map(&var_inp_1, | &(_, x,)| ((x,), ())); 368 | var_out_4.from_join(&var_inp_1_2, &var_inp_1_3, | &(x,), &(), &()| (x,)); 369 | var_out.from_map(&var_out_4, | &(x,)| (x,)); 370 | } 371 | out = var_out.complete(); 372 | } 373 | "##, 374 | ); 375 | } 376 | #[test] 377 | fn encode_kill() { 378 | compare( 379 | " 380 | input inp(x: u32, y: u32) 381 | input kill(y: u32) 382 | output out(x: u32, y: u32) 383 | out(x, y) :- inp(x, y), !kill(y). 
384 | ", 385 | r##" 386 | { 387 | let mut iteration = datafrog::Iteration::new(); 388 | let var_inp = datafrog::Relation:: <(u32, u32,)> ::from_vec(inp); 389 | let var_kill = datafrog::Relation:: <(u32,)> ::from_vec(kill); 390 | let var_out = iteration.variable:: <(u32, u32,)>("out"); 391 | let var_inp_1 = iteration.variable:: <(u32, u32,)>("inp_1"); 392 | let var_inp_1_2 = iteration.variable:: <((u32,), (u32,))>("inp_1_2"); 393 | let var_out_3 = iteration.variable:: <(u32, u32,)>("out_3"); 394 | var_inp_1.insert(var_inp); 395 | while iteration.changed() { 396 | var_inp_1_2.from_map(&var_inp_1, | &(x, y,)| ((y,), (x,))); 397 | var_out_3.from_antijoin(&var_inp_1_2, &var_kill, | &(y,), &(x,)| (y, x,)); 398 | var_out.from_map(&var_out_3, | &(y, x,)| (x, y,)); 399 | } 400 | out = var_out.complete(); 401 | } 402 | "##, 403 | ); 404 | } 405 | } 406 | -------------------------------------------------------------------------------- /src/generator_new/mod.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::TokenStream; 2 | use quote::{quote, ToTokens}; 3 | 4 | mod ast; 5 | mod encode; 6 | mod to_tokens; 7 | 8 | pub fn generate_datafrog(input: TokenStream) -> TokenStream { 9 | let parsed_program = match syn::parse2(input) { 10 | Ok(program) => program, 11 | Err(err) => { 12 | let tokens = TokenStream::from(err.to_compile_error()); 13 | return quote! { {#tokens }}; 14 | } 15 | }; 16 | let typechecked_program = match crate::typechecker::typecheck(parsed_program) { 17 | Ok(program) => program, 18 | Err(err) => { 19 | let tokens = TokenStream::from(err.to_syn_error().to_compile_error()); 20 | return quote! 
{ {#tokens }};
        }
    };
    let encoded_program = encode::encode(typechecked_program);
    encoded_program.to_token_stream()
}

--------------------------------------------------------------------------------
/src/generator_new/to_tokens.rs:
--------------------------------------------------------------------------------
use crate::generator_new::ast::*;
use proc_macro2::{Span, TokenStream};
use quote::quote;
use quote::ToTokens;

/// Emits every element of `type_vec` followed by a comma, so the result can be
/// wrapped in parentheses to form a tuple type.
fn type_vec_to_tokens<T: ToTokens>(type_vec: &[T]) -> TokenStream {
    let mut type_tokens = TokenStream::new();
    for typ in type_vec {
        type_tokens.extend(quote! {#typ,});
    }
    type_tokens
}

/// Emits every variable of `var_vec` followed by a comma, so the result can be
/// wrapped in parentheses to form a tuple expression or pattern.
fn var_vec_to_tokens(var_vec: &[DVar]) -> TokenStream {
    let mut var_tokens = TokenStream::new();
    for var in var_vec {
        var_tokens.extend(quote! {#var,});
    }
    var_tokens
}

impl ToTokens for DVar {
    /// Emits the variable's name as-is.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let name = &self.name;
        tokens.extend(quote! {#name});
    }
}

impl ToTokens for DVarTuple {
    /// Emits `(v1, v2, ...,)`.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let vars = var_vec_to_tokens(&self.vars);
        tokens.extend(quote! {(#vars)});
    }
}

impl ToTokens for DVarKeyVal {
    /// Emits `((k1, ...,), (v1, ...,))`.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let key = var_vec_to_tokens(&self.key);
        let value = var_vec_to_tokens(&self.value);
        tokens.extend(quote! {((#key), (#value))});
    }
}

impl ToTokens for DVars {
    /// Delegates to the wrapped tuple or key-value form.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        match self {
            DVars::Tuple(tuple) => tuple.to_tokens(tokens),
            DVars::KeyVal(key_val) => key_val.to_tokens(tokens),
        }
    }
}

impl ToTokens for DVarTypes {
    /// Emits the tuple type `(T1, ...,)` or the pair `((K1, ...,), (V1, ...,))`.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        match self {
            DVarTypes::Tuple(types) => {
                let type_tokens = type_vec_to_tokens(types);
                tokens.extend(quote! {(#type_tokens)});
            }
            DVarTypes::KeyVal { key, value } => {
                let key_tokens = type_vec_to_tokens(key);
                let value_tokens = type_vec_to_tokens(value);
                tokens.extend(quote! {((#key_tokens), (#value_tokens))});
            }
        }
    }
}

impl ToTokens for Variable {
    /// Emits the identifier `var_<name>`.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let var_name = format!("var_{}", self.name);
        let ident = syn::Ident::new(&var_name, Span::call_site());
        tokens.extend(quote! {#ident});
    }
}

impl ToTokens for ReorderOp {
    /// Emits a `from_map` call that rebuilds `output` from `input`.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let ReorderOp {
            output,
            input,
            input_vars,
            output_vars,
        } = self;
        tokens.extend(quote! {
            #output.from_map(&#input, |&#input_vars| #output_vars);
        });
    }
}

impl ToTokens for BindVarOp {
    /// Not implemented yet; calling this panics.
    fn to_tokens(&self, _tokens: &mut TokenStream) {
        unimplemented!();
    }
}

impl ToTokens for JoinOp {
    /// Emits a `from_join` call whose closure returns the key followed by both
    /// value tuples, flattened into one tuple.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let JoinOp {
            output,
            input_first,
            input_second,
            key,
            value_first,
            value_second,
        } = self;
        let flattened = DVarTuple {
            vars: key
                .vars
                .iter()
                .chain(&value_first.vars)
                .chain(&value_second.vars)
                .cloned()
                .collect(),
        };

        tokens.extend(quote! {
            #output.from_join(
                &#input_first,
                &#input_second,
                |&#key, &#value_first, &#value_second| #flattened);
        });
    }
}

impl ToTokens for AntiJoinOp {
    /// Emits a `from_antijoin` call whose closure returns the key followed by
    /// the value tuple, flattened into one tuple.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let AntiJoinOp {
            output,
            input_variable,
            input_relation,
            key,
            value,
        } = self;
        let flattened = DVarTuple {
            vars: key.vars.iter().chain(&value.vars).cloned().collect(),
        };

        tokens.extend(quote! {
            #output.from_antijoin(
                &#input_variable,
                &#input_relation,
                |&#key, &#value| #flattened);
        });
    }
}

impl ToTokens for FilterOp {
    /// Not implemented yet; calling this panics.
    fn to_tokens(&self, _tokens: &mut TokenStream) {
        unimplemented!();
    }
}

impl ToTokens for InsertOp {
    /// Emits `<variable>.insert(<relation>);`.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let InsertOp { variable, relation } = self;
        tokens.extend(quote! {
            #variable.insert(#relation);
        });
    }
}

impl ToTokens for Operation {
    /// Delegates to the wrapped operation.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        match self {
            Operation::Reorder(op) => op.to_tokens(tokens),
            // Operation::BindVar(op) => op.to_tokens(tokens),
            Operation::Join(op) => op.to_tokens(tokens),
            Operation::AntiJoin(op) => op.to_tokens(tokens),
            // Operation::Filter(op) => op.to_tokens(tokens),
            Operation::Insert(op) => op.to_tokens(tokens),
        }
    }
}

/// Emits all `operations` one after another, in order.
fn operation_vec_to_tokens(operations: &[Operation]) -> TokenStream {
    let mut tokens = TokenStream::new();
    for operation in operations {
        operation.to_tokens(&mut tokens);
    }
    tokens
}

impl ToTokens for Iteration {
    /// Emits a block that declares the relations and variables, runs the
    /// pre-operations, loops over the body operations while the iteration
    /// reports changes, runs the post-operations and finally assigns the
    /// completed output variables.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let mut declare_relations = TokenStream::new();
        for relation in self.relations.values() {
            let vec_name = &relation.var.name;
            let var = relation.var.to_token_stream();
            let typ = type_vec_to_tokens(&relation.typ);
            declare_relations.extend(quote! {
                let #var = datafrog::Relation::<(#typ)>::from_vec(#vec_name);
            });
        }
        let mut declare_variables = TokenStream::new();
        let mut output_results = TokenStream::new();
        for variable in self.variables.values() {
            let var = variable.var.to_token_stream();
            let var_name = variable.var.name.to_string();
            let typ = variable.typ.to_token_stream();
            declare_variables.extend(quote! {
                let #var = iteration.variable::<#typ>(#var_name);
            });
            if variable.is_output {
                let new_var = &variable.var.name;
                output_results.extend(quote! {
                    #new_var = #var.complete();
                });
            }
        }
        let pre_operations = operation_vec_to_tokens(&self.pre_operations);
        let body_operations = operation_vec_to_tokens(&self.body_operations);
        let post_operations = operation_vec_to_tokens(&self.post_operations);
        tokens.extend(quote! {
            {
                let mut iteration = datafrog::Iteration::new();
                #declare_relations
                #declare_variables
                #pre_operations
                while iteration.changed() {
                    #body_operations
                }
                #post_operations
                #output_results
            }
        });
    }
}

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
#[macro_use]
extern crate log;

mod ast;
mod data_structures;
mod generator;
mod generator_new;
mod parser;
mod typechecker;

pub use generator::generate_skeleton_datafrog;
pub use generator_new::generate_datafrog;

--------------------------------------------------------------------------------
/src/parser/ast.rs:
--------------------------------------------------------------------------------
//! This file contains the parse AST.
2 | 3 | use proc_macro2::Ident; 4 | use std::fmt; 5 | 6 | pub(crate) use crate::ast::{ParamDecl, PredicateDecl, PredicateKind, RuleHead}; 7 | 8 | /// A positional argument `arg2`. 9 | #[derive(Debug, Clone)] 10 | pub(crate) enum PositionalArg { 11 | Ident(Ident), 12 | Wildcard, 13 | } 14 | 15 | impl fmt::Display for PositionalArg { 16 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 17 | match self { 18 | PositionalArg::Ident(ident) => write!(f, "{}", ident), 19 | PositionalArg::Wildcard => write!(f, "_"), 20 | } 21 | } 22 | } 23 | 24 | /// A named argument `.param2=arg2`. 25 | #[derive(Debug, Clone)] 26 | pub(crate) struct NamedArg { 27 | pub param: Ident, 28 | pub arg: Ident, 29 | } 30 | 31 | /// The list of atom's arguments. 32 | #[derive(Debug, Clone)] 33 | pub(crate) enum ArgList { 34 | /// arg1, arg2 35 | Positional(Vec), 36 | /// .param1=arg1, .param2=arg2 37 | Named(Vec), 38 | } 39 | 40 | impl fmt::Display for ArgList { 41 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 42 | let mut first = true; 43 | match self { 44 | ArgList::Positional(args) => { 45 | for arg in args { 46 | if first { 47 | first = false; 48 | } else { 49 | write!(f, ", ")?; 50 | } 51 | write!(f, "{}", arg)?; 52 | } 53 | } 54 | ArgList::Named(args) => { 55 | for kwarg in args { 56 | if first { 57 | first = false; 58 | } else { 59 | write!(f, ", ")?; 60 | } 61 | write!(f, ".{}={}", kwarg.param, kwarg.arg)?; 62 | } 63 | } 64 | } 65 | Ok(()) 66 | } 67 | } 68 | 69 | /// A richer type of atom, which can be negated, and used as 70 | /// premises/hypotheses in rules. 
71 | #[derive(Debug, Clone)] 72 | pub(crate) struct Literal { 73 | pub is_negated: bool, 74 | pub predicate: Ident, 75 | pub args: ArgList, 76 | } 77 | 78 | impl fmt::Display for Literal { 79 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 80 | if self.is_negated { 81 | write!(f, "!")?; 82 | } 83 | write!(f, "{}({})", self.predicate, self.args) 84 | } 85 | } 86 | 87 | /// A rule describing how to compute facts. 88 | /// 89 | /// ```plain 90 | /// Internal(x, y) :- Input(x, y). 91 | /// ``` 92 | #[derive(Debug, Clone)] 93 | pub(crate) struct Rule { 94 | pub head: RuleHead, 95 | pub body: Vec, 96 | } 97 | 98 | impl fmt::Display for Rule { 99 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 100 | write!(f, "{} :- ", self.head)?; 101 | let mut first = true; 102 | for literal in &self.body { 103 | if first { 104 | first = false; 105 | } else { 106 | write!(f, ", ")?; 107 | } 108 | write!(f, "{}", literal)?; 109 | } 110 | write!(f, ".") 111 | } 112 | } 113 | 114 | /// Items present in the program. 115 | #[derive(Debug, Clone)] 116 | pub(crate) enum ProgramItem { 117 | PredicateDecl(PredicateDecl), 118 | Rule(Rule), 119 | } 120 | 121 | impl fmt::Display for ProgramItem { 122 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 123 | match self { 124 | ProgramItem::PredicateDecl(decl) => write!(f, "{}", decl), 125 | ProgramItem::Rule(rule) => write!(f, "{}", rule), 126 | } 127 | } 128 | } 129 | 130 | /// A Datalog program. 
131 | #[derive(Debug, Clone)] 132 | pub(crate) struct Program { 133 | pub items: Vec, 134 | } 135 | 136 | impl fmt::Display for Program { 137 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 138 | for item in &self.items { 139 | writeln!(f, "{}", item)?; 140 | } 141 | Ok(()) 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/parser/mod.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Ident; 2 | use syn::parse::{Parse, ParseStream}; 3 | use syn::{parenthesized, punctuated::Punctuated, Token}; 4 | 5 | pub(crate) mod ast; 6 | 7 | mod kw { 8 | syn::custom_keyword!(internal); 9 | syn::custom_keyword!(input); 10 | syn::custom_keyword!(output); 11 | } 12 | 13 | impl Parse for ast::PredicateKind { 14 | fn parse(input: ParseStream) -> syn::Result { 15 | let lookahead = input.lookahead1(); 16 | if lookahead.peek(kw::input) { 17 | input.parse::()?; 18 | Ok(ast::PredicateKind::Input) 19 | } else if lookahead.peek(kw::internal) { 20 | input.parse::()?; 21 | Ok(ast::PredicateKind::Internal) 22 | } else if lookahead.peek(kw::output) { 23 | input.parse::()?; 24 | Ok(ast::PredicateKind::Output) 25 | } else { 26 | Err(lookahead.error()) 27 | } 28 | } 29 | } 30 | 31 | impl Parse for ast::ParamDecl { 32 | fn parse(input: ParseStream) -> syn::Result { 33 | let name = input.parse()?; 34 | input.parse::()?; 35 | let typ = input.parse()?; 36 | Ok(ast::ParamDecl { name, typ }) 37 | } 38 | } 39 | 40 | impl Parse for ast::PredicateDecl { 41 | fn parse(input: ParseStream) -> syn::Result { 42 | let kind = input.parse()?; 43 | let name = input.parse()?; 44 | let content; 45 | parenthesized!(content in input); 46 | let parsed_content: Punctuated = 47 | content.parse_terminated(ast::ParamDecl::parse)?; 48 | let parameters = parsed_content 49 | .into_pairs() 50 | .map(|pair| pair.into_value()) 51 | .collect(); 52 | Ok(ast::PredicateDecl { 53 | kind, 54 | name, 55 | parameters, 
56 | }) 57 | } 58 | } 59 | 60 | impl Parse for ast::NamedArg { 61 | fn parse(input: ParseStream) -> syn::Result { 62 | input.parse::()?; 63 | let param: Ident = input.parse()?; 64 | input.parse::()?; 65 | let arg: Ident = input.parse()?; 66 | Ok(ast::NamedArg { param, arg }) 67 | } 68 | } 69 | 70 | impl Parse for ast::PositionalArg { 71 | fn parse(input: ParseStream) -> syn::Result { 72 | if input.peek(Token![_]) { 73 | input.parse::()?; 74 | Ok(ast::PositionalArg::Wildcard) 75 | } else { 76 | let ident = input.parse()?; 77 | Ok(ast::PositionalArg::Ident(ident)) 78 | } 79 | } 80 | } 81 | 82 | impl Parse for ast::ArgList { 83 | fn parse(input: ParseStream) -> syn::Result { 84 | let content; 85 | parenthesized!(content in input); 86 | if content.peek(Token![.]) { 87 | let punctuated: Punctuated = 88 | content.parse_terminated(ast::NamedArg::parse)?; 89 | let args = punctuated 90 | .into_pairs() 91 | .map(|pair| pair.into_value()) 92 | .collect(); 93 | Ok(ast::ArgList::Named(args)) 94 | } else { 95 | let punctuated: Punctuated = 96 | content.parse_terminated(ast::PositionalArg::parse)?; 97 | let args = punctuated 98 | .into_pairs() 99 | .map(|pair| pair.into_value()) 100 | .collect(); 101 | Ok(ast::ArgList::Positional(args)) 102 | } 103 | } 104 | } 105 | 106 | impl Parse for ast::Literal { 107 | fn parse(input: ParseStream) -> syn::Result { 108 | let is_negated = input.peek(Token![!]); 109 | if is_negated { 110 | input.parse::()?; 111 | } 112 | let predicate = input.parse()?; 113 | let args = input.parse()?; 114 | Ok(ast::Literal { 115 | is_negated, 116 | predicate, 117 | args, 118 | }) 119 | } 120 | } 121 | 122 | impl Parse for ast::RuleHead { 123 | fn parse(input: ParseStream) -> syn::Result { 124 | let predicate = input.parse()?; 125 | let content; 126 | parenthesized!(content in input); 127 | let punctuated: Punctuated = content.parse_terminated(Ident::parse)?; 128 | let args = punctuated 129 | .into_pairs() 130 | .map(|pair| pair.into_value()) 131 | .collect(); 
132 | Ok(ast::RuleHead { predicate, args }) 133 | } 134 | } 135 | 136 | impl Parse for ast::Rule { 137 | fn parse(input: ParseStream) -> syn::Result { 138 | let head = input.parse()?; 139 | // FIXME: For some reason, when getting input from a procedural macro, 140 | // a space is always inserted between `:` and `-`. Therefore, the parser 141 | // needs to accept the variant with a space. 142 | input.parse::()?; 143 | input.parse::()?; 144 | // input.step(|cursor| { 145 | // let rest = match cursor.token_tree() { 146 | // Some((proc_macro2::TokenTree::Punct(ref punct), next)) 147 | // if punct.as_char() == ':' && punct.spacing() == proc_macro2::Spacing::Joint => 148 | // { 149 | // next 150 | // } 151 | // _ => return Err(cursor.error(":- expected")), 152 | // }; 153 | // match rest.token_tree() { 154 | // Some((proc_macro2::TokenTree::Punct(ref punct), next)) 155 | // if punct.as_char() == '-' => 156 | // { 157 | // Ok(((), next)) 158 | // } 159 | // _ => Err(cursor.error(":- expected")), 160 | // } 161 | // })?; 162 | let body: Punctuated = 163 | Punctuated::parse_separated_nonempty(input)?; 164 | // Allow trailing punctuation. 
165 | if input.peek(Token![,]) { 166 | input.parse::()?; 167 | } 168 | input.parse::()?; 169 | Ok(ast::Rule { 170 | head, 171 | body: body.into_pairs().map(|pair| pair.into_value()).collect(), 172 | }) 173 | } 174 | } 175 | 176 | impl Parse for ast::Program { 177 | fn parse(input: ParseStream) -> syn::Result { 178 | let mut items = Vec::new(); 179 | while !input.is_empty() { 180 | let lookahead = input.lookahead1(); 181 | if lookahead.peek(kw::internal) 182 | || lookahead.peek(kw::input) 183 | || lookahead.peek(kw::output) 184 | { 185 | let decl: ast::PredicateDecl = input.parse()?; 186 | items.push(ast::ProgramItem::PredicateDecl(decl)); 187 | } else { 188 | let rule: ast::Rule = input.parse()?; 189 | items.push(ast::ProgramItem::Rule(rule)); 190 | } 191 | } 192 | Ok(ast::Program { items }) 193 | } 194 | } 195 | 196 | /// Parse a Datalog program. 197 | pub(crate) fn parse(text: &str) -> ast::Program { 198 | info!("parse text: {}", text); 199 | match syn::parse_str(text) { 200 | Ok(program) => program, 201 | Err(err) => panic!("Error: {:?} (at {:?})", err, err.span().start()), 202 | } 203 | } 204 | 205 | #[cfg(test)] 206 | mod tests { 207 | use super::*; 208 | 209 | #[test] 210 | fn parse_relation_decl1() { 211 | let program = parse("input P ( x: u32 , y: u64)"); 212 | assert_eq!(program.items.len(), 1); 213 | assert_eq!(program.to_string(), "input P(x: u32, y: u64)\n"); 214 | } 215 | 216 | #[test] 217 | fn parse_relation_decl2() { 218 | let program = parse("internal P ( x: u32 , y: u64,)"); 219 | assert_eq!(program.items.len(), 1); 220 | assert_eq!(program.to_string(), "internal P(x: u32, y: u64)\n"); 221 | let program = parse("output P ( )"); 222 | assert_eq!(program.items.len(), 1); 223 | assert_eq!(program.to_string(), "output P()\n"); 224 | } 225 | 226 | #[test] 227 | fn parse_rule1() { 228 | let program = parse("P ( x , y,) :- Q( x, y), O(y, x) ."); 229 | assert_eq!(program.items.len(), 1); 230 | assert_eq!(program.to_string(), "P(x, y) :- Q(x, y), O(y, 
x).\n"); 231 | } 232 | 233 | #[test] 234 | fn parse_rule2() { 235 | let program = parse("P ( x , y,) :- Q( x, y), O(y, _) ."); 236 | assert_eq!(program.items.len(), 1); 237 | assert_eq!(program.to_string(), "P(x, y) :- Q(x, y), O(y, _).\n"); 238 | } 239 | 240 | #[test] 241 | fn parse_rule_trailing_comma() { 242 | let program = parse("P ( x , y,) :- Q( x, y), O(y, x) ."); 243 | assert_eq!(program.items.len(), 1); 244 | assert_eq!(program.to_string(), "P(x, y) :- Q(x, y), O(y, x).\n"); 245 | } 246 | 247 | #[test] 248 | fn parse_valid_datalog() { 249 | let program = parse( 250 | " 251 | input E(x: u32, y: u64) 252 | internal P(x: u32, y: u64) 253 | P(x, y) :- E(x, y). 254 | P(x, z) :- E(x, y), P(y, z). 255 | ", 256 | ); 257 | assert_eq!(program.items.len(), 4); 258 | assert_eq!("input E(x: u32, y: u64)", program.items[0].to_string()); 259 | assert_eq!("internal P(x: u32, y: u64)", program.items[1].to_string()); 260 | assert_eq!("P(x, y) :- E(x, y).", program.items[2].to_string()); 261 | assert_eq!("P(x, z) :- E(x, y), P(y, z).", program.items[3].to_string()); 262 | } 263 | 264 | #[test] 265 | fn parse_named_args() { 266 | let program = parse( 267 | " 268 | internal P(x: u32, y: u64) 269 | p(x, y) :- e(.field1 = x, .field2 = y). 270 | ", 271 | ); 272 | assert_eq!("internal P(x: u32, y: u64)", program.items[0].to_string()); 273 | assert_eq!( 274 | "p(x, y) :- e(.field1=x, .field2=y).", 275 | program.items[1].to_string() 276 | ); 277 | } 278 | 279 | #[test] 280 | fn parse_multiline_datalog() { 281 | let text = r#" 282 | subset(O1, O2, P) :- outlives(O1, O2, P). 283 | subset(O1, O3, P) :- subset(O1, O2, P), subset(O2, O3, P). 284 | subset(O1, O2, Q) :- subset(O1, O2, P), cfg_edge(P, Q), region_live_at(O1, Q), region_live_at(O2, Q). 285 | requires(O, L, P) :- borrow_region(O, L, P). 286 | requires(O2, L, P) :- requires(O1, L, P), subset(O1, O2, P). 287 | requires(O, L, Q) :- requires(O, L, P), !killed(L, P), cfg_edge(P, Q), region_live_at(O, Q). 
288 | borrow_live_at(L, P) :- requires(O, L, P), region_live_at(O, P). 289 | errors(L, P) :- invalidates(L, P), borrow_live_at(L, P)."#; 290 | 291 | let program = parse(text); 292 | let serialized = program 293 | .items 294 | .into_iter() 295 | .map(|item| item.to_string()) 296 | .collect::>() 297 | .join("\n"); 298 | 299 | let expected = r#"subset(O1, O2, P) :- outlives(O1, O2, P). 300 | subset(O1, O3, P) :- subset(O1, O2, P), subset(O2, O3, P). 301 | subset(O1, O2, Q) :- subset(O1, O2, P), cfg_edge(P, Q), region_live_at(O1, Q), region_live_at(O2, Q). 302 | requires(O, L, P) :- borrow_region(O, L, P). 303 | requires(O2, L, P) :- requires(O1, L, P), subset(O1, O2, P). 304 | requires(O, L, Q) :- requires(O, L, P), !killed(L, P), cfg_edge(P, Q), region_live_at(O, Q). 305 | borrow_live_at(L, P) :- requires(O, L, P), region_live_at(O, P). 306 | errors(L, P) :- invalidates(L, P), borrow_live_at(L, P)."#; 307 | assert_eq!(expected, serialized); 308 | } 309 | 310 | #[test] 311 | fn parse_multiline_datalog_with_comments() { 312 | let text = r#" 313 | // `subset` rules 314 | subset(O1, O2, P) :- outlives(O1, O2, P). 315 | 316 | subset(O1, O3, P) :- subset(O1, O2, P), 317 | subset(O2, O3, P). 318 | subset(O1, O2, Q) :- 319 | subset(O1, O2, P), 320 | cfg_edge(P, Q), 321 | region_live_at(O1, Q), 322 | region_live_at(O2, Q). 323 | 324 | // `requires` rules 325 | requires(O, L, P) :- borrow_region(O, L, P). 326 | 327 | requires(O2, L, P) :- 328 | requires(O1, L, P),subset(O1, O2, P). 329 | 330 | requires(O, L, Q) :- 331 | requires(O, L, P), 332 | !killed(L, P), cfg_edge(P, Q), 333 | region_live_at(O, Q). 334 | 335 | // this one is commented out, nope(N, O, P, E) :- open(O, P, E, N). 336 | 337 | borrow_live_at(L, P) :- 338 | requires(O, L, P), 339 | region_live_at(O, P). 
340 | 341 | errors(L, P) :- 342 | invalidates(L, P), 343 | borrow_live_at(L, P)."#; 344 | 345 | let items = parse(&text).items; 346 | 347 | let serialized = items 348 | .into_iter() 349 | .map(|rule| rule.to_string()) 350 | .collect::>() 351 | .join("\n"); 352 | 353 | let expected = r#"subset(O1, O2, P) :- outlives(O1, O2, P). 354 | subset(O1, O3, P) :- subset(O1, O2, P), subset(O2, O3, P). 355 | subset(O1, O2, Q) :- subset(O1, O2, P), cfg_edge(P, Q), region_live_at(O1, Q), region_live_at(O2, Q). 356 | requires(O, L, P) :- borrow_region(O, L, P). 357 | requires(O2, L, P) :- requires(O1, L, P), subset(O1, O2, P). 358 | requires(O, L, Q) :- requires(O, L, P), !killed(L, P), cfg_edge(P, Q), region_live_at(O, Q). 359 | borrow_live_at(L, P) :- requires(O, L, P), region_live_at(O, P). 360 | errors(L, P) :- invalidates(L, P), borrow_live_at(L, P)."#; 361 | assert_eq!(expected, serialized); 362 | } 363 | } 364 | -------------------------------------------------------------------------------- /src/typechecker.rs: -------------------------------------------------------------------------------- 1 | use crate::ast; 2 | use crate::data_structures::OrderedMap; 3 | use crate::parser::ast as past; 4 | use proc_macro2::Span; 5 | use std::collections::{HashMap, HashSet}; 6 | use std::fmt; 7 | 8 | #[derive(Debug)] 9 | pub struct Error { 10 | pub msg: String, 11 | pub span: Span, 12 | pub hint: Option<(String, Span)>, 13 | } 14 | 15 | impl Error { 16 | fn new(msg: String, span: Span) -> Self { 17 | Self { 18 | msg: msg, 19 | span: span, 20 | hint: None, 21 | } 22 | } 23 | fn with_hint_span(msg: String, span: Span, hint_msg: String, hint_span: Span) -> Self { 24 | Self { 25 | msg: msg, 26 | span: span, 27 | hint: Some((hint_msg, hint_span)), 28 | } 29 | } 30 | pub fn to_syn_error(&self) -> syn::Error { 31 | let mut error = syn::Error::new(self.span, &self.msg); 32 | if let Some((hint_msg, hint_span)) = &self.hint { 33 | error.combine(syn::Error::new(hint_span.clone(), hint_msg)); 34 | 
}
        error
    }
}

impl fmt::Display for Error {
    /// Writes the message with the start position of its span, followed by
    /// the hint and its position when a hint is present.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if let Some((hint_msg, hint_span)) = &self.hint {
            write!(
                f,
                "{} at {:?} ({} at {:?})",
                self.msg,
                self.span.start(),
                hint_msg,
                hint_span.start()
            )
        } else {
            write!(f, "{} at {:?}", self.msg, self.span.start())
        }
    }
}

/// Checks that the rule head names a declared predicate and passes exactly as
/// many arguments as the declaration has parameters.
fn check_head(
    head: &past::RuleHead,
    decls: &OrderedMap<String, ast::PredicateDecl>,
) -> Result<(), Error> {
    let decl = decls.get(&head.predicate.to_string()).ok_or_else(|| {
        Error::new(
            format!("Unknown predicate {}", head.predicate),
            head.predicate.span(),
        )
    })?;
    if head.args.len() != decl.parameters.len() {
        let msg = format!(
            "Wrong number of arguments for {}: expected {}, found {}.",
            head.predicate,
            decl.parameters.len(),
            head.args.len(),
        );
        return Err(Error::with_hint_span(
            msg,
            head.predicate.span(),
            format!("The predicate {} was declared here.", head.predicate),
            decl.name.span(),
        ));
    }
    Ok(())
}

/// Checks every literal of a rule body against the predicate declarations and
/// converts it into its typechecked form.
///
/// Positional argument lists must match the declared arity. Named argument
/// lists are reordered to declaration order; parameters that were not
/// mentioned become wildcards.
///
/// # Errors
///
/// Returns an error when a predicate is undeclared, when a positional list
/// has the wrong length, when a named parameter is bound twice, or when a
/// named parameter does not exist in the declaration.
fn check_body(
    body: Vec<past::Literal>,
    decls: &OrderedMap<String, ast::PredicateDecl>,
) -> Result<Vec<ast::Literal>, Error> {
    let mut new_body = Vec::new();
    for literal in body {
        let decl = decls.get(&literal.predicate.to_string()).ok_or_else(|| {
            Error::new(
                format!("Unknown predicate {}", literal.predicate),
                literal.predicate.span(),
            )
        })?;
        let args: Vec<ast::Arg> = match literal.args {
            past::ArgList::Positional(positional_args) => {
                if positional_args.len() != decl.parameters.len() {
                    // "expected" is the declared arity, "found" is what the
                    // literal actually supplies.
                    let msg = format!(
                        "Wrong number of arguments for {}: expected {}, found {}.",
                        literal.predicate,
                        decl.parameters.len(),
                        positional_args.len()
                    );
                    return Err(Error::with_hint_span(
                        msg,
                        literal.predicate.span(),
                        format!("The predicate {} was declared here.", decl.name),
                        decl.name.span(),
                    ));
                }
                positional_args
                    .into_iter()
                    .map(|arg| match arg {
                        past::PositionalArg::Ident(ident) => ast::Arg::Ident(ident),
                        past::PositionalArg::Wildcard => ast::Arg::Wildcard,
                    })
                    .collect()
            }
            past::ArgList::Named(named_args) => {
                let mut kwargs = HashMap::new();
                // Parameter names in the order they were written, so that the
                // unknown-parameter error below is reported deterministically.
                let mut given_order = Vec::new();
                for named_arg in named_args {
                    let param_name = named_arg.param.to_string();
                    if kwargs.contains_key(&param_name) {
                        return Err(Error::new(
                            format!("Parameter already bound: {}", param_name),
                            named_arg.param.span(),
                        ));
                    }
                    given_order.push(param_name.clone());
                    kwargs.insert(param_name, named_arg);
                }
                let available_parameters: HashSet<String> = decl
                    .parameters
                    .iter()
                    .map(|parameter| parameter.name.to_string())
                    .collect();
                let unknown = given_order
                    .iter()
                    .find(|key| !available_parameters.contains(*key));
                if let Some(key) = unknown {
                    let mut available: Vec<_> = available_parameters.into_iter().collect();
                    available.sort();
                    let parameter_span = kwargs[key].param.span();
                    return Err(Error::new(
                        format!(
                            "Unknown parameter {} in predicate {}. Available parameters are: {}.",
                            key,
                            literal.predicate,
                            available.join(","),
                        ),
                        parameter_span,
                    ));
                }
                decl.parameters
                    .iter()
                    .map(|parameter| match kwargs.get(&parameter.name.to_string()) {
                        Some(past::NamedArg { arg: ident, .. }) => ast::Arg::Ident(ident.clone()),
                        None => ast::Arg::Wildcard,
                    })
                    .collect()
            }
        };
        new_body.push(ast::Literal {
            is_negated: literal.is_negated,
            predicate: literal.predicate,
            args,
        });
    }
    Ok(new_body)
}

/// Typechecks a parsed program.
///
/// Items are processed in source order, so a rule can only refer to
/// predicates declared before it.
pub(crate) fn typecheck(program: past::Program) -> Result<ast::Program, Error> {
    let mut decls = OrderedMap::new();
    let mut rules = Vec::new();

    for item in program.items {
        match item {
            past::ProgramItem::PredicateDecl(decl) => {
                decls.insert(decl.name.to_string(), decl);
            }
            past::ProgramItem::Rule(past::Rule { head, body }) => {
                check_head(&head, &decls)?;
                let body = check_body(body, &decls)?;
                rules.push(ast::Rule { head, body });
            }
        }
    }
    Ok(ast::Program { decls, rules })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn typecheck_valid_datalog() {
        let text = r#"
            internal P(x: u32, y: u64)
            input Q(x: u32, y: u64)

            P(x, y) :- Q(x, y).
            P(x, y) :- Q(.y=y, .x=x).

        "#;
        match typecheck(crate::parser::parse(text)) {
            Ok(program) => {
                assert_eq!(program.decls.len(), 2);
                assert_eq!(program.rules.len(), 2);
                assert_eq!(program.rules[0], program.rules[1]);
            }
            Err(_) => unreachable!(),
        }
    }
}

--------------------------------------------------------------------------------
/tests/cspa_rules.rs:
--------------------------------------------------------------------------------
use datapond::generate_skeleton_datafrog;
use pretty_assertions::assert_eq;

#[test]
fn generate_cspa_rules() {
    // Context-sensitive Points-to Analysis rules
    // adapted from http://pages.cs.wisc.edu/~aws/papers/vldb19.pdf p.
9 8 | 9 | let text = r#" 10 | input assign(a: u32, b: u32) 11 | input dereference(a: u32, b: u32) 12 | 13 | internal value_flow(a: u32, b: u32) 14 | internal memory_alias(a: u32, b: u32) 15 | internal value_alias(a: u32, b: u32) 16 | 17 | value_flow(y, x) :- assign(y, x). 18 | value_flow(x, y) :- memory_alias(z, y), assign(x, z). 19 | value_flow(x, y) :- value_flow(x, z), value_flow(z, y). 20 | 21 | memory_alias(x, w) :- value_alias(y, z), dereference(y, x), dereference(z, w). 22 | 23 | value_alias(x, y) :- value_flow(z, x), value_flow(z, y). 24 | value_alias(x, y) :- value_flow(z, x), memory_alias(z, w), value_flow(w, y). 25 | 26 | value_flow(x, x) :- assign(x, y). 27 | value_flow(x, x) :- assign(y, x). 28 | 29 | memory_alias(x, x) :- assign(y, x). 30 | memory_alias(x, x) :- assign(x, y). 31 | "#; 32 | 33 | let output = generate_skeleton_datafrog(text); 34 | 35 | let expected = r#" 36 | // Extensional predicates, and their indices 37 | 38 | let assign: Relation<(u32, u32)> = Vec::new().into(); 39 | 40 | // Note: `assign_b` is an indexed version of the input facts `assign` 41 | let assign_b: Relation<(u32, u32)> = Vec::new().into(); 42 | 43 | // Note: `dereference_a` is an indexed version of the input facts `dereference` 44 | let dereference_a: Relation<(u32, u32)> = Vec::new().into(); 45 | 46 | // Note: couldn't infer output relation automatically 47 | 48 | let mut iteration = Iteration::new(); 49 | 50 | // Intensional predicates, and their indices 51 | 52 | let memory_alias = iteration.variable::<(u32, u32)>("memory_alias"); 53 | 54 | // Note: `memory_alias_a` is an indexed version of the `memory_alias` relation 55 | let memory_alias_a = iteration.variable::<(u32, u32)>("memory_alias_a"); 56 | let memory_alias_step_4_1 = iteration.variable("memory_alias_step_4_1"); 57 | let value_alias = iteration.variable::<(u32, u32)>("value_alias"); 58 | 59 | // Note: `value_alias_a` is an indexed version of the `value_alias` relation 60 | let value_alias_a = 
iteration.variable::<(u32, u32)>("value_alias_a"); 61 | let value_alias_step_6_1 = iteration.variable("value_alias_step_6_1"); 62 | let value_flow = iteration.variable::<(u32, u32)>("value_flow"); 63 | 64 | // Note: `value_flow_a` is an indexed version of the `value_flow` relation 65 | let value_flow_a = iteration.variable::<(u32, u32)>("value_flow_a"); 66 | 67 | // Note: `value_flow_b` is an indexed version of the `value_flow` relation 68 | let value_flow_b = iteration.variable::<(u32, u32)>("value_flow_b"); 69 | 70 | // R01: value_flow(y, x) :- assign(y, x). 71 | value_flow.extend(assign.iter().clone()); 72 | 73 | // R07: value_flow(x, x) :- assign(x, y). 74 | value_flow.extend(assign.iter().map(|&(x, _y)| (x, x))); 75 | 76 | // R08: value_flow(x, x) :- assign(y, x). 77 | value_flow.extend(assign.iter().map(|&(_y, x)| (x, x))); 78 | 79 | // R09: memory_alias(x, x) :- assign(y, x). 80 | memory_alias.extend(assign.iter().map(|&(_y, x)| (x, x))); 81 | 82 | // R10: memory_alias(x, x) :- assign(x, y). 83 | memory_alias.extend(assign.iter().map(|&(x, _y)| (x, x))); 84 | 85 | while iteration.changed() { 86 | 87 | // Index maintenance 88 | value_flow_b.from_map(&value_flow, |&(a, b)| (b, a)); 89 | value_flow_a.from_map(&value_flow, |&(a, b)| (a, b)); 90 | value_alias_a.from_map(&value_alias, |&(a, b)| (a, b)); 91 | memory_alias_a.from_map(&memory_alias, |&(a, b)| (a, b)); 92 | 93 | // Rules 94 | 95 | // R01: value_flow(y, x) :- assign(y, x). 96 | // `assign` is a static input, already loaded into `value_flow`. 97 | 98 | // R02: value_flow(x, y) :- memory_alias(z, y), assign(x, z). 99 | value_flow.from_join(&memory_alias_a, &assign_b, |&_z, &y, &x| (x, y)); 100 | 101 | // R03: value_flow(x, y) :- value_flow(x, z), value_flow(z, y). 102 | value_flow.from_join(&value_flow_b, &value_flow_a, |&_z, &x, &y| (x, y)); 103 | 104 | // R04: memory_alias(x, w) :- value_alias(y, z), dereference(y, x), dereference(z, w). 
// Build-only check for the CSPA rules: this function holds a copy of the skeleton produced
// by the generator so that the compiler type-checks it against datafrog. It is not a
// `#[test]`, takes no input and returns nothing; every input relation is created empty, so
// compiling the body is the whole check. `dead_code` and `unused_variables` are allowed for
// that reason.
#[allow(dead_code, unused_variables)]
fn ensure_generated_rules_build() {
    // shim to bring in datafrog so that the generated skeleton can build.
    use datafrog::{Iteration, Relation};

    // ----- output from the skeleton generator follows below (+ manual comments) -----

    // Extensional predicates, and their indices

    let assign: Relation<(u32, u32)> = Vec::new().into();

    // Note: `assign_b` is an indexed version of the input facts `assign`
    let assign_b: Relation<(u32, u32)> = Vec::new().into();

    // Note: `dereference_a` is an indexed version of the input facts `dereference`
    let dereference_a: Relation<(u32, u32)> = Vec::new().into();

    // Note: couldn't infer output relation automatically

    let mut iteration = Iteration::new();

    // Intensional predicates, and their indices

    let memory_alias = iteration.variable::<(u32, u32)>("memory_alias");

    // Note: `memory_alias_a` is an indexed version of the `memory_alias` relation
    let memory_alias_a = iteration.variable::<(u32, u32)>("memory_alias_a");
    // The `*_step_N_M` variables carry no explicit tuple type: it is inferred from the
    // closure of the `from_join` call that fills them further down.
    let memory_alias_step_4_1 = iteration.variable("memory_alias_step_4_1");
    let value_alias = iteration.variable::<(u32, u32)>("value_alias");

    // Note: `value_alias_a` is an indexed version of the `value_alias` relation
    let value_alias_a = iteration.variable::<(u32, u32)>("value_alias_a");
    let value_alias_step_6_1 = iteration.variable("value_alias_step_6_1");
    let value_flow = iteration.variable::<(u32, u32)>("value_flow");

    // Note: `value_flow_a` is an indexed version of the `value_flow` relation
    let value_flow_a = iteration.variable::<(u32, u32)>("value_flow_a");

    // Note: `value_flow_b` is an indexed version of the `value_flow` relation
    let value_flow_b = iteration.variable::<(u32, u32)>("value_flow_b");

    // Rules whose body is only the static input `assign` are loaded once, before the loop.

    // R01: value_flow(y, x) :- assign(y, x).
    value_flow.extend(assign.iter().clone());

    // R07: value_flow(x, x) :- assign(x, y).
    value_flow.extend(assign.iter().map(|&(x, _y)| (x, x)));

    // R08: value_flow(x, x) :- assign(y, x).
    value_flow.extend(assign.iter().map(|&(_y, x)| (x, x)));

    // R09: memory_alias(x, x) :- assign(y, x).
    memory_alias.extend(assign.iter().map(|&(_y, x)| (x, x)));

    // R10: memory_alias(x, x) :- assign(x, y).
    memory_alias.extend(assign.iter().map(|&(x, _y)| (x, x)));

    while iteration.changed() {
        // Index maintenance
        value_flow_b.from_map(&value_flow, |&(a, b)| (b, a));
        value_flow_a.from_map(&value_flow, |&(a, b)| (a, b)); // useless index
        value_alias_a.from_map(&value_alias, |&(a, b)| (a, b)); // useless index
        memory_alias_a.from_map(&memory_alias, |&(a, b)| (a, b)); // useless index

        // Rules

        // R01: value_flow(y, x) :- assign(y, x).
        // `assign` is a static input, already loaded into `value_flow`.

        // R02: value_flow(x, y) :- memory_alias(z, y), assign(x, z).
        value_flow.from_join(&memory_alias_a, &assign_b, |&_z, &y, &x| (x, y));

        // R03: value_flow(x, y) :- value_flow(x, z), value_flow(z, y).
        value_flow.from_join(&value_flow_b, &value_flow_a, |&_z, &x, &y| (x, y));

        // R04: memory_alias(x, w) :- value_alias(y, z), dereference(y, x), dereference(z, w).
        // Three body atoms: joined pairwise through the intermediate `memory_alias_step_4_1`.
        memory_alias_step_4_1.from_join(&value_alias_a, &dereference_a, |&_y, &z, &x| (z, x));
        memory_alias.from_join(&memory_alias_step_4_1, &dereference_a, |&_z, &x, &w| (x, w));

        // R05: value_alias(x, y) :- value_flow(z, x), value_flow(z, y).
        value_alias.from_join(&value_flow_a, &value_flow_a, |&_z, &x, &y| (x, y));

        // R06: value_alias(x, y) :- value_flow(z, x), memory_alias(z, w), value_flow(w, y).
        // Three body atoms: joined pairwise through the intermediate `value_alias_step_6_1`.
        value_alias_step_6_1.from_join(&value_flow_a, &memory_alias_a, |&_z, &x, &w| (w, x));
        value_alias.from_join(&value_alias_step_6_1, &value_flow_a, |&_w, &x, &y| (x, y));

        // R07: value_flow(x, x) :- assign(x, y).
        // `assign` is a static input, already loaded into `value_flow`.

        // R08: value_flow(x, x) :- assign(y, x).
        // `assign` is a static input, already loaded into `value_flow`.

        // R09: memory_alias(x, x) :- assign(y, x).
        // `assign` is a static input, already loaded into `memory_alias`.

        // R10: memory_alias(x, x) :- assign(x, y).
        // `assign` is a static input, already loaded into `memory_alias`.
    }
}
37 | 38 | // R5 39 | equals(O1, O2, Q) :- 40 | equals(O1, O2, P), 41 | cfg_edge(P, Q), 42 | region_live_at(O1, Q), 43 | region_live_at(O2, Q). 44 | 45 | // R6 46 | requires(O2, L, P) :- 47 | requires(O1, L, P), 48 | equals(O1, O2, P). 49 | 50 | // R7 51 | requires(O, L, P) :- 52 | borrow_region(O, L, P). 53 | 54 | // R8 55 | requires(O2, L, P) :- 56 | requires(O1, L, P), 57 | subset(O1, O2, P). 58 | 59 | // R9 60 | requires(O, L, Q) :- 61 | requires(O, L, P), 62 | !killed(L, P), 63 | cfg_edge(P, Q), 64 | region_live_at(O, Q). 65 | 66 | // R10 67 | borrow_live_at(L, P) :- 68 | requires(O, L, P), 69 | region_live_at(O, P). 70 | 71 | // R11 72 | errors(L, P) :- 73 | borrow_live_at(L, P), 74 | invalidates(L, P). 75 | "#; 76 | 77 | let output = generate_skeleton_datafrog(text); 78 | 79 | let expected = r#" 80 | // Extensional predicates, and their indices 81 | 82 | let borrow_region: Relation<(Origin, Loan, Point)> = Vec::new().into(); 83 | 84 | // Note: `cfg_edge_p` is an indexed version of the input facts `cfg_edge` 85 | let cfg_edge_p: Relation<(Point, Point)> = Vec::new().into(); 86 | 87 | let invalidates: Relation<((Loan, Point), ())> = Vec::new().into(); 88 | let killed: Relation<(Loan, Point)> = Vec::new().into(); 89 | let outlives: Relation<(Origin, Origin, Point)> = Vec::new().into(); 90 | 91 | // Note: `outlives_o1p` is an indexed version of the input facts `outlives` 92 | let outlives_o1p: Relation<((Origin, Point), Origin)> = Vec::new().into(); 93 | 94 | let region_live_at: Relation<((Origin, Point), ())> = Vec::new().into(); 95 | 96 | // `errors` inferred as the output relation 97 | let errors = { 98 | 99 | let mut iteration = Iteration::new(); 100 | 101 | // Intensional predicates, and their indices 102 | 103 | let borrow_live_at = iteration.variable::<((Loan, Point), ())>("borrow_live_at"); 104 | let equals = iteration.variable::<(Origin, Origin, Point)>("equals"); 105 | 106 | // Note: `equals_o1p` is an indexed version of the `equals` relation 107 | let 
equals_o1p = iteration.variable::<((Origin, Point), Origin)>("equals_o1p"); 108 | 109 | // Note: `equals_o2p` is an indexed version of the `equals` relation 110 | let equals_o2p = iteration.variable::<((Origin, Point), Origin)>("equals_o2p"); 111 | 112 | // Note: `equals_p` is an indexed version of the `equals` relation 113 | let equals_p = iteration.variable::<(Point, (Origin, Origin))>("equals_p"); 114 | let equals_step_5_1 = iteration.variable("equals_step_5_1"); 115 | let equals_step_5_2 = iteration.variable("equals_step_5_2"); 116 | let errors = iteration.variable::<(Loan, Point)>("errors"); 117 | let requires = iteration.variable::<(Origin, Loan, Point)>("requires"); 118 | 119 | // Note: `requires_lp` is an indexed version of the `requires` relation 120 | let requires_lp = iteration.variable::<((Loan, Point), Origin)>("requires_lp"); 121 | 122 | // Note: `requires_op` is an indexed version of the `requires` relation 123 | let requires_op = iteration.variable::<((Origin, Point), Loan)>("requires_op"); 124 | let requires_step_9_1 = iteration.variable("requires_step_9_1"); 125 | let requires_step_9_2 = iteration.variable("requires_step_9_2"); 126 | let subset = iteration.variable::<((Origin, Origin, Point), ())>("subset"); 127 | 128 | // Note: `subset_o1p` is an indexed version of the `subset` relation 129 | let subset_o1p = iteration.variable::<((Origin, Point), Origin)>("subset_o1p"); 130 | 131 | // Note: `subset_o2o1p` is an indexed version of the `subset` relation 132 | let subset_o2o1p = iteration.variable::<((Origin, Origin, Point), ())>("subset_o2o1p"); 133 | 134 | // Note: `subset_o2p` is an indexed version of the `subset` relation 135 | let subset_o2p = iteration.variable::<((Origin, Point), Origin)>("subset_o2p"); 136 | 137 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 138 | subset.extend(outlives.iter().map(|&tuple| (tuple, ()))); 139 | 140 | // R07: requires(O, L, P) :- borrow_region(O, L, P). 
141 | requires.extend(borrow_region.iter().clone()); 142 | 143 | while iteration.changed() { 144 | 145 | // Index maintenance 146 | equals_p.from_map(&equals, |&(o1, o2, p)| (p, (o1, o2))); 147 | equals_o1p.from_map(&equals, |&(o1, o2, p)| ((o1, p), o2)); 148 | subset_o1p.from_map(&subset, |&((o1, o2, p), _)| ((o1, p), o2)); 149 | equals_o2p.from_map(&equals, |&(o1, o2, p)| ((o2, p), o1)); 150 | subset_o2o1p.from_map(&subset, |&((o1, o2, p), _)| ((o2, o1, p), ())); 151 | requires_lp.from_map(&requires, |&(o, l, p)| ((l, p), o)); 152 | subset_o2p.from_map(&subset, |&((o1, o2, p), _)| ((o2, p), o1)); 153 | requires_op.from_map(&requires, |&(o, l, p)| ((o, p), l)); 154 | 155 | // Rules 156 | 157 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 158 | // `outlives` is a static input, already loaded into `subset`. 159 | 160 | // R02: subset(O1, O3, P) :- subset(O1, O2, P), outlives(O2, O3, P). 161 | subset.from_join(&subset_o2p, &outlives_o1p, |&(_o2, p), &o1, &o3| ((o1, o3, p), ())); 162 | 163 | // R03: equals(O1, O2, P) :- subset(O1, O2, P), subset(O2, O1, P). 164 | equals.from_join(&subset, &subset_o2o1p, |&(o1, o2, p), _, _| (o1, o2, p)); 165 | 166 | // R04: equals(O1, O3, P) :- equals(O1, O2, P), equals(O2, O3, P). 167 | equals.from_join(&equals_o2p, &equals_o1p, |&(_o2, p), &o1, &o3| (o1, o3, p)); 168 | 169 | // R05: equals(O1, O2, Q) :- equals(O1, O2, P), cfg_edge(P, Q), region_live_at(O1, Q), region_live_at(O2, Q). 170 | equals_step_5_1.from_join(&equals_p, &cfg_edge_p, |&_p, &(o1, o2), &q| ((o1, q), o2)); 171 | equals_step_5_2.from_join(&equals_step_5_1, &region_live_at, |&(o1, q), &o2, _| ((o2, q), o1)); 172 | equals.from_join(&equals_step_5_2, &region_live_at, |&(o2, q), &o1, _| (o1, o2, q)); 173 | 174 | // R06: requires(O2, L, P) :- requires(O1, L, P), equals(O1, O2, P). 175 | requires.from_join(&requires_op, &equals_o1p, |&(_o1, p), &l, &o2| (o2, l, p)); 176 | 177 | // R07: requires(O, L, P) :- borrow_region(O, L, P).
178 | // `borrow_region` is a static input, already loaded into `requires`. 179 | 180 | // R08: requires(O2, L, P) :- requires(O1, L, P), subset(O1, O2, P). 181 | requires.from_join(&requires_op, &subset_o1p, |&(_o1, p), &l, &o2| (o2, l, p)); 182 | 183 | // R09: requires(O, L, Q) :- requires(O, L, P), !killed(L, P), cfg_edge(P, Q), region_live_at(O, Q). 184 | requires_step_9_1.from_antijoin(&requires_lp, &killed, |&(l, p), &o| (p, (l, o))); 185 | requires_step_9_2.from_join(&requires_step_9_1, &cfg_edge_p, |&_p, &(l, o), &q| ((o, q), l)); 186 | requires.from_join(&requires_step_9_2, ®ion_live_at, |&(o, q), &l, _| (o, l, q)); 187 | 188 | // R10: borrow_live_at(L, P) :- requires(O, L, P), region_live_at(O, P). 189 | borrow_live_at.from_join(&requires_op, ®ion_live_at, |&(_o, p), &l, _| ((l, p), ())); 190 | 191 | // R11: errors(L, P) :- borrow_live_at(L, P), invalidates(L, P). 192 | errors.from_join(&borrow_live_at, &invalidates, |&(l, p), _, _| (l, p)); 193 | } 194 | 195 | errors.complete() 196 | }; 197 | "#; 198 | println!("{}", output); 199 | assert_eq!(expected, output); 200 | } 201 | 202 | // Copied from the above output test, to ensure what is generated at least builds, even 203 | // if it cannot run due to having no data. The generic types are to bring in the ones 204 | // defined in the declarations. 205 | // TODO: add a build and generate step, and include the result in this function 206 | #[allow(dead_code, unused_variables)] 207 | fn ensure_generated_rules_build() 208 | where 209 | Origin: Ord + Copy + 'static, 210 | Loan: Ord + Copy + 'static, 211 | Point: Ord + Copy + 'static, 212 | { 213 | // shim to bring in datafrog so that the generated skeleton can build. 
214 | use datafrog::{Iteration, Relation}; 215 | 216 | // ----- output from the skeleton generator follows below ----- 217 | 218 | // Extensional predicates, and their indices 219 | 220 | let borrow_region: Relation<(Origin, Loan, Point)> = Vec::new().into(); 221 | 222 | // Note: `cfg_edge_p` is an indexed version of the input facts `cfg_edge` 223 | let cfg_edge_p: Relation<(Point, Point)> = Vec::new().into(); 224 | 225 | let invalidates: Relation<((Loan, Point), ())> = Vec::new().into(); 226 | let killed: Relation<(Loan, Point)> = Vec::new().into(); 227 | let outlives: Relation<(Origin, Origin, Point)> = Vec::new().into(); 228 | 229 | // Note: `outlives_o1p` is an indexed version of the input facts `outlives` 230 | let outlives_o1p: Relation<((Origin, Point), Origin)> = Vec::new().into(); 231 | 232 | let region_live_at: Relation<((Origin, Point), ())> = Vec::new().into(); 233 | 234 | // `errors` inferred as the output relation 235 | let errors = { 236 | let mut iteration = Iteration::new(); 237 | 238 | // Intensional predicates, and their indices 239 | 240 | let borrow_live_at = iteration.variable::<((Loan, Point), ())>("borrow_live_at"); 241 | let equals = iteration.variable::<(Origin, Origin, Point)>("equals"); 242 | 243 | // Note: `equals_o1p` is an indexed version of the `equals` relation 244 | let equals_o1p = iteration.variable::<((Origin, Point), Origin)>("equals_o1p"); 245 | 246 | // Note: `equals_o2p` is an indexed version of the `equals` relation 247 | let equals_o2p = iteration.variable::<((Origin, Point), Origin)>("equals_o2p"); 248 | 249 | // Note: `equals_p` is an indexed version of the `equals` relation 250 | let equals_p = iteration.variable::<(Point, (Origin, Origin))>("equals_p"); 251 | let equals_step_5_1 = iteration.variable("equals_step_5_1"); 252 | let equals_step_5_2 = iteration.variable("equals_step_5_2"); 253 | let errors = iteration.variable::<(Loan, Point)>("errors"); 254 | let requires = iteration.variable::<(Origin, Loan, 
Point)>("requires"); 255 | 256 | // Note: `requires_lp` is an indexed version of the `requires` relation 257 | let requires_lp = iteration.variable::<((Loan, Point), Origin)>("requires_lp"); 258 | 259 | // Note: `requires_op` is an indexed version of the `requires` relation 260 | let requires_op = iteration.variable::<((Origin, Point), Loan)>("requires_op"); 261 | let requires_step_9_1 = iteration.variable("requires_step_9_1"); 262 | let requires_step_9_2 = iteration.variable("requires_step_9_2"); 263 | let subset = iteration.variable::<((Origin, Origin, Point), ())>("subset"); 264 | 265 | // Note: `subset_o1p` is an indexed version of the `subset` relation 266 | let subset_o1p = iteration.variable::<((Origin, Point), Origin)>("subset_o1p"); 267 | 268 | // Note: `subset_o2o1p` is an indexed version of the `subset` relation 269 | let subset_o2o1p = iteration.variable::<((Origin, Origin, Point), ())>("subset_o2o1p"); 270 | 271 | // Note: `subset_o2p` is an indexed version of the `subset` relation 272 | let subset_o2p = iteration.variable::<((Origin, Point), Origin)>("subset_o2p"); 273 | 274 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 275 | subset.extend(outlives.iter().map(|&tuple| (tuple, ()))); 276 | 277 | // R07: requires(O, L, P) :- borrow_region(O, L, P). 
278 | requires.extend(borrow_region.iter().clone()); 279 | 280 | while iteration.changed() { 281 | // Index maintenance 282 | equals_p.from_map(&equals, |&(o1, o2, p)| (p, (o1, o2))); 283 | equals_o1p.from_map(&equals, |&(o1, o2, p)| ((o1, p), o2)); 284 | subset_o1p.from_map(&subset, |&((o1, o2, p), _)| ((o1, p), o2)); 285 | equals_o2p.from_map(&equals, |&(o1, o2, p)| ((o2, p), o1)); 286 | subset_o2o1p.from_map(&subset, |&((o1, o2, p), _)| ((o2, o1, p), ())); 287 | requires_lp.from_map(&requires, |&(o, l, p)| ((l, p), o)); 288 | subset_o2p.from_map(&subset, |&((o1, o2, p), _)| ((o2, p), o1)); 289 | requires_op.from_map(&requires, |&(o, l, p)| ((o, p), l)); 290 | 291 | // Rules 292 | 293 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 294 | // `outlives` is a static input, already loaded into `subset`. 295 | 296 | // R02: subset(O1, O3, P) :- subset(O1, O2, P), outlives(O2, O3, P). 297 | subset.from_join(&subset_o2p, &outlives_o1p, |&(_o2, p), &o1, &o3| { 298 | ((o1, o3, p), ()) 299 | }); 300 | 301 | // R03: equals(O1, O2, P) :- subset(O1, O2, P), subset(O2, O1, P). 302 | equals.from_join(&subset, &subset_o2o1p, |&(o1, o2, p), _, _| (o1, o2, p)); 303 | 304 | // R04: equals(O1, O3, P) :- equals(O1, O2, P), equals(O2, O3, P). 305 | equals.from_join(&equals_o2p, &equals_o1p, |&(_o2, p), &o1, &o3| (o1, o3, p)); 306 | 307 | // R05: equals(O1, O2, Q) :- equals(O1, O2, P), cfg_edge(P, Q), region_live_at(O1, Q), region_live_at(O2, Q). 308 | equals_step_5_1.from_join(&equals_p, &cfg_edge_p, |&_p, &(o1, o2), &q| ((o1, q), o2)); 309 | equals_step_5_2.from_join(&equals_step_5_1, ®ion_live_at, |&(o1, q), &o2, _| { 310 | ((o2, q), o1) 311 | }); 312 | equals.from_join(&equals_step_5_2, ®ion_live_at, |&(o2, q), &o1, _| { 313 | (o1, o2, q) 314 | }); 315 | 316 | // R06: requires(O2, L, P) :- requires(O1, L, P), equals(O1, O2, P). 317 | requires.from_join(&requires_op, &equals_o1p, |&(_o1, p), &l, &o2| (o2, l, p)); 318 | 319 | // R07: requires(O, L, P) :- borrow_region(O, L, P). 
320 | // `borrow_region` is a static input, already loaded into `requires`. 321 | 322 | // R08: requires(O2, L, P) :- requires(O1, L, P), subset(O1, O2, P). 323 | requires.from_join(&requires_op, &subset_o1p, |&(_o1, p), &l, &o2| (o2, l, p)); 324 | 325 | // R09: requires(O, L, Q) :- requires(O, L, P), !killed(L, P), cfg_edge(P, Q), region_live_at(O, Q). 326 | requires_step_9_1.from_antijoin(&requires_lp, &killed, |&(l, p), &o| (p, (l, o))); 327 | requires_step_9_2.from_join(&requires_step_9_1, &cfg_edge_p, |&_p, &(l, o), &q| { 328 | ((o, q), l) 329 | }); 330 | requires.from_join(&requires_step_9_2, ®ion_live_at, |&(o, q), &l, _| { 331 | (o, l, q) 332 | }); 333 | 334 | // R10: borrow_live_at(L, P) :- requires(O, L, P), region_live_at(O, P). 335 | borrow_live_at.from_join(&requires_op, ®ion_live_at, |&(_o, p), &l, _| { 336 | ((l, p), ()) 337 | }); 338 | 339 | // R11: errors(L, P) :- borrow_live_at(L, P), invalidates(L, P). 340 | errors.from_join(&borrow_live_at, &invalidates, |&(l, p), _, _| (l, p)); 341 | } 342 | 343 | errors.complete() 344 | }; 345 | } 346 | -------------------------------------------------------------------------------- /tests/naive_rules.rs: -------------------------------------------------------------------------------- 1 | use datapond::generate_skeleton_datafrog; 2 | use pretty_assertions::assert_eq; 3 | 4 | #[test] 5 | fn generate_naive_rules() { 6 | let text = r#" 7 | input borrow_region(O: Origin, L: Loan, P: Point) 8 | input cfg_edge(P: Point, Q: Point) 9 | input killed(L: Loan, P: Point) 10 | input outlives(O1: Origin, O2: Origin, P: Point) 11 | input region_live_at(O: Origin, P: Point) 12 | input invalidates(L: Loan, P: Point) 13 | internal subset(O1: Origin, O2: Origin, P: Point) 14 | internal requires(O: Origin, L: Loan, P: Point) 15 | internal borrow_live_at(L: Loan, P: Point) 16 | output errors(L: Loan, P: Point) 17 | 18 | subset(O1, O2, P) :- outlives(O1, O2, P). 19 | subset(O1, O3, P) :- subset(O1, O2, P), subset(O2, O3, P). 
20 | subset(O1, O2, Q) :- subset(O1, O2, P), cfg_edge(P, Q), region_live_at(O1, Q), region_live_at(O2, Q). 21 | requires(O, L, P) :- borrow_region(O, L, P). 22 | requires(O2, L, P) :- requires(O1, L, P), subset(O1, O2, P). 23 | requires(O, L, Q) :- requires(O, L, P), !killed(L, P), cfg_edge(P, Q), region_live_at(O, Q). 24 | borrow_live_at(L, P) :- requires(O, L, P), region_live_at(O, P). 25 | errors(L, P) :- borrow_live_at(L, P), invalidates(L, P). 26 | "#; 27 | 28 | let output = generate_skeleton_datafrog(text); 29 | 30 | let expected = r#" 31 | // Extensional predicates, and their indices 32 | 33 | let borrow_region: Relation<(Origin, Loan, Point)> = Vec::new().into(); 34 | 35 | // Note: `cfg_edge_p` is an indexed version of the input facts `cfg_edge` 36 | let cfg_edge_p: Relation<(Point, Point)> = Vec::new().into(); 37 | 38 | let invalidates: Relation<((Loan, Point), ())> = Vec::new().into(); 39 | let killed: Relation<(Loan, Point)> = Vec::new().into(); 40 | let outlives: Relation<(Origin, Origin, Point)> = Vec::new().into(); 41 | let region_live_at: Relation<((Origin, Point), ())> = Vec::new().into(); 42 | 43 | // `errors` inferred as the output relation 44 | let errors = { 45 | 46 | let mut iteration = Iteration::new(); 47 | 48 | // Intensional predicates, and their indices 49 | 50 | let borrow_live_at = iteration.variable::<((Loan, Point), ())>("borrow_live_at"); 51 | let errors = iteration.variable::<(Loan, Point)>("errors"); 52 | let requires = iteration.variable::<(Origin, Loan, Point)>("requires"); 53 | 54 | // Note: `requires_lp` is an indexed version of the `requires` relation 55 | let requires_lp = iteration.variable::<((Loan, Point), Origin)>("requires_lp"); 56 | 57 | // Note: `requires_op` is an indexed version of the `requires` relation 58 | let requires_op = iteration.variable::<((Origin, Point), Loan)>("requires_op"); 59 | let requires_step_6_1 = iteration.variable("requires_step_6_1"); 60 | let requires_step_6_2 = 
iteration.variable("requires_step_6_2"); 61 | let subset = iteration.variable::<(Origin, Origin, Point)>("subset"); 62 | 63 | // Note: `subset_o1p` is an indexed version of the `subset` relation 64 | let subset_o1p = iteration.variable::<((Origin, Point), Origin)>("subset_o1p"); 65 | 66 | // Note: `subset_o2p` is an indexed version of the `subset` relation 67 | let subset_o2p = iteration.variable::<((Origin, Point), Origin)>("subset_o2p"); 68 | 69 | // Note: `subset_p` is an indexed version of the `subset` relation 70 | let subset_p = iteration.variable::<(Point, (Origin, Origin))>("subset_p"); 71 | let subset_step_3_1 = iteration.variable("subset_step_3_1"); 72 | let subset_step_3_2 = iteration.variable("subset_step_3_2"); 73 | 74 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 75 | subset.extend(outlives.iter().clone()); 76 | 77 | // R04: requires(O, L, P) :- borrow_region(O, L, P). 78 | requires.extend(borrow_region.iter().clone()); 79 | 80 | while iteration.changed() { 81 | 82 | // Index maintenance 83 | requires_op.from_map(&requires, |&(o, l, p)| ((o, p), l)); 84 | requires_lp.from_map(&requires, |&(o, l, p)| ((l, p), o)); 85 | subset_o2p.from_map(&subset, |&(o1, o2, p)| ((o2, p), o1)); 86 | subset_o1p.from_map(&subset, |&(o1, o2, p)| ((o1, p), o2)); 87 | subset_p.from_map(&subset, |&(o1, o2, p)| (p, (o1, o2))); 88 | 89 | // Rules 90 | 91 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 92 | // `outlives` is a static input, already loaded into `subset`. 93 | 94 | // R02: subset(O1, O3, P) :- subset(O1, O2, P), subset(O2, O3, P). 95 | subset.from_join(&subset_o2p, &subset_o1p, |&(_o2, p), &o1, &o3| (o1, o3, p)); 96 | 97 | // R03: subset(O1, O2, Q) :- subset(O1, O2, P), cfg_edge(P, Q), region_live_at(O1, Q), region_live_at(O2, Q). 
98 | subset_step_3_1.from_join(&subset_p, &cfg_edge_p, |&_p, &(o1, o2), &q| ((o1, q), o2)); 99 | subset_step_3_2.from_join(&subset_step_3_1, &region_live_at, |&(o1, q), &o2, _| ((o2, q), o1)); 100 | subset.from_join(&subset_step_3_2, &region_live_at, |&(o2, q), &o1, _| (o1, o2, q)); 101 | 102 | // R04: requires(O, L, P) :- borrow_region(O, L, P). 103 | // `borrow_region` is a static input, already loaded into `requires`. 104 | 105 | // R05: requires(O2, L, P) :- requires(O1, L, P), subset(O1, O2, P). 106 | requires.from_join(&requires_op, &subset_o1p, |&(_o1, p), &l, &o2| (o2, l, p)); 107 | 108 | // R06: requires(O, L, Q) :- requires(O, L, P), !killed(L, P), cfg_edge(P, Q), region_live_at(O, Q). 109 | requires_step_6_1.from_antijoin(&requires_lp, &killed, |&(l, p), &o| (p, (l, o))); 110 | requires_step_6_2.from_join(&requires_step_6_1, &cfg_edge_p, |&_p, &(l, o), &q| ((o, q), l)); 111 | requires.from_join(&requires_step_6_2, &region_live_at, |&(o, q), &l, _| (o, l, q)); 112 | 113 | // R07: borrow_live_at(L, P) :- requires(O, L, P), region_live_at(O, P). 114 | borrow_live_at.from_join(&requires_op, &region_live_at, |&(_o, p), &l, _| ((l, p), ())); 115 | 116 | // R08: errors(L, P) :- borrow_live_at(L, P), invalidates(L, P). 117 | errors.from_join(&borrow_live_at, &invalidates, |&(l, p), _, _| (l, p)); 118 | } 119 | 120 | errors.complete() 121 | }; 122 | "#; 123 | println!("{}", output); 124 | assert_eq!(expected, output); 125 | } 126 | 127 | // Copied from the above output test, to ensure what is generated at least builds, even 128 | // if it cannot run due to having no data. The generic types are to bring in the ones 129 | // defined in the declarations.
130 | // TODO: add a build and generate step, and include the result in this function 131 | #[allow(dead_code, unused_variables)] 132 | fn ensure_generated_rules_build() 133 | where 134 | Origin: Ord + Copy + 'static, 135 | Loan: Ord + Copy + 'static, 136 | Point: Ord + Copy + 'static, 137 | { 138 | // shim to bring in datafrog so that the generated skeleton can build. 139 | use datafrog::{Iteration, Relation}; 140 | 141 | // ----- output from the skeleton generator follows below ----- 142 | 143 | // Extensional predicates, and their indices 144 | 145 | let borrow_region: Relation<(Origin, Loan, Point)> = Vec::new().into(); 146 | 147 | // Note: `cfg_edge_p` is an indexed version of the input facts `cfg_edge` 148 | let cfg_edge_p: Relation<(Point, Point)> = Vec::new().into(); 149 | 150 | let invalidates: Relation<((Loan, Point), ())> = Vec::new().into(); 151 | let killed: Relation<(Loan, Point)> = Vec::new().into(); 152 | let outlives: Relation<(Origin, Origin, Point)> = Vec::new().into(); 153 | let region_live_at: Relation<((Origin, Point), ())> = Vec::new().into(); 154 | 155 | // `errors` inferred as the output relation 156 | let errors = { 157 | let mut iteration = Iteration::new(); 158 | 159 | // Intensional predicates, and their indices 160 | 161 | let borrow_live_at = iteration.variable::<((Loan, Point), ())>("borrow_live_at"); 162 | let errors = iteration.variable::<(Loan, Point)>("errors"); 163 | let requires = iteration.variable::<(Origin, Loan, Point)>("requires"); 164 | 165 | // Note: `requires_lp` is an indexed version of the `requires` relation 166 | let requires_lp = iteration.variable::<((Loan, Point), Origin)>("requires_lp"); 167 | 168 | // Note: `requires_op` is an indexed version of the `requires` relation 169 | let requires_op = iteration.variable::<((Origin, Point), Loan)>("requires_op"); 170 | let requires_step_6_1 = iteration.variable("requires_step_6_1"); 171 | let requires_step_6_2 = iteration.variable("requires_step_6_2"); 172 | let subset = 
iteration.variable::<(Origin, Origin, Point)>("subset"); 173 | 174 | // Note: `subset_o1p` is an indexed version of the `subset` relation 175 | let subset_o1p = iteration.variable::<((Origin, Point), Origin)>("subset_o1p"); 176 | 177 | // Note: `subset_o2p` is an indexed version of the `subset` relation 178 | let subset_o2p = iteration.variable::<((Origin, Point), Origin)>("subset_o2p"); 179 | 180 | // Note: `subset_p` is an indexed version of the `subset` relation 181 | let subset_p = iteration.variable::<(Point, (Origin, Origin))>("subset_p"); 182 | let subset_step_3_1 = iteration.variable("subset_step_3_1"); 183 | let subset_step_3_2 = iteration.variable("subset_step_3_2"); 184 | 185 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 186 | subset.extend(outlives.iter().clone()); 187 | 188 | // R04: requires(O, L, P) :- borrow_region(O, L, P). 189 | requires.extend(borrow_region.iter().clone()); 190 | 191 | while iteration.changed() { 192 | // Index maintenance 193 | requires_op.from_map(&requires, |&(o, l, p)| ((o, p), l)); 194 | requires_lp.from_map(&requires, |&(o, l, p)| ((l, p), o)); 195 | subset_o2p.from_map(&subset, |&(o1, o2, p)| ((o2, p), o1)); 196 | subset_o1p.from_map(&subset, |&(o1, o2, p)| ((o1, p), o2)); 197 | subset_p.from_map(&subset, |&(o1, o2, p)| (p, (o1, o2))); 198 | 199 | // Rules 200 | 201 | // R01: subset(O1, O2, P) :- outlives(O1, O2, P). 202 | // `outlives` is a static input, already loaded into `subset`. 203 | 204 | // R02: subset(O1, O3, P) :- subset(O1, O2, P), subset(O2, O3, P). 205 | subset.from_join(&subset_o2p, &subset_o1p, |&(_o2, p), &o1, &o3| (o1, o3, p)); 206 | 207 | // R03: subset(O1, O2, Q) :- subset(O1, O2, P), cfg_edge(P, Q), region_live_at(O1, Q), region_live_at(O2, Q). 
208 | subset_step_3_1.from_join(&subset_p, &cfg_edge_p, |&_p, &(o1, o2), &q| ((o1, q), o2)); 209 | subset_step_3_2.from_join(&subset_step_3_1, ®ion_live_at, |&(o1, q), &o2, _| { 210 | ((o2, q), o1) 211 | }); 212 | subset.from_join(&subset_step_3_2, ®ion_live_at, |&(o2, q), &o1, _| { 213 | (o1, o2, q) 214 | }); 215 | 216 | // R04: requires(O, L, P) :- borrow_region(O, L, P). 217 | // `borrow_region` is a static input, already loaded into `requires`. 218 | 219 | // R05: requires(O2, L, P) :- requires(O1, L, P), subset(O1, O2, P). 220 | requires.from_join(&requires_op, &subset_o1p, |&(_o1, p), &l, &o2| (o2, l, p)); 221 | 222 | // R06: requires(O, L, Q) :- requires(O, L, P), !killed(L, P), cfg_edge(P, Q), region_live_at(O, Q). 223 | requires_step_6_1.from_antijoin(&requires_lp, &killed, |&(l, p), &o| (p, (l, o))); 224 | requires_step_6_2.from_join(&requires_step_6_1, &cfg_edge_p, |&_p, &(l, o), &q| { 225 | ((o, q), l) 226 | }); 227 | requires.from_join(&requires_step_6_2, ®ion_live_at, |&(o, q), &l, _| { 228 | (o, l, q) 229 | }); 230 | 231 | // R07: borrow_live_at(L, P) :- requires(O, L, P), region_live_at(O, P). 232 | borrow_live_at.from_join(&requires_op, ®ion_live_at, |&(_o, p), &l, _| { 233 | ((l, p), ()) 234 | }); 235 | 236 | // R08: errors(L, P) :- borrow_live_at(L, P), invalidates(L, P). 237 | errors.from_join(&borrow_live_at, &invalidates, |&(l, p), _, _| (l, p)); 238 | } 239 | 240 | errors.complete() 241 | }; 242 | } 243 | --------------------------------------------------------------------------------