├── .github └── FUNDING.yml ├── .gitignore ├── .rust-toolchain ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── Resources.md ├── build.rs ├── crates └── sruth-derive │ ├── Cargo.toml │ └── src │ └── lib.rs ├── examples └── brainfuck │ ├── main.rs │ ├── parse.rs │ └── programs.rs └── src ├── builder ├── block.rs ├── context.rs ├── error.rs ├── function.rs └── mod.rs ├── dataflow ├── algorithms │ ├── mod.rs │ └── reachable.rs ├── input_manager.rs ├── mod.rs ├── operators │ ├── apply_seq.rs │ ├── arrange.rs │ ├── bounded_loop.rs │ ├── buffered_flat_map.rs │ ├── cleanup.rs │ ├── collect.rs │ ├── count_ext.rs │ ├── discriminated_idents.rs │ ├── distinct.rs │ ├── event_utils.rs │ ├── exchange.rs │ ├── filter_diff.rs │ ├── filter_map.rs │ ├── flatten.rs │ ├── inspect.rs │ ├── join.rs │ ├── keys.rs │ ├── map.rs │ ├── max.rs │ ├── min.rs │ ├── mod.rs │ ├── partition.rs │ ├── reverse.rs │ ├── split.rs │ ├── threshold.rs │ └── union_find.rs ├── program.rs ├── trace_manager.rs └── translate.rs ├── equisat ├── mod.rs ├── sexpr.lalrpop └── sexpr.rs ├── lib.rs ├── optimize ├── constant_folding │ ├── evaluation.rs │ ├── mod.rs │ └── promotion.rs ├── inline │ ├── early_inline.rs │ ├── heuristics.rs │ └── mod.rs ├── loops.rs ├── mod.rs └── peephole.rs ├── repr ├── basic_block.rs ├── constant.rs ├── function.rs ├── instruction │ ├── assign.rs │ ├── binary_ops.rs │ ├── bitcast.rs │ ├── call.rs │ ├── cmp.rs │ ├── mod.rs │ └── neg.rs ├── mod.rs ├── terminator.rs ├── types.rs ├── utils.rs └── value.rs ├── tests ├── mod.rs └── num_folding.rs ├── verify └── mod.rs ├── vsdg ├── cse.rs ├── dce.rs ├── dot.rs ├── folding.rs ├── graph.rs ├── inline.rs ├── logging.rs ├── loops.rs ├── mod.rs ├── node │ ├── control.rs │ ├── mod.rs │ ├── node_ext.rs │ ├── operation.rs │ ├── structure.rs │ └── value │ │ ├── constant.rs │ │ ├── mod.rs │ │ └── parameter.rs └── tests.rs └── wasm └── mod.rs /.github/FUNDING.yml: 
-------------------------------------------------------------------------------- 1 | github: [Kixiron] 2 | custom: ["https://www.buymeacoffee.com/kixiron"] 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | .vscode 4 | .cargo 5 | -------------------------------------------------------------------------------- /.rust-toolchain: -------------------------------------------------------------------------------- 1 | nightly 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sruth" 3 | version = "0.1.0" 4 | description = "A toy optimization engine" 5 | authors = ["Chase Wilson "] 6 | license = "MIT OR Apache-2.0" 7 | edition = "2018" 8 | build = "build.rs" 9 | 10 | [features] 11 | default = ["dot"] 12 | dot = ["petgraph"] 13 | 14 | [[example]] 15 | name = "brainfuck" 16 | 17 | [dependencies] 18 | regex = "1.5.4" 19 | fxhash = "0.2.1" 20 | byteorder = "1.4.3" 21 | abomonation = "0.7.3" 22 | derive_more = "0.99.11" 23 | lalrpop-util = "0.19.5" 24 | crossbeam-channel = "0.5.0" 25 | abomonation_derive = "0.5.0" 26 | petgraph = { version = "0.5.1", optional = true } 27 | 28 | [dependencies.sruth-derive] 29 | path = "crates/sruth-derive" 30 | 31 | [dependencies.num-traits] 32 | version = "0.2.14" 33 | default-features = false 34 | 35 | [dependencies.lasso] 36 | version = "0.5.0" 37 | features = ["multi-threaded"] 38 | 39 | [dependencies.tracing] 40 | version = "0.1.23" 41 | default-features = false 42 | 43 | [dependencies.pretty] 44 | version = "0.10.0" 45 | default-features = false 46 | 47 | [dependencies.timely] 48 | git = "https://github.com/TimelyDataflow/timely-dataflow" 49 | default-features = false 50 | 51 | [dependencies.differential-dataflow] 52 | git = 
"https://github.com/TimelyDataflow/differential-dataflow" 53 | default-features = false 54 | 55 | [dependencies.dogsdogsdogs] 56 | git = "https://github.com/TimelyDataflow/differential-dataflow" 57 | default-features = false 58 | 59 | [build-dependencies] 60 | lalrpop = "0.19.5" 61 | 62 | [dev-dependencies] 63 | tracing-subscriber = "0.2.15" 64 | 65 | [profile.dev] 66 | opt-level = 0 67 | debug = 2 68 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright 2021 Chase Wilson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files 4 | (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, 5 | publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do 6 | so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 11 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE 12 | FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 13 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sruth Lùbach: A Meandering Current 2 | 3 | `sruth` is a toy optimization engine written in [Differential Dataflow] based on the [VSDG], [RVSDG] and [Sea of Nodes] 4 | compiler designs. 5 | 6 | [Differential Dataflow]: https://github.com/TimelyDataflow/differential-dataflow 7 | [VSDG]: https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-705.pdf 8 | [RVSDG]: https://arxiv.org/abs/1912.05036 9 | [Sea of Nodes]: https://darksi.de/d.sea-of-nodes/ 10 | -------------------------------------------------------------------------------- /Resources.md: -------------------------------------------------------------------------------- 1 | * [Vectorized `popcnt`](https://arxiv.org/pdf/1611.07612.pdf) 2 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | lalrpop::process_root().unwrap(); 3 | } 4 | -------------------------------------------------------------------------------- /crates/sruth-derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sruth-derive" 3 | version = "0.1.0" 4 | authors = ["Chase Wilson "] 5 | license = "MIT OR Apache-2.0" 6 | edition = "2018" 7 | 8 | [lib] 9 | proc-macro = true 10 | 11 | [dependencies] 12 | quote = "1.0.9" 13 | derive_utils = "0.11" 14 | proc-macro2 = "1.0.24" 15 | syn = { version = "1.0.60", features = ["derive"] } 16 | -------------------------------------------------------------------------------- /crates/sruth-derive/src/lib.rs: -------------------------------------------------------------------------------- 1 | use derive_utils::quick_derive; 2 | use proc_macro2::{Span, TokenStream}; 3 | use quote::{quote, ToTokens}; 4 | use syn::{parse_macro_input, 
spanned::Spanned, Data, DeriveInput, Error, Ident, Index, Result}; 5 | 6 | #[proc_macro_derive(NodeExt)] 7 | pub fn derive_node_ext(input: proc_macro::TokenStream) -> proc_macro::TokenStream { 8 | quick_derive! { 9 | input, 10 | NodeExt, 11 | trait NodeExt { 12 | fn node_name(&self) -> &'static str; 13 | 14 | fn evaluate_with_constants(self, constants: &[(NodeId, Constant)]) -> (Node, Vec); 15 | 16 | fn inline_cost(&self) -> isize; 17 | } 18 | } 19 | } 20 | 21 | #[proc_macro_derive(Castable)] 22 | pub fn derive_castable_stub(input: proc_macro::TokenStream) -> proc_macro::TokenStream { 23 | let input = parse_macro_input!(input as DeriveInput); 24 | 25 | derive_castable(input) 26 | .unwrap_or_else(|err| err.into_compile_error()) 27 | .into() 28 | } 29 | 30 | fn derive_castable(input: DeriveInput) -> Result { 31 | let target_enum = if let Data::Enum(target) = input.data { 32 | target 33 | } else { 34 | return Err(Error::new_spanned(input, "Castable only accepts enums")); 35 | }; 36 | let enum_ty = input.ident; 37 | let mut output = TokenStream::new(); 38 | 39 | for variant in target_enum.variants.iter() { 40 | let mut fields = variant.fields.iter(); 41 | let field = fields.next().unwrap(); 42 | assert_eq!(fields.count(), 0); 43 | 44 | let variant_name = &variant.ident; 45 | let field_ty = &field.ty; 46 | let field_name = field.ident.as_ref().map_or_else( 47 | || { 48 | Index { 49 | index: 0, 50 | span: field.span(), 51 | } 52 | .into_token_stream() 53 | }, 54 | |field| field.into_token_stream(), 55 | ); 56 | let value = Ident::new("value", Span::mixed_site()); 57 | 58 | output.extend(quote! { 59 | impl Castable<#field_ty> for #enum_ty { 60 | fn is(&self) -> bool { 61 | core::matches!(self, #enum_ty::#variant_name { .. 
}) 62 | } 63 | 64 | unsafe fn cast_unchecked(&self) -> &#field_ty { 65 | if let #enum_ty::#variant_name { #field_name: #value } = self { 66 | #value 67 | } else { 68 | core::hint::unreachable_unchecked() 69 | } 70 | } 71 | } 72 | }); 73 | } 74 | 75 | Ok(output) 76 | } 77 | -------------------------------------------------------------------------------- /examples/brainfuck/main.rs: -------------------------------------------------------------------------------- 1 | mod parse; 2 | mod programs; 3 | 4 | use parse::BrainfuckAst; 5 | use programs::HELLO_WORLD; 6 | use sruth::{ 7 | builder::{BuildResult, Context, FunctionBuilder}, 8 | dataflow::{Diff, Time}, 9 | vsdg::{ 10 | dot, 11 | node::{CmpKind, Constant, NodeId}, 12 | optimization_dataflow, 13 | }, 14 | }; 15 | use std::{ 16 | sync::{ 17 | atomic::{AtomicU8, Ordering}, 18 | Arc, 19 | }, 20 | vec::IntoIter, 21 | }; 22 | use timely::Config; 23 | use tracing_subscriber::{ 24 | filter::LevelFilter, 25 | fmt::{self, time::uptime}, 26 | prelude::__tracing_subscriber_SubscriberExt, 27 | util::SubscriberInitExt, 28 | }; 29 | 30 | fn main() { 31 | let _ = tracing_subscriber::registry() 32 | .with(LevelFilter::TRACE) 33 | .with(fmt::layer().with_timer(uptime())) 34 | .try_init(); 35 | 36 | let program = parse::stratify(&parse::parse(HELLO_WORLD)); 37 | let counter = Arc::new(AtomicU8::new(0)); 38 | let context = Arc::new(Context::new(counter.fetch_add(1, Ordering::Relaxed))); 39 | let (sender, receiver) = crossbeam_channel::unbounded(); 40 | 41 | timely::execute(Config::thread(), move |worker| { 42 | let (mut inputs, _trace, probe) = 43 | optimization_dataflow::<_, Time, Diff>(worker, sender.clone(), counter.clone()); 44 | 45 | if worker.index() == 0 { 46 | let mut builder = context.builder(); 47 | 48 | let _add = builder 49 | .named_function("main", sruth::repr::Type::Uint, |func| { 50 | func.basic_block(|block| { 51 | block.ret(sruth::repr::Constant::Uint(0))?; 52 | Ok(()) 53 | })?; 54 | 55 | let data_tape = 
func.vsdg_const(Constant::Array(vec![Constant::Uint8(0); 100])); 56 | let mut data_ptr = func.vsdg_ptr_to(data_tape); 57 | 58 | let mut program = program.clone().into_iter(); 59 | while let Some(node) = program.next() { 60 | data_ptr = compile_node(node, &mut program, func, data_ptr)?; 61 | } 62 | 63 | let load = func.vsdg_load(data_ptr); 64 | func.vsdg_return(load) 65 | }) 66 | .unwrap(); 67 | 68 | builder.vsdg_finish(&mut inputs, 0).unwrap(); 69 | } 70 | 71 | inputs.advance_to(1); 72 | inputs.flush(); 73 | 74 | while probe.less_than(inputs.time()) { 75 | worker.step_or_park(None); 76 | } 77 | }) 78 | .unwrap(); 79 | 80 | dot::render_graphs(receiver); 81 | } 82 | 83 | fn compile_node( 84 | node: BrainfuckAst, 85 | program: &mut IntoIter, 86 | func: &mut FunctionBuilder, 87 | mut data_ptr: NodeId, 88 | ) -> BuildResult { 89 | match node { 90 | BrainfuckAst::IncPtr => { 91 | let one = func.vsdg_const(Constant::Uint8(1)); 92 | func.vsdg_add(data_ptr, one) 93 | } 94 | 95 | BrainfuckAst::DecPtr => { 96 | let one = func.vsdg_const(Constant::Uint8(1)); 97 | func.vsdg_sub(data_ptr, one) 98 | } 99 | 100 | BrainfuckAst::IncValue => { 101 | let one = func.vsdg_const(Constant::Uint8(1)); 102 | let value = func.vsdg_load(data_ptr); 103 | 104 | let incremented = func.vsdg_add(value, one)?; 105 | func.vsdg_store(incremented, data_ptr); 106 | 107 | Ok(incremented) 108 | } 109 | 110 | BrainfuckAst::DecValue => { 111 | let one = func.vsdg_const(Constant::Uint8(1)); 112 | let value = func.vsdg_load(data_ptr); 113 | 114 | let incremented = func.vsdg_sub(value, one)?; 115 | func.vsdg_store(incremented, data_ptr); 116 | 117 | Ok(incremented) 118 | } 119 | 120 | // TODO: Loop regions 121 | BrainfuckAst::Loop { body } => { 122 | func.vsdg_loop(|func| { 123 | for node in body { 124 | data_ptr = compile_node(node, program, func, data_ptr)?; 125 | } 126 | 127 | let value = func.vsdg_load(data_ptr); 128 | let zero = func.vsdg_const(Constant::Uint8(0)); 129 | let cmp = 
func.vsdg_cmp(CmpKind::Eq, value, zero); 130 | 131 | Ok(Some(cmp)) 132 | })?; 133 | 134 | Ok(data_ptr) 135 | } 136 | 137 | // TODO: Foreign function declaration & invocation 138 | BrainfuckAst::Output => Ok(data_ptr), 139 | BrainfuckAst::Input => Ok(data_ptr), 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /examples/brainfuck/parse.rs: -------------------------------------------------------------------------------- 1 | use std::slice::Iter; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 4 | pub enum Command { 5 | IncPtr, 6 | DecPtr, 7 | IncValue, 8 | DecValue, 9 | Output, 10 | Input, 11 | LoopStart, 12 | LoopEnd, 13 | } 14 | 15 | pub(crate) fn parse(source: &str) -> Vec { 16 | let mut commands = Vec::with_capacity(source.chars().count()); 17 | commands.extend(source.chars().filter_map(parse_char)); 18 | 19 | commands 20 | } 21 | 22 | fn parse_char(character: char) -> Option { 23 | match character { 24 | '>' => Some(Command::IncPtr), 25 | '<' => Some(Command::DecPtr), 26 | '+' => Some(Command::IncValue), 27 | '-' => Some(Command::DecValue), 28 | '.' 
=> Some(Command::Output), 29 | ',' => Some(Command::Input), 30 | '[' => Some(Command::LoopStart), 31 | ']' => Some(Command::LoopEnd), 32 | _ => None, 33 | } 34 | } 35 | 36 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 37 | pub enum BrainfuckAst { 38 | IncPtr, 39 | DecPtr, 40 | IncValue, 41 | DecValue, 42 | Output, 43 | Input, 44 | Loop { body: Vec }, 45 | } 46 | 47 | pub(crate) fn stratify(program: &[Command]) -> Vec { 48 | let mut ast = Vec::with_capacity(program.len()); 49 | let mut program = program.iter(); 50 | 51 | while let Some(command) = program.next() { 52 | ast.push(stratify_command(command, &mut program)); 53 | } 54 | 55 | ast 56 | } 57 | 58 | fn stratify_command(command: &Command, program: &mut Iter) -> BrainfuckAst { 59 | match command { 60 | Command::IncPtr => BrainfuckAst::IncPtr, 61 | Command::DecPtr => BrainfuckAst::DecPtr, 62 | Command::IncValue => BrainfuckAst::IncValue, 63 | Command::DecValue => BrainfuckAst::DecValue, 64 | Command::Output => BrainfuckAst::Output, 65 | Command::Input => BrainfuckAst::Input, 66 | Command::LoopStart => { 67 | let mut body = Vec::with_capacity(10); 68 | 69 | let mut next = program.next().expect("unexpected EOF"); 70 | while !matches!(next, Command::LoopEnd) { 71 | body.push(stratify_command(next, program)); 72 | 73 | next = program.next().expect("unexpected EOF"); 74 | } 75 | 76 | BrainfuckAst::Loop { body } 77 | } 78 | Command::LoopEnd => panic!("unmatched loop end"), 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /examples/brainfuck/programs.rs: -------------------------------------------------------------------------------- 1 | pub(crate) const HELLO_WORLD: &str = " 2 | ++++++++ Set Cell #0 to 8 3 | [ 4 | >++++ Add 4 to Cell #1; this will always set Cell #1 to 4 5 | [ as the cell will be cleared by the loop 6 | >++ Add 2 to Cell #2 7 | >+++ Add 3 to Cell #3 8 | >+++ Add 3 to Cell #4 9 | >+ Add 1 to Cell #5 10 | <<<<- Decrement the loop 
counter in Cell #1 11 | ] Loop till Cell #1 is zero; number of iterations is 4 12 | >+ Add 1 to Cell #2 13 | >+ Add 1 to Cell #3 14 | >- Subtract 1 from Cell #4 15 | >>+ Add 1 to Cell #6 16 | [<] Move back to the first zero cell you find; this will 17 | be Cell #1 which was cleared by the previous loop 18 | <- Decrement the loop Counter in Cell #0 19 | ] Loop till Cell #0 is zero; number of iterations is 8 20 | 21 | The result of this is: 22 | Cell No : 0 1 2 3 4 5 6 23 | Contents: 0 0 72 104 88 32 8 24 | Pointer : ^ 25 | 26 | >>. Cell #2 has value 72 which is 'H' 27 | >---. Subtract 3 from Cell #3 to get 101 which is 'e' 28 | +++++++..+++. Likewise for 'llo' from Cell #3 29 | >>. Cell #5 is 32 for the space 30 | <-. Subtract 1 from Cell #4 for 87 to give a 'W' 31 | <. Cell #3 was set to 'o' from the end of 'Hello' 32 | +++.------.--------. Cell #3 for 'rl' and 'd' 33 | >>+. Add 1 to Cell #5 gives us an exclamation point 34 | >++. And finally a newline from Cell #6 35 | "; 36 | -------------------------------------------------------------------------------- /src/builder/context.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | builder::Builder, 3 | dataflow::operators::Uuid, 4 | repr::{BasicBlockId, FuncId, InstId, VarId}, 5 | vsdg::node::NodeId, 6 | }; 7 | use lasso::ThreadedRodeo; 8 | use std::{ 9 | num::NonZeroU64, 10 | sync::{ 11 | atomic::{AtomicU64, Ordering}, 12 | Arc, 13 | }, 14 | }; 15 | 16 | #[derive(Debug)] 17 | pub struct Context { 18 | pub(super) interner: ThreadedRodeo, 19 | func_counter: AtomicU64, 20 | block_counter: AtomicU64, 21 | inst_counter: AtomicU64, 22 | var_counter: AtomicU64, 23 | node_counter: AtomicU64, 24 | pub(super) ident_generation: u8, 25 | } 26 | 27 | // Public API 28 | impl Context { 29 | pub fn new(ident_generation: u8) -> Self { 30 | Self { 31 | interner: ThreadedRodeo::new(), 32 | func_counter: AtomicU64::new(0), 33 | block_counter: AtomicU64::new(0), 34 | inst_counter: 
AtomicU64::new(0), 35 | var_counter: AtomicU64::new(0), 36 | node_counter: AtomicU64::new(0), 37 | ident_generation, 38 | } 39 | } 40 | 41 | pub fn builder(self: &Arc) -> Builder { 42 | Builder::new(self.clone()) 43 | } 44 | 45 | pub fn interner(&self) -> &ThreadedRodeo { 46 | &self.interner 47 | } 48 | } 49 | 50 | // Private API 51 | impl Context { 52 | crate fn function_id(&self) -> FuncId { 53 | FuncId::new(fetch_id(&self.func_counter)) 54 | } 55 | 56 | crate fn block_id(&self) -> BasicBlockId { 57 | BasicBlockId::new(fetch_id(&self.block_counter)) 58 | } 59 | 60 | crate fn inst_id(&self) -> InstId { 61 | InstId::new(fetch_id(&self.inst_counter)) 62 | } 63 | 64 | crate fn var_id(&self) -> VarId { 65 | VarId::new(fetch_id(&self.var_counter)) 66 | } 67 | 68 | crate fn node_id(&self) -> NodeId { 69 | NodeId::new(Uuid::new( 70 | self.ident_generation, 71 | fetch_id(&self.node_counter).get(), 72 | )) 73 | } 74 | } 75 | 76 | #[inline] 77 | fn fetch_id(counter: &AtomicU64) -> NonZeroU64 { 78 | let int = counter.fetch_add(1, Ordering::Relaxed) + 1; 79 | 80 | if cfg!(debug_assertions) { 81 | if int == u64::max_value() { 82 | panic!("created the maximum number of ids (how did you even manage that?)"); 83 | } 84 | 85 | NonZeroU64::new(int).expect("created an invalid id") 86 | } else { 87 | unsafe { NonZeroU64::new_unchecked(int) } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/builder/error.rs: -------------------------------------------------------------------------------- 1 | use abomonation_derive::Abomonation; 2 | 3 | pub type BuildResult = Result; 4 | 5 | // TODO: Impl Display and Error, add docs 6 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Abomonation)] 7 | pub enum BuilderError { 8 | MissingTerminator, 9 | EmptyFunctionBody, 10 | MissingEntryBlock, 11 | MismatchedReturnTypes, 12 | MismatchedOperandTypes, 13 | IncorrectConditionType, 14 | } 15 | 
-------------------------------------------------------------------------------- /src/dataflow/algorithms/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod reachable; 2 | -------------------------------------------------------------------------------- /src/dataflow/algorithms/reachable.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{ 2 | difference::{Abelian, Multiply}, 3 | lattice::Lattice, 4 | operators::{ 5 | arrange::ArrangeBySelf, 6 | arrange::{ArrangeByKey, Arranged}, 7 | iterate::SemigroupVariable, 8 | reduce::ReduceCore, 9 | JoinCore, 10 | }, 11 | trace::{implementations::ord::OrdKeySpine, TraceReader}, 12 | Collection, ExchangeData, 13 | }; 14 | use std::hash::Hash; 15 | use timely::{dataflow::Scope, order::Product}; 16 | 17 | /// Propagates the reachability of nodes forward from the roots, returning 18 | /// a collection of all nodes that could be reached 19 | pub fn reachable( 20 | edges: &Collection, 21 | roots: &Collection, 22 | ) -> Collection 23 | where 24 | S: Scope, 25 | S::Timestamp: Lattice, 26 | N: ExchangeData + Hash, 27 | R: Abelian + ExchangeData + Multiply + From, 28 | { 29 | reachable_core("Reachable", &edges.arrange_by_key(), roots) 30 | } 31 | 32 | /// Propagates the reachability of nodes forward from the roots, returning 33 | /// a collection of all nodes that could be reached 34 | // This is `differential_dataflow::algorithms::graphs::propagate::propagate_core()` specialized for cases 35 | // where `nodes` doesn't carry any data along any data and only cares about node reachability 36 | pub fn reachable_core( 37 | name: &str, 38 | edges: &Arranged, 39 | roots: &Collection, 40 | ) -> Collection 41 | where 42 | S: Scope, 43 | S::Timestamp: Lattice, 44 | N: ExchangeData + Hash, 45 | R: Abelian + ExchangeData + Multiply + From, 46 | Trace: TraceReader + Clone + 'static, 47 | { 48 | roots 49 | .scope() 50 | .scoped::, _, 
_>(name, |scope| { 51 | let (edges, roots) = (edges.enter(scope), roots.enter(scope)); 52 | let proposals = SemigroupVariable::new(scope, Product::new(Default::default(), 1)); 53 | 54 | let labels = proposals 55 | .concat(&roots) 56 | .arrange_by_self() 57 | .reduce_abelian::<_, OrdKeySpine<_, _, _>>(name, |_key, _input, output| { 58 | output.push(((), R::from(1))); 59 | }); 60 | 61 | let propagate: Collection<_, N, R> = 62 | labels.join_core(&edges, |_, &(), node| Some(node.clone())); 63 | proposals.set(&propagate); 64 | 65 | labels.as_collection(|k, &()| k.clone()).leave() 66 | }) 67 | } 68 | -------------------------------------------------------------------------------- /src/dataflow/input_manager.rs: -------------------------------------------------------------------------------- 1 | use crate::repr::{ 2 | basic_block::BasicBlockDesc, function::FunctionDesc, BasicBlockId, FuncId, InstId, Instruction, 3 | }; 4 | use differential_dataflow::{ 5 | difference::{Abelian, Semigroup}, 6 | input::{Input, InputSession}, 7 | lattice::Lattice, 8 | operators::{ 9 | arrange::{ArrangeByKey, TraceAgent}, 10 | Threshold, 11 | }, 12 | trace::implementations::ord::OrdValSpine, 13 | ExchangeData, 14 | }; 15 | use std::fmt::Debug; 16 | use timely::{dataflow::Scope, progress::Timestamp}; 17 | 18 | pub struct InputManager 19 | where 20 | T: Timestamp + Lattice, 21 | R: Semigroup, 22 | { 23 | pub instructions: InputSession, 24 | pub instruction_trace: TraceAgent>, 25 | 26 | pub basic_blocks: InputSession, 27 | pub basic_block_trace: TraceAgent>, 28 | 29 | pub functions: InputSession, 30 | pub function_trace: TraceAgent>, 31 | } 32 | 33 | impl InputManager 34 | where 35 | T: Timestamp + Lattice, 36 | R: Semigroup + ExchangeData, 37 | { 38 | pub fn new(scope: &mut S) -> Self 39 | where 40 | S: Scope + Input, 41 | R: Abelian + From, 42 | { 43 | tracing::info!("created a new input manager"); 44 | 45 | let (instructions, instruction_trace) = scope.new_collection::<(InstId, Instruction), 
R>(); 46 | let (basic_blocks, basic_block_trace) = 47 | scope.new_collection::<(BasicBlockId, BasicBlockDesc), R>(); 48 | let (functions, function_trace) = scope.new_collection::<(FuncId, FunctionDesc), R>(); 49 | 50 | // TODO: Exchange more intelligently to put all blocks & instructions for 51 | // a given function onto the same worker 52 | let instruction_trace = instruction_trace.distinct_core().arrange_by_key().trace; 53 | let basic_block_trace = basic_block_trace.distinct_core().arrange_by_key().trace; 54 | let function_trace = function_trace.distinct_core().arrange_by_key().trace; 55 | 56 | Self { 57 | instructions, 58 | instruction_trace, 59 | basic_blocks, 60 | basic_block_trace, 61 | functions, 62 | function_trace, 63 | } 64 | } 65 | 66 | pub fn advance_to(&mut self, time: T) 67 | where 68 | T: Debug + Clone, 69 | { 70 | tracing::info!("advancing to timestamp {:?}", time); 71 | 72 | self.instructions.advance_to(time.clone()); 73 | self.instructions.flush(); 74 | 75 | self.basic_blocks.advance_to(time.clone()); 76 | self.basic_blocks.flush(); 77 | 78 | self.functions.advance_to(time); 79 | self.functions.flush(); 80 | } 81 | 82 | pub fn time(&self) -> &T { 83 | debug_assert_eq!(self.instructions.time(), self.basic_blocks.time()); 84 | debug_assert_eq!(self.instructions.time(), self.functions.time()); 85 | 86 | self.instructions.time() 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/dataflow/mod.rs: -------------------------------------------------------------------------------- 1 | mod input_manager; 2 | mod program; 3 | mod trace_manager; 4 | mod translate; 5 | 6 | pub mod algorithms; 7 | pub mod operators; 8 | 9 | pub use input_manager::InputManager; 10 | pub use program::{Program, ProgramVariable}; 11 | pub use trace_manager::TraceManager; 12 | pub use translate::translate; 13 | 14 | pub type Diff = isize; 15 | 16 | pub type Time = usize; 17 | 
-------------------------------------------------------------------------------- /src/dataflow/operators/apply_seq.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{ 2 | difference::Abelian, lattice::Lattice, operators::iterate::Variable, AsCollection, Collection, 3 | Data, 4 | }; 5 | use timely::{ 6 | dataflow::{operators::Map, Scope}, 7 | order::Product, 8 | }; 9 | 10 | pub trait ApplySequenced { 11 | type Output; 12 | 13 | fn apply_sequenced(&self, sequenced: Vec D>>) -> Self::Output { 14 | self.apply_sequenced_named("ApplySequenced", sequenced) 15 | } 16 | 17 | fn apply_sequenced_named( 18 | &self, 19 | name: &str, 20 | sequenced: Vec D>>, 21 | ) -> Self::Output; 22 | } 23 | 24 | impl ApplySequenced for Collection 25 | where 26 | S: Scope, 27 | S::Timestamp: Lattice, 28 | D: Data, 29 | R: Abelian, 30 | { 31 | type Output = Collection; 32 | 33 | fn apply_sequenced_named( 34 | &self, 35 | name: &str, 36 | sequenced: Vec D>>, 37 | ) -> Self::Output { 38 | self.inner 39 | .scope() 40 | .scoped::, _, _>(name, move |scope| { 41 | let variable = 42 | Variable::new_from(self.enter(scope), Product::new(Default::default(), 1)); 43 | 44 | let applied = variable 45 | .inner 46 | .map(move |(data, time, diff)| { 47 | let apply = &*sequenced[time.inner as usize % sequenced.len()]; 48 | 49 | (apply(data), time, diff) 50 | }) 51 | .as_collection(); 52 | 53 | variable.set(&applied).leave() 54 | }) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/dataflow/operators/arrange.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{ 2 | difference::Semigroup, 3 | lattice::Lattice, 4 | operators::arrange::{Arrange, Arranged, TraceAgent}, 5 | trace::implementations::ord::{OrdKeySpine, OrdValSpine}, 6 | Collection, ExchangeData, Hashable, 7 | }; 8 | use timely::dataflow::{ 9 | channels::pact::{Exchange, 
Pipeline}, 10 | Scope, 11 | }; 12 | 13 | pub trait ArrangeByKeyExt { 14 | type Output; 15 | 16 | fn arrange_by_key_exchange(&self, route: F) -> Self::Output 17 | where 18 | F: Fn(&K, &V) -> u64 + 'static, 19 | { 20 | self.arrange_by_key_exchange_named("ArrangeByKeyExchange", route) 21 | } 22 | 23 | fn arrange_by_key_exchange_named(&self, name: &str, route: F) -> Self::Output 24 | where 25 | F: Fn(&K, &V) -> u64 + 'static; 26 | 27 | fn arrange_by_key_pipelined(&self) -> Self::Output { 28 | self.arrange_by_key_pipelined_named("ArrangeByKeyPipelined") 29 | } 30 | 31 | fn arrange_by_key_pipelined_named(&self, name: &str) -> Self::Output; 32 | } 33 | 34 | impl ArrangeByKeyExt for Collection 35 | where 36 | S: Scope, 37 | S::Timestamp: Lattice, 38 | K: ExchangeData + Hashable, 39 | V: ExchangeData, 40 | R: Semigroup + ExchangeData, 41 | { 42 | #[allow(clippy::type_complexity)] 43 | type Output = Arranged>>; 44 | 45 | fn arrange_by_key_exchange_named(&self, name: &str, route: F) -> Self::Output 46 | where 47 | F: Fn(&K, &V) -> u64 + 'static, 48 | { 49 | let exchange = Exchange::new(move |((key, value), _time, _diff)| route(key, value)); 50 | self.arrange_core(exchange, name) 51 | } 52 | 53 | fn arrange_by_key_pipelined_named(&self, name: &str) -> Self::Output { 54 | self.arrange_core(Pipeline, name) 55 | } 56 | } 57 | 58 | pub trait ArrangeBySelfExt { 59 | type Output; 60 | 61 | fn arrange_by_self_exchange(&self, route: F) -> Self::Output 62 | where 63 | F: Fn(&K) -> u64 + 'static, 64 | { 65 | self.arrange_by_self_exchange_named("ArrangeBySelfExchange", route) 66 | } 67 | 68 | fn arrange_by_self_exchange_named(&self, name: &str, route: F) -> Self::Output 69 | where 70 | F: Fn(&K) -> u64 + 'static; 71 | 72 | fn arrange_by_self_pipelined(&self) -> Self::Output { 73 | self.arrange_by_self_pipelined_named("ArrangeBySelfPipelined") 74 | } 75 | 76 | fn arrange_by_self_pipelined_named(&self, name: &str) -> Self::Output; 77 | } 78 | 79 | impl ArrangeBySelfExt for Collection 80 | 
where 81 | S: Scope, 82 | S::Timestamp: Lattice, 83 | K: ExchangeData + Hashable, 84 | R: Semigroup + ExchangeData, 85 | { 86 | type Output = Arranged>>; 87 | 88 | fn arrange_by_self_exchange_named(&self, name: &str, route: F) -> Self::Output 89 | where 90 | F: Fn(&K) -> u64 + 'static, 91 | { 92 | let exchange = Exchange::new(move |((key, ()), _time, _diff)| route(key)); 93 | self.map(|key| (key, ())).arrange_core(exchange, name) 94 | } 95 | 96 | fn arrange_by_self_pipelined_named(&self, name: &str) -> Self::Output { 97 | self.map(|key| (key, ())).arrange_core(Pipeline, name) 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/dataflow/operators/bounded_loop.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{difference::Semigroup, AsCollection, Collection, Data}; 2 | use timely::{ 3 | dataflow::{ 4 | operators::{feedback::LoopVariable, BranchWhen, ConnectLoop}, 5 | scopes::child::Iterative, 6 | Scope, 7 | }, 8 | progress::Timestamp, 9 | }; 10 | 11 | pub trait BoundedLoop 12 | where 13 | S: Scope, 14 | D: Data, 15 | R: Semigroup, 16 | { 17 | fn bounded_loop(&self, max_iterations: T, looped: F) -> Self 18 | where 19 | Self: Sized, 20 | T: Semigroup + Timestamp + From, 21 | for<'a> F: 22 | FnOnce(&Collection, D, R>) -> Collection, D, R>, 23 | { 24 | self.bounded_loop_named("BoundedLoop", max_iterations, looped) 25 | } 26 | 27 | fn bounded_loop_named(&self, name: &str, max_iterations: T, looped: F) -> Self 28 | where 29 | T: Semigroup + Timestamp + From, 30 | for<'a> F: 31 | FnOnce(&Collection, D, R>) -> Collection, D, R>; 32 | } 33 | 34 | impl BoundedLoop for Collection 35 | where 36 | S: Scope, 37 | D: Data, 38 | R: Semigroup, 39 | { 40 | fn bounded_loop_named(&self, name: &str, max_iterations: T, looped: F) -> Self 41 | where 42 | T: Semigroup + Timestamp + From, 43 | for<'a> F: 44 | FnOnce(&Collection, D, R>) -> Collection, D, R>, 45 | { 46 | 
self.scope().scoped(name, move |scope| { 47 | let (handle, cycle) = scope.loop_variable(T::from(1)); 48 | 49 | let collection = looped(&self.enter(scope).concat(&cycle.as_collection())); 50 | let (connected, _discarded) = collection 51 | .inner 52 | .branch_when(move |time| time.inner >= max_iterations); 53 | 54 | connected.connect_loop(handle); 55 | collection.leave() 56 | }) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/dataflow/operators/buffered_flat_map.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{difference::Semigroup, AsCollection, Collection}; 2 | use timely::{ 3 | dataflow::{channels::pact::Pipeline, operators::Operator, Scope, Stream}, 4 | Data, 5 | }; 6 | 7 | pub trait BufferedFlatMap { 8 | type Output; 9 | 10 | fn buffered_flat_map(&self, logic: L) -> Self::Output 11 | where 12 | L: FnMut(D, &mut AppendOnlyVec) + 'static, 13 | { 14 | self.buffered_flat_map_named("BufferedFlatMap", logic) 15 | } 16 | 17 | fn buffered_flat_map_named(&self, name: &str, logic: L) -> Self::Output 18 | where 19 | L: FnMut(D, &mut AppendOnlyVec) + 'static; 20 | } 21 | 22 | impl BufferedFlatMap for Stream 23 | where 24 | S: Scope, 25 | D: Data, 26 | D2: Data, 27 | { 28 | type Output = Stream; 29 | 30 | fn buffered_flat_map_named(&self, name: &str, mut logic: L) -> Self::Output 31 | where 32 | L: FnMut(D, &mut AppendOnlyVec) + 'static, 33 | { 34 | let mut buffer = Vec::new(); 35 | let mut user_buffer = AppendOnlyVec::new(); 36 | 37 | self.unary(Pipeline, name, move |_capability, _info| { 38 | move |input, output| { 39 | input.for_each(|capability, data| { 40 | data.swap(&mut buffer); 41 | 42 | for data in buffer.drain(..) 
{ 43 | logic(data, &mut user_buffer); 44 | } 45 | 46 | output 47 | .session(&capability) 48 | .give_iterator(user_buffer.0.drain(..)) 49 | }); 50 | } 51 | }) 52 | } 53 | } 54 | 55 | impl BufferedFlatMap for Collection 56 | where 57 | S: Scope, 58 | S::Timestamp: Clone, 59 | D: Data, 60 | D2: Data, 61 | R: Semigroup + Clone, 62 | { 63 | type Output = Collection; 64 | 65 | fn buffered_flat_map_named(&self, name: &str, mut logic: L) -> Self::Output 66 | where 67 | L: FnMut(D, &mut AppendOnlyVec) + 'static, 68 | { 69 | let mut user_buffer = AppendOnlyVec::new(); 70 | 71 | self.inner 72 | .buffered_flat_map_named(name, move |(data, time, diff), buffer| { 73 | logic(data, &mut user_buffer); 74 | 75 | buffer.extend( 76 | user_buffer 77 | .0 78 | .drain(..) 79 | .map(|data| (data, time.clone(), diff.clone())), 80 | ); 81 | }) 82 | .as_collection() 83 | } 84 | } 85 | 86 | #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 87 | pub struct AppendOnlyVec(Vec); 88 | 89 | impl AppendOnlyVec { 90 | #[inline] 91 | pub const fn new() -> Self { 92 | Self(Vec::new()) 93 | } 94 | 95 | #[inline] 96 | pub fn push(&mut self, value: T) { 97 | self.0.push(value) 98 | } 99 | 100 | #[inline] 101 | pub fn append(&mut self, other: &mut Vec) { 102 | self.0.append(other) 103 | } 104 | 105 | #[inline] 106 | pub fn reserve(&mut self, additional: usize) { 107 | self.0.reserve(additional) 108 | } 109 | 110 | #[inline] 111 | pub fn reserve_exact(&mut self, additional: usize) { 112 | self.0.reserve_exact(additional) 113 | } 114 | } 115 | 116 | impl<'a, T: 'a + Copy> Extend<&'a T> for AppendOnlyVec { 117 | #[inline] 118 | fn extend>(&mut self, iter: I) { 119 | self.0.extend(iter) 120 | } 121 | } 122 | 123 | impl Extend for AppendOnlyVec { 124 | #[inline] 125 | fn extend>(&mut self, iter: I) { 126 | self.0.extend(iter) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/dataflow/operators/collect.rs: 
-------------------------------------------------------------------------------- 1 | use std::mem; 2 | 3 | use crate::{ 4 | dataflow::operators::{BufferedFlatMap, FilterMap, MapExt}, 5 | repr::{Cast, InstId, Instruction, InstructionExt, RawCast, Type, TypedVar, Value, VarId}, 6 | }; 7 | use differential_dataflow::{difference::Semigroup, Collection, Data}; 8 | use timely::dataflow::Scope; 9 | 10 | pub trait CollectUsages { 11 | type Output; 12 | 13 | fn collect_usages(&self) -> Self::Output { 14 | self.collect_usages_named("CollectUsages") 15 | } 16 | 17 | fn collect_usages_named(&self, name: &str) -> Self::Output; 18 | } 19 | 20 | impl CollectUsages for Collection 21 | where 22 | S: Scope, 23 | R: Semigroup, 24 | { 25 | type Output = Collection; 26 | 27 | fn collect_usages_named(&self, name: &str) -> Self::Output { 28 | self.buffered_flat_map_named(name, |(id, inst), buf| { 29 | buf.extend(inst.used_vars().into_iter().map(move |var| (var, id))); 30 | }) 31 | } 32 | } 33 | 34 | pub trait CollectDeclarations { 35 | type Output; 36 | 37 | fn collect_declarations(&self) -> Self::Output { 38 | self.collect_declarations_named("CollectDeclarations") 39 | } 40 | 41 | fn collect_declarations_named(&self, name: &str) -> Self::Output; 42 | } 43 | 44 | impl CollectDeclarations for Collection 45 | where 46 | S: Scope, 47 | R: Semigroup, 48 | { 49 | type Output = Collection; 50 | 51 | fn collect_declarations_named(&self, name: &str) -> Self::Output { 52 | self.map_named(name, |(id, inst)| { 53 | (TypedVar::new(inst.dest(), inst.dest_type()), id) 54 | }) 55 | } 56 | } 57 | 58 | pub trait CollectVariableTypes { 59 | type Output; 60 | 61 | fn collect_var_types(&self) -> Self::Output { 62 | self.collect_var_types_named("CollectVariableTypes") 63 | } 64 | 65 | fn collect_var_types_named(&self, name: &str) -> Self::Output; 66 | } 67 | 68 | impl CollectVariableTypes for Collection 69 | where 70 | S: Scope, 71 | R: Semigroup, 72 | { 73 | type Output = Collection; 74 | 75 | fn 
collect_var_types_named(&self, name: &str) -> Self::Output { 76 | self.map_named(name, |(_id, inst)| (inst.dest(), inst.dest_type())) 77 | } 78 | } 79 | 80 | pub trait CollectValues { 81 | type Output; 82 | 83 | fn collect_values(&self) -> Self::Output { 84 | self.collect_values_named("CollectValues") 85 | } 86 | 87 | fn collect_values_named(&self, name: &str) -> Self::Output; 88 | } 89 | 90 | impl CollectValues for Collection 91 | where 92 | S: Scope, 93 | R: Semigroup, 94 | { 95 | type Output = Collection; 96 | 97 | fn collect_values_named(&self, name: &str) -> Self::Output { 98 | let mut buffer = Vec::new(); 99 | 100 | self.buffered_flat_map_named(name, move |(id, inst), buf| { 101 | // Safety: `buffer` is cleared before the values go out of scope 102 | inst.used_values_into(unsafe { 103 | mem::transmute::<&mut Vec<&Value>, &mut Vec<&Value>>(&mut buffer) 104 | }); 105 | 106 | buf.extend(buffer.drain(..).cloned().map(move |val| (id, val))); 107 | }) 108 | } 109 | } 110 | 111 | pub trait CollectCastable 112 | where 113 | S: Scope, 114 | R: Semigroup, 115 | { 116 | fn collect_castable(&self) -> Collection 117 | where 118 | T: RawCast, 119 | U: Data, 120 | { 121 | self.collect_castable_named("CollectCastable") 122 | } 123 | 124 | fn collect_castable_named(&self, name: &str) -> Collection 125 | where 126 | T: RawCast, 127 | U: Data; 128 | } 129 | 130 | impl CollectCastable for Collection 131 | where 132 | S: Scope, 133 | T: Data, 134 | I: Data, 135 | R: Semigroup, 136 | { 137 | fn collect_castable_named(&self, name: &str) -> Collection 138 | where 139 | T: RawCast, 140 | U: Data, 141 | { 142 | self.filter_map_named(name, |(id, value)| { 143 | value.cast::().map(move |val| (id, val)) 144 | }) 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/dataflow/operators/count_ext.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{ 2 | difference::{Abelian, 
Semigroup}, 3 | lattice::Lattice, 4 | operators::{ 5 | arrange::{ArrangeBySelf, Arranged}, 6 | reduce::ReduceCore, 7 | }, 8 | trace::{implementations::ord::OrdValSpine, BatchReader, Cursor, TraceReader}, 9 | Collection, ExchangeData, Hashable, 10 | }; 11 | use timely::dataflow::Scope; 12 | 13 | pub trait CountExt 14 | where 15 | S: Scope, 16 | R1: Semigroup, 17 | { 18 | fn count_core(&self) -> Collection 19 | where 20 | R2: Semigroup + Abelian + From; 21 | } 22 | 23 | impl CountExt for Collection 24 | where 25 | S: Scope, 26 | S::Timestamp: Lattice + Ord, 27 | K: ExchangeData + Hashable, 28 | R1: Semigroup + ExchangeData, 29 | { 30 | fn count_core(&self) -> Collection 31 | where 32 | R2: Semigroup + Abelian + From, 33 | { 34 | self.arrange_by_self_named("Arrange: Count").count_core() 35 | } 36 | } 37 | 38 | impl CountExt for Arranged 39 | where 40 | S: Scope, 41 | S::Timestamp: Lattice + Ord, 42 | K: ExchangeData + Hashable, 43 | R1: Semigroup + ExchangeData, 44 | A1: TraceReader + Clone + 'static, 45 | A1::Batch: BatchReader, 46 | A1::Cursor: Cursor, 47 | { 48 | fn count_core(&self) -> Collection 49 | where 50 | R2: Semigroup + Abelian + From, 51 | { 52 | self.reduce_abelian::<_, OrdValSpine<_, _, _, _>>("Count", |_key, input, output| { 53 | output.push((input[0].1.clone(), R2::from(1))) 54 | }) 55 | .as_collection(|key, count| (key.clone(), count.clone())) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/dataflow/operators/discriminated_idents.rs: -------------------------------------------------------------------------------- 1 | use abomonation::Abomonation; 2 | use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; 3 | use differential_dataflow::{ 4 | algorithms::identifiers::Identifiers, difference::Abelian, lattice::Lattice, Collection, 5 | ExchangeData, 6 | }; 7 | use std::{ 8 | fmt::{self, Debug, Display}, 9 | hash::{Hash, Hasher}, 10 | io, mem, 11 | }; 12 | use timely::dataflow::Scope; 13 | 14 | 
pub trait DiscriminatedIdents { 15 | type Output; 16 | 17 | fn discriminated_idents(&self, discriminant: Discriminant) -> Self::Output; 18 | } 19 | 20 | impl DiscriminatedIdents for Collection 21 | where 22 | S: Scope, 23 | S::Timestamp: Lattice, 24 | D: ExchangeData + Hash, 25 | R: Abelian + ExchangeData, 26 | Discriminant: Display + Clone + 'static, 27 | Ident: Identifier + Clone + 'static, 28 | { 29 | type Output = Collection; 30 | 31 | fn discriminated_idents(&self, discriminant: Discriminant) -> Self::Output { 32 | tracing::debug!( 33 | discriminant = %discriminant, 34 | "initializing a discriminated idents producer", 35 | ); 36 | 37 | self.identifiers() 38 | .map(move |(data, hash)| (data, Ident::new_ident(discriminant.clone(), hash))) 39 | } 40 | } 41 | 42 | pub trait Identifier { 43 | type Discriminant; 44 | 45 | fn new_ident(discriminant: Self::Discriminant, hash: u64) -> Self; 46 | } 47 | 48 | #[derive(Clone, Copy, PartialOrd, Ord)] 49 | pub struct Uuid { 50 | discriminant: u8, 51 | hash: u64, 52 | } 53 | 54 | impl Uuid { 55 | pub const fn new(discriminant: u8, hash: u64) -> Self { 56 | Self { discriminant, hash } 57 | } 58 | 59 | const fn from_u128(uuid: u128) -> Self { 60 | Self::from_le_bytes(uuid.to_le_bytes()) 61 | } 62 | 63 | #[allow(clippy::many_single_char_names)] 64 | const fn from_le_bytes(bytes: [u8; mem::size_of::()]) -> Self { 65 | let [a, _, _, _, _, _, _, _, b, c, d, e, f, g, h, i] = bytes; 66 | 67 | Self { 68 | discriminant: u8::from_le_bytes([a]), 69 | hash: u64::from_le_bytes([b, c, d, e, f, g, h, i]), 70 | } 71 | } 72 | 73 | const fn as_u128(&self) -> u128 { 74 | u128::from_le_bytes(self.to_le_bytes()) 75 | } 76 | 77 | #[allow(clippy::many_single_char_names)] 78 | const fn to_le_bytes(&self) -> [u8; mem::size_of::()] { 79 | let [a] = self.discriminant.to_le_bytes(); 80 | let [b, c, d, e, f, g, h, i] = self.hash.to_le_bytes(); 81 | 82 | [a, 0, 0, 0, 0, 0, 0, 0, b, c, d, e, f, g, h, i] 83 | } 84 | } 85 | 86 | impl Identifier for Uuid { 87 
| type Discriminant = u8; 88 | 89 | fn new_ident(discriminant: Self::Discriminant, hash: u64) -> Self { 90 | Self::new(discriminant, hash) 91 | } 92 | } 93 | 94 | impl Debug for Uuid { 95 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 96 | write!(fmt, "{:#02X}{:08X}", self.discriminant, self.hash) 97 | } 98 | } 99 | 100 | impl Display for Uuid { 101 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 102 | write!(fmt, "{:#02X}{:08X}", self.discriminant, self.hash) 103 | } 104 | } 105 | 106 | impl PartialEq for Uuid { 107 | fn eq(&self, other: &Uuid) -> bool { 108 | self.as_u128() == other.as_u128() 109 | } 110 | } 111 | 112 | impl Eq for Uuid {} 113 | 114 | impl Hash for Uuid { 115 | fn hash(&self, state: &mut H) { 116 | state.write_u128(self.as_u128()); 117 | } 118 | } 119 | 120 | impl Abomonation for Uuid { 121 | unsafe fn entomb(&self, write: &mut W) -> io::Result<()> { 122 | write.write_u128::(self.as_u128()) 123 | } 124 | 125 | unsafe fn exhume<'a, 'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { 126 | *self = Self::from_u128((&*bytes).read_u128::().ok()?); 127 | Some(&mut bytes[mem::size_of::()..]) 128 | } 129 | 130 | fn extent(&self) -> usize { 131 | mem::size_of::() 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/dataflow/operators/distinct.rs: -------------------------------------------------------------------------------- 1 | use crate::dataflow::operators::ArrangeBySelfExt; 2 | use differential_dataflow::{ 3 | difference::{Abelian, Semigroup}, 4 | lattice::Lattice, 5 | operators::{arrange::Arranged, Threshold}, 6 | trace::{BatchReader, Cursor, TraceReader}, 7 | Collection, ExchangeData, Hashable, 8 | }; 9 | use timely::dataflow::Scope; 10 | 11 | pub trait DistinctExt { 12 | type Output; 13 | 14 | fn distinct_exchange(&self, route: F) -> Self::Output 15 | where 16 | F: Fn(&D) -> u64 + 'static, 17 | { 18 | self.distinct_exchange_named("DistinctExchange", 
route) 19 | } 20 | 21 | fn distinct_exchange_named(&self, name: &str, route: F) -> Self::Output 22 | where 23 | F: Fn(&D) -> u64 + 'static; 24 | 25 | fn distinct_pipelined(&self) -> Self::Output { 26 | self.distinct_pipelined_named("DistinctPipelined") 27 | } 28 | 29 | fn distinct_pipelined_named(&self, name: &str) -> Self::Output; 30 | } 31 | 32 | impl DistinctExt for Collection 33 | where 34 | S: Scope, 35 | S::Timestamp: Lattice, 36 | D: ExchangeData + Hashable, 37 | R1: Semigroup + ExchangeData, 38 | R2: Semigroup + Abelian + From, 39 | { 40 | type Output = Collection; 41 | 42 | fn distinct_exchange_named(&self, name: &str, route: F) -> Self::Output 43 | where 44 | F: Fn(&D) -> u64 + 'static, 45 | { 46 | self.arrange_by_self_exchange_named("Arrange: DistinctExchange", route) 47 | .threshold_named(name, |_, _| R2::from(1)) 48 | } 49 | 50 | fn distinct_pipelined_named(&self, name: &str) -> Self::Output { 51 | self.arrange_by_self_pipelined_named("Arrange: DistinctPipelined") 52 | .threshold_named(name, |_, _| R2::from(1)) 53 | } 54 | } 55 | 56 | impl DistinctExt for Arranged 57 | where 58 | S: Scope, 59 | S::Timestamp: Lattice, 60 | K: ExchangeData + Hashable, 61 | R1: Semigroup + ExchangeData, 62 | R2: Semigroup + Abelian + From, 63 | A: TraceReader + Clone + 'static, 64 | A::Batch: BatchReader, 65 | A::Cursor: Cursor, 66 | { 67 | type Output = Collection; 68 | 69 | // TODO: It's funky that this is a noop 70 | fn distinct_exchange_named(&self, name: &str, _route: F) -> Self::Output 71 | where 72 | F: Fn(&K) -> u64 + 'static, 73 | { 74 | self.threshold_named(name, |_, _| R2::from(1)) 75 | } 76 | 77 | fn distinct_pipelined_named(&self, name: &str) -> Self::Output { 78 | self.threshold_named(name, |_, _| R2::from(1)) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/dataflow/operators/event_utils.rs: -------------------------------------------------------------------------------- 1 | use 
crossbeam_channel::{Receiver, Sender}; 2 | use std::{ 3 | mem, 4 | ops::{Deref, DerefMut}, 5 | }; 6 | use timely::dataflow::operators::capture::{Event, EventPusher, Extract}; 7 | 8 | #[derive(Debug)] 9 | pub struct CrossbeamPusher(pub Sender); 10 | 11 | impl CrossbeamPusher { 12 | pub fn new(sender: Sender) -> Self { 13 | Self(sender) 14 | } 15 | } 16 | 17 | impl EventPusher for CrossbeamPusher> { 18 | fn push(&mut self, event: Event) { 19 | // Ignore any errors that occur 20 | let _ = self.send(event); 21 | } 22 | } 23 | 24 | impl Clone for CrossbeamPusher { 25 | fn clone(&self) -> Self { 26 | Self(self.0.clone()) 27 | } 28 | } 29 | 30 | impl Deref for CrossbeamPusher { 31 | type Target = Sender; 32 | 33 | fn deref(&self) -> &Self::Target { 34 | &self.0 35 | } 36 | } 37 | 38 | impl DerefMut for CrossbeamPusher { 39 | fn deref_mut(&mut self) -> &mut Self::Target { 40 | &mut self.0 41 | } 42 | } 43 | 44 | #[derive(Debug)] 45 | pub struct CrossbeamExtractor(pub Receiver); 46 | 47 | impl CrossbeamExtractor { 48 | pub fn new(receiver: Receiver) -> Self { 49 | Self(receiver) 50 | } 51 | } 52 | 53 | impl Extract for CrossbeamExtractor> { 54 | fn extract(self) -> Vec<(T, Vec)> { 55 | let mut result = Vec::new(); 56 | for event in self { 57 | if let Event::Messages(time, data) = event { 58 | result.push((time, data)); 59 | } 60 | } 61 | result.sort_by(|x, y| x.0.cmp(&y.0)); 62 | 63 | let mut current = 0; 64 | for i in 1..result.len() { 65 | if result[current].0 == result[i].0 { 66 | let data = mem::take(&mut result[i].1); 67 | result[current].1.extend(data); 68 | } else { 69 | current = i; 70 | } 71 | } 72 | 73 | for &mut (_, ref mut data) in &mut result { 74 | data.sort(); 75 | } 76 | result.retain(|x| !x.1.is_empty()); 77 | result 78 | } 79 | } 80 | 81 | impl IntoIterator for CrossbeamExtractor { 82 | type IntoIter = crossbeam_channel::IntoIter; 83 | type Item = T; 84 | 85 | fn into_iter(self) -> Self::IntoIter { 86 | self.0.into_iter() 87 | } 88 | } 89 | 90 | impl Clone 
for CrossbeamExtractor { 91 | fn clone(&self) -> Self { 92 | Self(self.0.clone()) 93 | } 94 | } 95 | 96 | impl Deref for CrossbeamExtractor { 97 | type Target = Receiver; 98 | 99 | fn deref(&self) -> &Self::Target { 100 | &self.0 101 | } 102 | } 103 | 104 | impl DerefMut for CrossbeamExtractor { 105 | fn deref_mut(&mut self) -> &mut Self::Target { 106 | &mut self.0 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/dataflow/operators/exchange.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{difference::Semigroup, AsCollection, Collection, ExchangeData}; 2 | use timely::dataflow::{channels::pact::Exchange, operators::Operator, Scope, Stream}; 3 | 4 | pub trait ExchangeExt { 5 | fn exchange(&self, route: F) -> Self 6 | where 7 | F: Fn(&D) -> u64 + 'static, 8 | Self: Sized, 9 | { 10 | self.exchange_named("Exchange", route) 11 | } 12 | 13 | fn exchange_named(&self, name: &str, route: F) -> Self 14 | where 15 | F: Fn(&D) -> u64 + 'static, 16 | Self: Sized; 17 | } 18 | 19 | impl ExchangeExt for Stream 20 | where 21 | S: Scope, 22 | D: ExchangeData, 23 | { 24 | fn exchange_named(&self, name: &str, route: F) -> Self 25 | where 26 | F: Fn(&D) -> u64 + 'static, 27 | Self: Sized, 28 | { 29 | self.unary(Exchange::new(route), name, move |_capability, _info| { 30 | let mut buffer = Vec::new(); 31 | 32 | move |input, output| { 33 | input.for_each(|time, data| { 34 | data.swap(&mut buffer); 35 | output.session(&time).give_vec(&mut buffer); 36 | }); 37 | } 38 | }) 39 | } 40 | } 41 | 42 | impl ExchangeExt for Collection 43 | where 44 | S: Scope, 45 | S::Timestamp: ExchangeData, 46 | R: Semigroup + ExchangeData, 47 | D: ExchangeData, 48 | { 49 | fn exchange_named(&self, name: &str, route: F) -> Self 50 | where 51 | F: Fn(&D) -> u64 + 'static, 52 | Self: Sized, 53 | { 54 | self.inner 55 | .exchange_named(name, move |(data, _time, _diff)| route(data)) 56 | 
.as_collection() 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/dataflow/operators/filter_diff.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{collection::AsCollection, difference::Monoid, Collection}; 2 | use timely::{ 3 | dataflow::{channels::pact::Pipeline, operators::Operator, Scope}, 4 | Data, 5 | }; 6 | 7 | pub trait FilterDiff { 8 | type Output; 9 | 10 | fn filter_diff(&self, logic: L) -> Self::Output 11 | where 12 | L: FnMut(&R) -> bool + 'static, 13 | { 14 | self.filter_diff_named("FilterDiff", logic) 15 | } 16 | 17 | fn filter_diff_named(&self, name: &str, logic: L) -> Self::Output 18 | where 19 | L: FnMut(&R) -> bool + 'static; 20 | } 21 | 22 | impl FilterDiff for Collection 23 | where 24 | S: Scope, 25 | D: Data, 26 | R: Monoid, 27 | { 28 | type Output = Collection; 29 | 30 | fn filter_diff_named(&self, name: &str, mut logic: L) -> Self::Output 31 | where 32 | L: FnMut(&R) -> bool + 'static, 33 | { 34 | let mut buffer = Vec::new(); 35 | self.inner 36 | .unary(Pipeline, name, move |_capability, _info| { 37 | move |input, output| { 38 | input.for_each(|capability, data| { 39 | data.swap(&mut buffer); 40 | 41 | output.session(&capability).give_iterator( 42 | buffer.drain(..).filter(|(_data, _time, diff)| logic(diff)), 43 | ); 44 | }); 45 | } 46 | }) 47 | .as_collection() 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/dataflow/operators/filter_map.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{collection::AsCollection, difference::Semigroup, Collection}; 2 | use timely::{ 3 | dataflow::{channels::pact::Pipeline, operators::Operator, Scope, Stream}, 4 | Data, 5 | }; 6 | 7 | pub trait FilterMap { 8 | type Output; 9 | 10 | fn filter_map(&self, logic: L) -> Self::Output 11 | where 12 | L: FnMut(D) -> Option + 
'static, 13 | { 14 | self.filter_map_named("FilterMap", logic) 15 | } 16 | 17 | fn filter_map_named(&self, name: &str, logic: L) -> Self::Output 18 | where 19 | L: FnMut(D) -> Option + 'static; 20 | } 21 | 22 | impl FilterMap for Stream 23 | where 24 | S: Scope, 25 | D: Data, 26 | D2: Data, 27 | { 28 | type Output = Stream; 29 | 30 | fn filter_map_named(&self, name: &str, mut logic: L) -> Self::Output 31 | where 32 | L: FnMut(D) -> Option + 'static, 33 | { 34 | let mut buffer = Vec::new(); 35 | 36 | self.unary(Pipeline, name, move |_capability, _info| { 37 | move |input, output| { 38 | input.for_each(|capability, data| { 39 | data.swap(&mut buffer); 40 | 41 | output 42 | .session(&capability) 43 | .give_iterator(buffer.drain(..).filter_map(|data| logic(data))); 44 | }); 45 | } 46 | }) 47 | } 48 | } 49 | 50 | impl FilterMap for Collection 51 | where 52 | S: Scope, 53 | D: Data, 54 | D2: Data, 55 | R: Semigroup, 56 | { 57 | type Output = Collection; 58 | 59 | fn filter_map_named(&self, name: &str, mut logic: L) -> Self::Output 60 | where 61 | L: FnMut(D) -> Option + 'static, 62 | { 63 | self.inner 64 | .filter_map_named(name, move |(data, time, diff)| { 65 | logic(data).map(|data| (data, time, diff)) 66 | }) 67 | .as_collection() 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/dataflow/operators/flatten.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{difference::Semigroup, AsCollection, Collection, Data}; 2 | use timely::dataflow::{channels::pact::Pipeline, operators::Operator, Scope, Stream}; 3 | 4 | pub trait Flatten { 5 | type Output; 6 | 7 | fn flatten(&self) -> Self::Output 8 | where 9 | D1: IntoIterator, 10 | { 11 | self.flatten_named("Flatten") 12 | } 13 | 14 | fn flatten_named(&self, name: &str) -> Self::Output 15 | where 16 | D1: IntoIterator; 17 | } 18 | 19 | impl Flatten for Stream 20 | where 21 | S: Scope, 22 | D1: Data, 23 | D2: 
Data, 24 | { 25 | type Output = Stream; 26 | 27 | fn flatten_named(&self, name: &str) -> Self::Output 28 | where 29 | D1: IntoIterator, 30 | { 31 | let mut buffer = Vec::new(); 32 | 33 | self.unary(Pipeline, name, move |_capability, _info| { 34 | move |input, output| { 35 | input.for_each(|capability, data| { 36 | data.swap(&mut buffer); 37 | 38 | output 39 | .session(&capability) 40 | .give_iterator(buffer.drain(..).flatten()); 41 | }); 42 | } 43 | }) 44 | } 45 | } 46 | 47 | impl Flatten for Collection 48 | where 49 | S: Scope, 50 | S::Timestamp: Clone, 51 | D1: Data, 52 | D2: Data, 53 | R: Semigroup + Clone, 54 | { 55 | type Output = Collection; 56 | 57 | fn flatten_named(&self, name: &str) -> Self::Output 58 | where 59 | D1: IntoIterator, 60 | { 61 | let mut buffer = Vec::new(); 62 | 63 | self.inner 64 | .unary(Pipeline, name, move |_capability, _info| { 65 | move |input, output| { 66 | input.for_each(|capability, data| { 67 | data.swap(&mut buffer); 68 | 69 | let mut session = output.session(&capability); 70 | for (data, time, diff) in buffer.drain(..) 
{ 71 | session.give_iterator( 72 | data.into_iter() 73 | .map(|data| (data, time.clone(), diff.clone())), 74 | ); 75 | } 76 | }); 77 | } 78 | }) 79 | .as_collection() 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/dataflow/operators/inspect.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{ 2 | difference::{Abelian, Multiply, Semigroup}, 3 | lattice::Lattice, 4 | operators::{Consolidate, Reduce}, 5 | Collection, Data, ExchangeData, Hashable, 6 | }; 7 | use std::{fmt::Debug, panic::Location}; 8 | use timely::dataflow::{operators::Inspect, Scope, Stream}; 9 | 10 | pub trait InspectExt { 11 | type Value; 12 | 13 | fn debug_inspect(&self, inspect: F) -> Self 14 | where 15 | F: FnMut(&Self::Value) + 'static; 16 | 17 | #[track_caller] 18 | fn debug(&self) -> Self 19 | where 20 | Self: Sized, 21 | Self::Value: Debug, 22 | { 23 | let location = Location::caller(); 24 | self.debug_inspect(move |value| { 25 | eprintln!( 26 | "[{}:{}:{}] {:?}", 27 | location.file(), 28 | location.line(), 29 | location.column(), 30 | value, 31 | ); 32 | }) 33 | } 34 | } 35 | 36 | impl InspectExt for Collection 37 | where 38 | S: Scope, 39 | D: Data, 40 | R: Semigroup, 41 | { 42 | type Value = (D, S::Timestamp, R); 43 | 44 | fn debug_inspect(&self, inspect: F) -> Self 45 | where 46 | F: FnMut(&Self::Value) + 'static, 47 | { 48 | if cfg!(debug_assertions) { 49 | self.inspect(inspect) 50 | } else { 51 | self.clone() 52 | } 53 | } 54 | } 55 | 56 | impl InspectExt for Stream 57 | where 58 | S: Scope, 59 | D: Data, 60 | { 61 | type Value = D; 62 | 63 | fn debug_inspect(&self, inspect: F) -> Self 64 | where 65 | F: FnMut(&Self::Value) + 'static, 66 | { 67 | if cfg!(debug_assertions) { 68 | self.inspect(inspect) 69 | } else { 70 | self.clone() 71 | } 72 | } 73 | } 74 | 75 | pub trait AggregatedDebug { 76 | type Data; 77 | type Time; 78 | type Diff; 79 | 80 | fn 
inspect_aggregate(&self, inspect: F) -> Self 81 | where 82 | F: FnMut(&Self::Time, &[(Self::Data, Self::Diff)]) + 'static; 83 | 84 | #[track_caller] 85 | fn debug_aggregate(&self) -> Self 86 | where 87 | Self: Sized, 88 | Self::Data: Debug, 89 | Self::Time: Debug, 90 | Self::Diff: Debug, 91 | { 92 | let location = Location::caller(); 93 | self.inspect_aggregate(move |time, value| { 94 | eprintln!( 95 | "[{}:{}:{}] {:?} @ {:?}", 96 | location.file(), 97 | location.line(), 98 | location.column(), 99 | value, 100 | time, 101 | ); 102 | }) 103 | } 104 | } 105 | 106 | impl AggregatedDebug for Collection 107 | where 108 | S: Scope, 109 | S::Timestamp: Lattice, 110 | D: ExchangeData + Hashable, 111 | R: ExchangeData + Abelian + Multiply + From, 112 | ((), D): Hashable, 113 | { 114 | type Data = D; 115 | type Time = S::Timestamp; 116 | type Diff = R; 117 | 118 | fn inspect_aggregate(&self, mut inspect: F) -> Self 119 | where 120 | F: FnMut(&Self::Time, &[(Self::Data, Self::Diff)]) + 'static, 121 | { 122 | if cfg!(debug_assertions) { 123 | self.map(|data| ((), data)) 124 | .consolidate() 125 | .reduce(|&(), input, output| { 126 | let aggregate: Vec<_> = input 127 | .iter() 128 | .map(|(data, diff)| ((*data).clone(), diff.clone())) 129 | .collect(); 130 | 131 | output.push((aggregate, R::from(1))); 132 | }) 133 | .inspect(move |(((), aggregate), time, _)| inspect(time, aggregate)) 134 | .explode(|((), data)| data) 135 | } else { 136 | self.clone() 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/dataflow/operators/join.rs: -------------------------------------------------------------------------------- 1 | use differential_dataflow::{ 2 | difference::{Multiply, Semigroup}, 3 | lattice::Lattice, 4 | operators::{ 5 | arrange::{ArrangeByKey, ArrangeBySelf, Arranged}, 6 | JoinCore, 7 | }, 8 | trace::TraceReader, 9 | Collection, ExchangeData, Hashable, 10 | }; 11 | use timely::dataflow::Scope; 12 | 13 | pub trait 
SemijoinExt { 14 | fn semijoin(&self, other: &Other) -> Collection>::Output> 15 | where 16 | S: Scope, 17 | K: ExchangeData + Hashable, 18 | V: ExchangeData, 19 | R: ExchangeData + Semigroup, 20 | R2: ExchangeData + Semigroup, 21 | R: Multiply, 22 | >::Output: Semigroup; 23 | } 24 | 25 | impl SemijoinExt> 26 | for Arranged 27 | where 28 | S: Scope, 29 | S::Timestamp: Lattice, 30 | R2: Semigroup, 31 | Trace1: TraceReader