├── .dockerignore ├── .gitignore ├── assets └── architecture_diagram.png ├── scripts ├── client └── transact ├── Dockerfile.init ├── src ├── test │ ├── common_test.rs │ ├── slave_test.rs │ ├── node_test.rs │ ├── master_test.rs │ ├── storage_test.rs │ ├── coord_test.rs │ ├── tablet_test.rs │ ├── expression_test.rs │ └── query_converter_test.rs ├── bin │ ├── paxos2pc_sim │ │ ├── main.rs │ │ ├── tests.rs │ │ ├── message.rs │ │ ├── simple_rm_es.rs │ │ ├── simple_tm_es.rs │ │ ├── stm_simple_rm_es.rs │ │ ├── tests_paxos2pc.rs │ │ ├── tests_stmpaxos2pc.rs │ │ └── stm_simple_tm_es.rs │ ├── experimental │ │ └── main.rs │ ├── transact │ │ └── main.rs │ ├── paxos │ │ └── main.rs │ └── simtest │ │ └── main.rs ├── lib.rs ├── simulation_utils.rs ├── experimental.rs ├── lang.rs ├── test_utils.rs ├── finish_query_tm_es.rs ├── finish_query_rm_es.rs ├── shard_split_slave_rm_es.rs ├── shard_split_tablet_rm_es.rs ├── drop_table_rm_es.rs ├── ms_table_read_es.rs ├── alter_table_rm_es.rs ├── network_driver.rs ├── query_planning.rs ├── multiversion_map.rs ├── tm_status.rs ├── create_table_rm_es.rs ├── slave_group_create_es.rs ├── ms_table_delete_es.rs └── shard_snapshot_es.rs ├── Dockerfile ├── Cargo.toml ├── rustfmt.toml ├── LICENSE ├── notes.md └── run /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .idea 3 | target 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .vscode 3 | .idea 4 | 5 | # OS 6 | .DS_Store 7 | 8 | # Build 9 | target 10 | -------------------------------------------------------------------------------- /assets/architecture_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasindumuth/rUniversalDB/HEAD/assets/architecture_diagram.png -------------------------------------------------------------------------------- /scripts/client: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create a 1ms egress delay in the network (to be closer to simulating a real network) 4 | tc qdisc add dev eth0 root netem delay 1ms 5 | 6 | # Run the binary 7 | target/debug/client "${@}" 8 | -------------------------------------------------------------------------------- /scripts/transact: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create a 1ms egress delay in the network (to be closer to simulating a real network) 4 | tc qdisc add dev eth0 root netem delay 1ms 5 | 6 | # Run the binary 7 | target/debug/transact "${@}" 8 | -------------------------------------------------------------------------------- /Dockerfile.init: -------------------------------------------------------------------------------- 1 | FROM rustlang/rust:nightly 2 | WORKDIR /home 3 | COPY ./ ./ 4 | 5 | # Install the `tc` command 6 | RUN apt-get update && apt-get install -y iproute2 7 | 8 | # Build 9 | RUN cargo build --bin transact; cargo build --bin client; 10 | -------------------------------------------------------------------------------- /src/test/common_test.rs: -------------------------------------------------------------------------------- 1 | use crate::common::Timestamp; 2 | 3 | #[test] 4 | fn timestamp_test() { 5 | assert_eq!(Timestamp::new(1, 2).add(Timestamp::new(1, 1)), Timestamp::new(2, 3)); 6 | assert_eq!(Timestamp::new(1, 
2).add(Timestamp::new(1, u64::MAX)), Timestamp::new(3, 1)); 7 | } 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM runiversal:latest 2 | WORKDIR /home 3 | # If we delete files, make sure to run rm -rf to get these files 4 | # out from the previous version of runiversal we bring in. The COPY 5 | # command doesn't remove them for us. 6 | # RUN rm -rf ./* 7 | COPY ./ ./ 8 | RUN cargo build --bin transact; cargo build --bin client; 9 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | use crate::tests::test; 4 | 5 | mod message; 6 | mod simple_rm_es; 7 | mod simple_tm_es; 8 | mod simulation; 9 | mod slave; 10 | mod stm_simple_rm_es; 11 | mod stm_simple_tm_es; 12 | mod tests; 13 | mod tests_paxos2pc; 14 | mod tests_stmpaxos2pc; 15 | 16 | #[macro_use] 17 | extern crate runiversal; 18 | 19 | fn main() { 20 | test() 21 | } 22 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/tests.rs: -------------------------------------------------------------------------------- 1 | use crate::tests_paxos2pc; 2 | use crate::tests_stmpaxos2pc; 3 | use rand::{RngCore, SeedableRng}; 4 | use rand_xorshift::XorShiftRng; 5 | use runiversal::test_utils::mk_seed; 6 | 7 | /// Run `test_single()` multiple times, each with a different seed. 8 | pub fn test() { 9 | let mut orig_rand = XorShiftRng::from_seed([0; 16]); 10 | for i in 0..2000 { 11 | let mut seed = mk_seed(&mut orig_rand); 12 | if i % 2 == 0 { 13 | tests_stmpaxos2pc::test_single(i, seed); 14 | } else { 15 | tests_paxos2pc::test_single(i, seed); 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/slave_test.rs: -------------------------------------------------------------------------------- 1 | use crate::slave::SlaveState; 2 | use crate::test_utils::CheckCtx; 3 | 4 | // ----------------------------------------------------------------------------------------------- 5 | // Consistency Testing 6 | // ----------------------------------------------------------------------------------------------- 7 | 8 | pub fn check_slave_clean(slave: &SlaveState, check_ctx: &mut CheckCtx) { 9 | let statuses = &slave.statuses; 10 | 11 | // Check `Statuses` clean 12 | check_ctx.check(statuses.create_table_ess.is_empty()); 13 | check_ctx.check(statuses.shard_split_ess.is_empty()); 14 | } 15 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "runiversal" 3 | version = "0.1.0" 4 | authors = ["Pasindu Muthukuda "] 5 | edition = "2018" 6 | default-run = "transact" 7 | 8 | [profile.release] 9 | debug-assertions = true 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | serde = { version = "1.0.117", features = ["derive"] } 15 | sqlparser = "0.9.0" 16 | sqlformat = "0.1.8" 17 | rmp-serde = "1.0.0" 18 | rand = "0.7.3" 19 | rand_xorshift = "0.2.0" 20 | byteorder = "1.3.4" 21 | clap = "3.0.13" 22 | rustyline = "9.1.0" 23 | crossterm = "0.23.2" 24 | tui = "0.18.0" 25 | tabled = "0.6.1" 26 | log = "0.4.17" 27 | env_logger = 
"0.9.0" 28 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | tab_spaces = 2 2 | 3 | # These are the default values for these settings when 4 | # `use_small_heuristics = "Default'`. I generally like them, but I'd 5 | # like to tinker with them a little bit. So I'm turning off 6 | # `use_small_heuristics` and setting them manually. 7 | # 8 | # fn_call_width = 60 9 | # attr_fn_like_width = 70 10 | # struct_lit_width = 18 11 | # struct_variant_width = 35 12 | # array_width = 60 13 | # chain_width = 60 14 | # single_line_if_else_max_width = 50 15 | # use_small_heuristics = "Off" 16 | 17 | # Actually.. this strategy doesn't work. The formatting we get for 18 | # using above values directly isn't the same as using 19 | # `use_small_heuristics = "Default"`. I believe there 20 | # is a bug in rustfmt. We can just use "Max" instead. 21 | use_small_heuristics = "Max" 22 | -------------------------------------------------------------------------------- /src/test/node_test.rs: -------------------------------------------------------------------------------- 1 | use crate::master::master_test::check_master_clean; 2 | use crate::node::{NodeState, State}; 3 | use crate::slave::slave_test::check_slave_clean; 4 | use crate::test_utils::CheckCtx; 5 | 6 | // ----------------------------------------------------------------------------------------------- 7 | // Consistency Testing 8 | // ----------------------------------------------------------------------------------------------- 9 | 10 | pub fn check_node_clean(node: &NodeState, check_ctx: &mut CheckCtx) { 11 | match &node.state { 12 | State::DNEState(_) => {} 13 | State::FreeNodeState(_, _) => {} 14 | State::NominalSlaveState(slave_state, _) => { 15 | check_slave_clean(&slave_state, check_ctx); 16 | } 17 | State::NominalMasterState(master_state, _) => { 18 | check_master_clean(&master_state, check_ctx); 19 | } 20 | State::PostExistence => {} 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/bin/experimental/main.rs: -------------------------------------------------------------------------------- 1 | use sqlparser::dialect::GenericDialect; 2 | use sqlparser::parser::Parser; 3 | use std::collections::BTreeMap; 4 | 5 | fn main() { 6 | let d = true; 7 | let mut f = 3; 8 | println!("{}", f); 9 | 10 | // let mut s = BTreeMap::new(); 11 | // s.insert("hello", 4); 12 | // println!("{:#?}", s); 13 | } 14 | 15 | fn sql() { 16 | // 17 | // let sql = "SELECT a, b, 123, myfunc(b) \ 18 | // FROM table_1 \ 19 | // WHERE a > b AND b < 100 \ 20 | // ORDER BY a DESC, b"; 21 | 22 | let sql = " 23 | INSERT INTO inventory (product_id, email, count) 24 | VALUES (2, 'my_email_2', 25); 25 | 26 | SELECT SUM(DISTINCT count) 27 | FROM inventory; 28 | "; 29 | 30 | let dialect = GenericDialect {}; // or AnsiDialect, or your own dialect ... 
31 | 32 | let ast = Parser::parse_sql(&dialect, sql); 33 | 34 | println!("AST: {:#?}", ast); 35 | } 36 | -------------------------------------------------------------------------------- /src/test/master_test.rs: -------------------------------------------------------------------------------- 1 | use crate::master::MasterState; 2 | use crate::test_utils::CheckCtx; 3 | 4 | // ----------------------------------------------------------------------------------------------- 5 | // Consistency Testing 6 | // ----------------------------------------------------------------------------------------------- 7 | 8 | pub fn check_master_clean(master: &MasterState, check_ctx: &mut CheckCtx) { 9 | let statuses = &master.statuses; 10 | let ctx = &master.ctx; 11 | 12 | // Check `Status` clean 13 | check_ctx.check(statuses.create_table_tm_ess.is_empty()); 14 | check_ctx.check(statuses.alter_table_tm_ess.is_empty()); 15 | check_ctx.check(statuses.drop_table_tm_ess.is_empty()); 16 | check_ctx.check(statuses.shard_split_tm_ess.is_empty()); 17 | check_ctx.check(statuses.planning_ess.is_empty()); 18 | 19 | // Check `Master` clean 20 | check_ctx.check(ctx.external_request_id_map.is_empty()); 21 | } 22 | -------------------------------------------------------------------------------- /src/test/storage_test.rs: -------------------------------------------------------------------------------- 1 | use super::add_version; 2 | use crate::common::{mk_t, Timestamp}; 3 | use crate::common::{ColVal, ColValN}; 4 | 5 | #[test] 6 | fn add_version_test() { 7 | let mut versions = Vec::<(Timestamp, ColValN)>::new(); 8 | 9 | add_version(&mut versions, mk_t(10), None); 10 | assert_eq!(versions, vec![(mk_t(10), None)]); 11 | add_version(&mut versions, mk_t(20), None); 12 | assert_eq!(versions, vec![(mk_t(10), None), (mk_t(20), None)]); 13 | add_version(&mut versions, mk_t(5), None); 14 | assert_eq!(versions, vec![(mk_t(5), None), (mk_t(10), None), (mk_t(20), None)]); 15 | add_version(&mut versions, mk_t(15), None); 16 | assert_eq!(versions, vec![(mk_t(5), None), (mk_t(10), None), (mk_t(15), None), (mk_t(20), None)]); 17 | add_version(&mut versions, mk_t(10), Some(ColVal::Int(10))); 18 | assert_eq!( 19 | versions, 20 | vec![(mk_t(5), None), (mk_t(10), Some(ColVal::Int(10))), (mk_t(15), None), (mk_t(20), None)] 21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 pasindumuth 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | extern crate core; 4 | 5 | #[macro_use] 6 | pub mod lang; 7 | pub mod alter_table_rm_es; 8 | pub mod alter_table_tm_es; 9 | pub mod col_usage; 10 | pub mod common; 11 | pub mod coord; 12 | pub mod create_table_rm_es; 13 | pub mod create_table_tm_es; 14 | pub mod drop_table_rm_es; 15 | pub mod drop_table_tm_es; 16 | pub mod experimental; 17 | pub mod expression; 18 | pub mod finish_query_rm_es; 19 | pub mod finish_query_tm_es; 20 | pub mod free_node_manager; 21 | pub mod gr_query_es; 22 | pub mod join_read_es; 23 | pub mod join_util; 24 | pub mod master; 25 | pub mod master_query_planning_es; 26 | pub mod message; 27 | pub mod ms_query_coord_es; 28 | pub mod ms_table_delete_es; 29 | pub mod ms_table_es; 30 | pub mod ms_table_insert_es; 31 | pub mod ms_table_read_es; 32 | pub mod ms_table_write_es; 33 | pub mod multiversion_map; 34 | pub mod net; 35 | pub mod network_driver; 36 | pub mod node; 37 | pub mod paxos; 38 | pub mod paxos2pc_rm; 39 | pub mod paxos2pc_tm; 40 | pub mod query_converter; 41 | pub mod query_planning; 42 | pub mod server; 43 | pub mod shard_pending_es; 44 | pub mod shard_snapshot_es; 45 | pub mod shard_split_slave_rm_es; 46 | pub mod shard_split_tablet_rm_es; 47 | pub mod shard_split_tm_es; 48 | pub mod simulation_utils; 49 | pub mod slave; 50 | pub mod slave_group_create_es; 51 | pub mod slave_reconfig_es; 52 | pub mod sql_ast; 53 | pub mod sql_parser; 54 | pub mod stmpaxos2pc_rm; 55 | pub mod stmpaxos2pc_tm; 56 | pub mod storage; 57 | pub mod table_read_es; 58 | pub mod tablet; 59 | pub mod test_utils; 60 | pub mod tm_status; 61 | pub mod trans_table_read_es; 62 | -------------------------------------------------------------------------------- /src/simulation_utils.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{EndpointId, InternalMode}; 2 | use std::collections::{BTreeMap, VecDeque}; 3 | 4 | // ----------------------------------------------------------------------------------------------- 5 | // Utils 6 | // ----------------------------------------------------------------------------------------------- 7 | // Construct the PaxosNode EndpointId of the Paxos node at the given index. 8 | pub fn mk_paxos_eid(i: u32) -> EndpointId { 9 | EndpointId::new(format!("pe{}", i), InternalMode::Internal) 10 | } 11 | 12 | // Construct the Slave EndpointId of the Slave at the given index. 13 | pub fn mk_slave_eid(i: u32) -> EndpointId { 14 | EndpointId::new(format!("se{}", i), InternalMode::Internal) 15 | } 16 | 17 | // Construct the EndpointId of a Node. 18 | pub fn mk_node_eid(i: u32) -> EndpointId { 19 | EndpointId::new(format!("ne{}", i), InternalMode::Internal) 20 | } 21 | 22 | // Construct the Client EndpointId of the client at the given index. 23 | pub fn mk_client_eid(i: u32) -> EndpointId { 24 | EndpointId::new(format!("ce{}", i), InternalMode::External { salt: "".to_string() }) 25 | } 26 | 27 | /// Add a message between two nodes in the network.
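/// If the queue from `from_eid` to `to_eid` was empty beforehand, it is also registered in
/// `nonempty_queues`, so the simulation can poll only the queues that currently hold messages.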
28 | pub fn add_msg<NetworkMessageT>( 29 | queues: &mut BTreeMap<EndpointId, BTreeMap<EndpointId, VecDeque<NetworkMessageT>>>, 30 | nonempty_queues: &mut Vec<(EndpointId, EndpointId)>, 31 | msg: NetworkMessageT, 32 | from_eid: &EndpointId, 33 | to_eid: &EndpointId, 34 | ) { 35 | let queue = queues.get_mut(from_eid).unwrap().get_mut(to_eid).unwrap(); 36 | if queue.is_empty() { 37 | let queue_id = (from_eid.clone(), to_eid.clone()); 38 | nonempty_queues.push(queue_id); 39 | } 40 | queue.push_back(msg); 41 | } 42 | -------------------------------------------------------------------------------- /src/experimental.rs: -------------------------------------------------------------------------------- 1 | use crate::sql_ast::proc; 2 | use std::mem; 3 | use std::ops::Deref; 4 | 5 | // In this file, we store experimental implementations for things. 6 | 7 | // Implementations 8 | 9 | struct SubqueryIter<'a> { 10 | expr: &'a proc::ValExpr, 11 | parent: Option<Box<SubqueryIter<'a>>>, 12 | } 13 | 14 | // impl proc::ValExpr { 15 | // fn subquery_iter(&self) -> SubqueryIter<'_> { 16 | // SubqueryIter { expr: self, parent: None } 17 | // } 18 | // } 19 | 20 | impl<'a> Iterator for SubqueryIter<'a> { 21 | type Item = &'a proc::GRQuery; 22 | // The property here is that we should return the GRQuerys in `expr` in this 23 | // node, then the parent node, then its parent, and so on. 24 | fn next(&mut self) -> Option<Self::Item> { 25 | match self.expr { 26 | proc::ValExpr::ColumnRef(_) => { 27 | if let Some(parent) = self.parent.take() { 28 | *self = *parent; 29 | self.next() 30 | } else { 31 | None 32 | } 33 | } 34 | proc::ValExpr::UnaryExpr { expr, .. } => { 35 | self.expr = expr.deref(); 36 | self.next() 37 | } 38 | proc::ValExpr::BinaryExpr { left, right, .. } => { 39 | self.parent = 40 | Some(Box::new(SubqueryIter { expr: right, parent: mem::take(&mut self.parent) })); 41 | self.expr = left; 42 | self.next() 43 | } 44 | proc::ValExpr::Value { .. } => { 45 | if let Some(parent) = self.parent.take() { 46 | *self = *parent; 47 | self.next() 48 | } else { 49 | None 50 | } 51 | } 52 | proc::ValExpr::Subquery { query } => { 53 | if let Some(parent) = self.parent.take() { 54 | *self = *parent; 55 | Some(query) 56 | } else { 57 | None 58 | } 59 | } 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/lang.rs: -------------------------------------------------------------------------------- 1 | macro_rules! collection { 2 | // map-like 3 | ($($k:expr => $v:expr),* $(,)?) => { 4 | std::iter::Iterator::collect(std::array::IntoIter::new([$(($k, $v),)*])) 5 | }; 6 | // set-like 7 | ($($v:expr),* $(,)?) => { 8 | std::iter::Iterator::collect(std::array::IntoIter::new([$($v,)*])) 9 | }; 10 | } 11 | 12 | /// The first argument is a single-element Tuple Struct Variant 13 | /// whose insides we want to extract. The second argument is the 14 | /// value we want to extract from. If the value is a reference, the return 15 | /// value here is a reference. Otherwise, the value is moved. 16 | #[macro_export] 17 | macro_rules! cast { 18 | ($enum:path, $expr:expr) => {{ 19 | if let $enum(item) = $expr { 20 | Some(item) 21 | } else { 22 | debug_assert!(false); 23 | None 24 | } 25 | }}; 26 | } 27 | 28 | /// Same as the above, but the expected branch might be the `None` 29 | /// branch, so we do not debug assert. 30 | #[macro_export] 31 | macro_rules!
cast_safe { 32 | ($enum:path, $expr:expr) => {{ 33 | if let $enum(item) = $expr { 34 | Some(item) 35 | } else { 36 | None 37 | } 38 | }}; 39 | } 40 | 41 | /// A macro that makes it easy to check that an expression is true, 42 | /// and then exit the current function if it is false (in production, 43 | /// but assert in development). 44 | #[macro_export] 45 | macro_rules! check { 46 | ($expr:expr) => {{ 47 | if $expr { 48 | Some(()) 49 | } else { 50 | debug_assert!(false); 51 | None 52 | }? // We place the `?` here, since it is easy to forget 53 | // when using this macro (since it does not return anything). 54 | }}; 55 | } 56 | 57 | #[cfg(test)] 58 | mod tests { 59 | enum Enum { 60 | V1(i32), 61 | V2(String), 62 | } 63 | 64 | #[test] 65 | fn cast_test() { 66 | let e = Enum::V2("value".to_string()); 67 | let inner_incorrect = cast!(Enum::V1, &e); 68 | assert!(inner_incorrect.is_err()); 69 | let inner_correct = cast!(Enum::V2, &e); 70 | assert_eq!(inner_correct, Ok(&"value".to_string())); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/test_utils.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | ColName, ColVal, EndpointId, SlaveGroupId, TablePath, TabletGroupId, TransTableName, 3 | }; 4 | use rand::RngCore; 5 | use rand_xorshift::XorShiftRng; 6 | 7 | pub fn cn(s: &str) -> ColName { 8 | ColName(s.to_string()) 9 | } 10 | 11 | pub fn cno(s: &str) -> Option { 12 | Some(ColName(s.to_string())) 13 | } 14 | 15 | pub fn cvs(s: &str) -> ColVal { 16 | ColVal::String(s.to_string()) 17 | } 18 | 19 | pub fn cvi(i: i32) -> ColVal { 20 | ColVal::Int(i) 21 | } 22 | 23 | pub fn cvb(b: bool) -> ColVal { 24 | ColVal::Bool(b) 25 | } 26 | 27 | pub fn mk_sid(id: &str) -> SlaveGroupId { 28 | SlaveGroupId(id.to_string()) 29 | } 30 | 31 | pub fn mk_tid(id: &str) -> TabletGroupId { 32 | TabletGroupId(id.to_string()) 33 | } 34 | 35 | pub fn mk_tab(table_path: &str) -> TablePath { 36 | TablePath(table_path.to_string()) 37 | } 38 | 39 | pub fn mk_ttab(table_path: &str) -> TransTableName { 40 | TransTableName(table_path.to_string()) 41 | } 42 | 43 | // ----------------------------------------------------------------------------------------------- 44 | // Random 45 | // ----------------------------------------------------------------------------------------------- 46 | 47 | pub fn mk_seed(rand: &mut XorShiftRng) -> [u8; 16] { 48 | let mut seed = [0; 16]; 49 | rand.fill_bytes(&mut seed); 50 | seed 51 | } 52 | 53 | // ----------------------------------------------------------------------------------------------- 54 | // Check Context 55 | // ----------------------------------------------------------------------------------------------- 56 | 57 | /// This is a utility for effectively accumulating the AND result of many boolean expressions. 58 | /// If `check` is called even once with `false` after construction, we remember this fact 59 | /// in `cum_bool`. We do not simply use a `&mut bool` because sometimes, we want to panic 60 | /// if the AND expression would evaluate to false (and we want to do it early). 
61 | pub struct CheckCtx { 62 | pub should_assert: bool, 63 | cum_bool: bool, 64 | } 65 | 66 | impl CheckCtx { 67 | pub fn new(should_assert: bool) -> CheckCtx { 68 | CheckCtx { should_assert, cum_bool: true } 69 | } 70 | 71 | pub fn check(&mut self, boolean: bool) { 72 | if !boolean { 73 | if self.should_assert { 74 | panic!(); 75 | } else { 76 | self.cum_bool = false; 77 | } 78 | } 79 | } 80 | 81 | pub fn get_result(&self) -> bool { 82 | self.cum_bool 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/message.rs: -------------------------------------------------------------------------------- 1 | use crate::simple_tm_es::SimplePayloadTypes; 2 | use crate::stm_simple_tm_es::STMSimpleTMPayloadTypes; 3 | use runiversal::common::{QueryId, SlaveGroupId}; 4 | use runiversal::message as msg; 5 | use runiversal::paxos2pc_tm as paxos2pc; 6 | use runiversal::stmpaxos2pc_tm as stmpaxos2pc; 7 | use serde::{Deserialize, Serialize}; 8 | 9 | // ------------------------------------------------------------------------------------------------- 10 | // NetworkMessage 11 | // ------------------------------------------------------------------------------------------------- 12 | 13 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 14 | pub enum NetworkMessage { 15 | Slave(SlaveMessage), 16 | } 17 | 18 | // ------------------------------------------------------------------------------------------------- 19 | // SlaveMessage 20 | // ------------------------------------------------------------------------------------------------- 21 | 22 | pub type RemoteMessage = msg::RemoteMessage; 23 | pub type RemoteLeaderChangedGossip = msg::RemoteLeaderChangedGossip; 24 | 25 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 26 | pub enum SlaveMessage { 27 | ExternalMessage(ExternalMessage), 28 | RemoteMessage(msg::RemoteMessage), 29 | RemoteLeaderChangedGossip(msg::RemoteLeaderChangedGossip), 30 | } 31 | 32 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 33 | pub enum SlaveRemotePayload { 34 | // Simple STMPaxos2PC 35 | STMRMMessage(stmpaxos2pc::RMMessage), 36 | STMTMMessage(stmpaxos2pc::TMMessage), 37 | 38 | // Simple Paxos2PC 39 | RMMessage(paxos2pc::RMMessage), 40 | TMMessage(paxos2pc::TMMessage), 41 | } 42 | 43 | // ------------------------------------------------------------------------------------------------- 44 | // ExternalMessage 45 | // ------------------------------------------------------------------------------------------------- 46 | 47 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 48 | pub enum ExternalMessage { 49 | STMSimpleRequest(STMSimpleRequest), 50 | SimpleRequest(SimpleRequest), 51 | } 52 | 53 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 54 | pub struct STMSimpleRequest { 55 | pub query_id: QueryId, 56 | pub rms: Vec, 57 | } 58 | 59 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 60 | pub struct SimpleRequest { 61 | pub query_id: QueryId, 62 | pub rms: Vec, 63 | } 64 | 65 | // ------------------------------------------------------------------------------------------------- 66 | // Paxos 67 | // ------------------------------------------------------------------------------------------------- 68 | 69 | pub type LeaderChanged = msg::LeaderChanged; 70 | pub type PLEntry = msg::PLEntry; 71 | -------------------------------------------------------------------------------- /src/test/coord_test.rs: 
-------------------------------------------------------------------------------- 1 | use crate::coord::CoordState; 2 | use crate::test_utils::CheckCtx; 3 | 4 | // ----------------------------------------------------------------------------------------------- 5 | // Consistency Testing 6 | // ----------------------------------------------------------------------------------------------- 7 | 8 | /// Asserts various consistency properties in the `CoordState`. 9 | pub fn assert_coord_consistency(coord: &CoordState) { 10 | external_request_id_map_consistency(coord); 11 | } 12 | 13 | // Verify that every `MSCoordES` and every `FinishQueryTMES` is consistent with `external_request_id_map`. 14 | fn external_request_id_map_consistency(coord: &CoordState) { 15 | let statuses = &coord.statuses; 16 | let ctx = &coord.ctx; 17 | 18 | if ctx.is_leader() { 19 | // If this is a Leader, we make sure all RequestIds in the ESs exist in 20 | // the `external_request_id_map`. 21 | for (qid, es) in &statuses.ms_coord_ess { 22 | if let Some(stored_qid) = ctx.external_request_id_map.get(&es.request_id) { 23 | assert_eq!(stored_qid, qid); 24 | } else { 25 | panic!(); 26 | } 27 | } 28 | 29 | for (qid, es) in &statuses.finish_query_tm_ess { 30 | if let Some(response_data) = &es.inner.response_data { 31 | if let Some(stored_qid) = ctx.external_request_id_map.get(&response_data.request_id) { 32 | assert_eq!(stored_qid, qid); 33 | } else { 34 | panic!(); 35 | } 36 | } 37 | } 38 | 39 | // Next, we see if all entries in `external_request_id_map` are in an ES. 40 | for (rid, qid) in &ctx.external_request_id_map { 41 | if let Some(es) = &statuses.ms_coord_ess.get(qid) { 42 | assert_eq!(&es.request_id, rid); 43 | assert!(!statuses.finish_query_tm_ess.contains_key(qid)); 44 | } else if let Some(es) = &statuses.finish_query_tm_ess.get(qid) { 45 | if let Some(response_data) = &es.inner.response_data { 46 | assert_eq!(&response_data.request_id, rid); 47 | } else { 48 | panic!(); 49 | } 50 | } else { 51 | panic!(); 52 | } 53 | } 54 | } else { 55 | // If this is a Follower, we make sure its `external_request_id_map` is empty.
56 | assert!(ctx.external_request_id_map.is_empty()); 57 | assert!(statuses.ms_coord_ess.is_empty()); 58 | } 59 | } 60 | 61 | pub fn check_coord_clean(coord: &CoordState, check_ctx: &mut CheckCtx) { 62 | let statuses = &coord.statuses; 63 | let ctx = &coord.ctx; 64 | 65 | // Check `Status` clean 66 | check_ctx.check(statuses.finish_query_tm_ess.is_empty()); 67 | check_ctx.check(statuses.ms_coord_ess.is_empty()); 68 | check_ctx.check(statuses.gr_query_ess.is_empty()); 69 | check_ctx.check(statuses.join_query_ess.is_empty()); 70 | check_ctx.check(statuses.trans_table_read_ess.is_empty()); 71 | check_ctx.check(statuses.tm_statuss.is_empty()); 72 | 73 | // Check `Coord` clean 74 | check_ctx.check(ctx.external_request_id_map.is_empty()); 75 | } 76 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/simple_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::simple_tm_es::{ 3 | SimplePayloadTypes, SimplePrepare, SimpleRMAborted, SimpleRMCommitted, SimpleRMPrepared, 4 | }; 5 | use crate::slave::SlaveContext; 6 | use rand::RngCore; 7 | use runiversal::common::BasicIOCtx; 8 | use runiversal::common::QueryId; 9 | use runiversal::paxos2pc_rm::{Paxos2PCRMInner, Paxos2PCRMOuter}; 10 | use runiversal::paxos2pc_tm::{PayloadTypes, RMCommittedPLm}; 11 | 12 | // ----------------------------------------------------------------------------------------------- 13 | // SimpleES Implementation 14 | // ----------------------------------------------------------------------------------------------- 15 | 16 | #[derive(Debug)] 17 | pub struct SimpleRMInner {} 18 | 19 | pub type SimpleRMES = Paxos2PCRMOuter<SimplePayloadTypes, SimpleRMInner>; 20 | 21 | impl Paxos2PCRMInner<SimplePayloadTypes> for SimpleRMInner { 22 | fn new<IO: BasicIOCtx<msg::NetworkMessage>>( 23 | _: &mut SlaveContext, 24 | io_ctx: &mut IO, 25 | _: SimplePrepare, 26 | _: &mut (), 27 | ) -> Option<SimpleRMInner> { 28 | // Here, we randomly decide whether to accept the Prepare and continue, or to abort; we abort with a 5% chance.
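// (`next_u32() % 100` is roughly uniform over `0..100`, so the branch below fires about
// 5 times in 100. Returning `None` models an RM that refuses to vote Prepared, which is
// one of the failure paths these Paxos2PC simulations are meant to exercise.)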
29 | if io_ctx.rand().next_u32() % 100 < 5 { 30 | None 31 | } else { 32 | Some(SimpleRMInner {}) 33 | } 34 | } 35 | 36 | fn new_follower>( 37 | _: &mut SlaveContext, 38 | _: &mut IO, 39 | _: SimpleRMPrepared, 40 | ) -> SimpleRMInner { 41 | SimpleRMInner {} 42 | } 43 | 44 | fn early_aborted>( 45 | &mut self, 46 | _: &mut ::RMContext, 47 | _: &mut IO, 48 | ) { 49 | } 50 | 51 | fn mk_prepared_plm>( 52 | &mut self, 53 | _: &mut SlaveContext, 54 | _: &mut IO, 55 | ) -> SimpleRMPrepared { 56 | SimpleRMPrepared {} 57 | } 58 | 59 | fn prepared_plm_inserted>( 60 | &mut self, 61 | _: &mut SlaveContext, 62 | _: &mut IO, 63 | ) { 64 | } 65 | 66 | fn mk_committed_plm>( 67 | &mut self, 68 | _: &mut SlaveContext, 69 | _: &mut IO, 70 | ) -> SimpleRMCommitted { 71 | SimpleRMCommitted {} 72 | } 73 | 74 | fn committed_plm_inserted>( 75 | &mut self, 76 | _: &mut SlaveContext, 77 | _: &mut IO, 78 | _: &QueryId, 79 | ) { 80 | } 81 | 82 | fn mk_aborted_plm>( 83 | &mut self, 84 | _: &mut SlaveContext, 85 | _: &mut IO, 86 | ) -> SimpleRMAborted { 87 | SimpleRMAborted {} 88 | } 89 | 90 | fn aborted_plm_inserted>( 91 | &mut self, 92 | _: &mut SlaveContext, 93 | _: &mut IO, 94 | ) { 95 | } 96 | 97 | fn reconfig_snapshot(&self) -> Self { 98 | unimplemented!() 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/simple_tm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::slave::{SlaveContext, SlavePLm}; 3 | use runiversal::common::BasicIOCtx; 4 | use runiversal::common::{EndpointId, RequestId, SlaveGroupId}; 5 | use runiversal::paxos2pc_tm::{ 6 | Paxos2PCTMInner, Paxos2PCTMOuter, PayloadTypes, RMMessage, RMPLm, TMMessage, 7 | }; 8 | use serde::{Deserialize, Serialize}; 9 | use std::collections::BTreeMap; 10 | 11 | // ----------------------------------------------------------------------------------------------- 12 | // Payloads 13 | // ----------------------------------------------------------------------------------------------- 14 | 15 | // RM PLm 16 | 17 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 18 | pub struct SimpleRMPrepared {} 19 | 20 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 21 | pub struct SimpleRMCommitted {} 22 | 23 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 24 | pub struct SimpleRMAborted {} 25 | 26 | // TM-to-RM 27 | 28 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 29 | pub struct SimplePrepare {} 30 | 31 | // SimplePayloadTypes 32 | 33 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 34 | pub struct SimplePayloadTypes {} 35 | 36 | impl PayloadTypes for SimplePayloadTypes { 37 | // Master 38 | type RMPLm = SlavePLm; 39 | type RMPath = SlaveGroupId; 40 | type TMPath = SlaveGroupId; 41 | type RMMessage = msg::SlaveRemotePayload; 42 | type TMMessage = msg::SlaveRemotePayload; 43 | type NetworkMessageT = msg::NetworkMessage; 44 | type RMContext = SlaveContext; 45 | type RMExtraData = (); 46 | type TMContext = SlaveContext; 47 | 48 | // RM PLm 49 | type RMPreparedPLm = SimpleRMPrepared; 50 | type RMCommittedPLm = SimpleRMCommitted; 51 | type RMAbortedPLm = SimpleRMAborted; 52 | 53 | fn rm_plm(plm: RMPLm) -> Self::RMPLm { 54 | SlavePLm::SimpleRM(plm) 55 | } 56 | 57 | type Prepare = SimplePrepare; 58 | 59 | fn rm_msg(msg: RMMessage) -> Self::RMMessage { 60 | msg::SlaveRemotePayload::RMMessage(msg) 61 | } 62 | 63 | fn tm_msg(msg: TMMessage) -> Self::TMMessage { 
64 | msg::SlaveRemotePayload::TMMessage(msg) 65 | } 66 | } 67 | 68 | // ----------------------------------------------------------------------------------------------- 69 | // Simple Implementation 70 | // ----------------------------------------------------------------------------------------------- 71 | 72 | pub type SimpleTMES = Paxos2PCTMOuter; 73 | 74 | #[derive(Debug)] 75 | pub struct SimpleTMInner {} 76 | 77 | impl Paxos2PCTMInner for SimpleTMInner { 78 | fn new_rec>( 79 | _: &mut SlaveContext, 80 | _: &mut IO, 81 | ) -> SimpleTMInner { 82 | SimpleTMInner {} 83 | } 84 | 85 | fn committed>(&mut self, _: &mut SlaveContext, _: &mut IO) {} 86 | 87 | fn aborted>(&mut self, _: &mut SlaveContext, _: &mut IO) {} 88 | } 89 | -------------------------------------------------------------------------------- /src/finish_query_tm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{BasicIOCtx, ColName, QueryResult, ShardingGen, Timestamp}; 2 | use crate::common::{CNodePath, EndpointId, QueryId, RequestId, TNodePath, TQueryPath, TableView}; 3 | use crate::coord::CoordContext; 4 | use crate::message as msg; 5 | use crate::paxos2pc_tm::{ 6 | Paxos2PCTMInner, Paxos2PCTMOuter, PayloadTypes, RMMessage, RMPLm, TMMessage, 7 | }; 8 | use crate::sql_ast::{iast, proc}; 9 | use crate::storage::GenericTable; 10 | use crate::tablet::{MSQueryES, ReadWriteRegion, TabletContext, TabletPLm}; 11 | use serde::{Deserialize, Serialize}; 12 | use std::collections::BTreeMap; 13 | 14 | // ----------------------------------------------------------------------------------------------- 15 | // Payloads 16 | // ----------------------------------------------------------------------------------------------- 17 | 18 | // RM PLm 19 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 20 | pub struct FinishQueryRMPrepared { 21 | pub sharding_gen: ShardingGen, 22 | pub region_lock: ReadWriteRegion, 23 | pub timestamp: Timestamp, 24 | pub update_view: GenericTable, 25 | } 26 | 27 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 28 | pub struct FinishQueryRMCommitted {} 29 | 30 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 31 | pub struct FinishQueryRMAborted {} 32 | 33 | // TM-to-RM Messages 34 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 35 | pub struct FinishQueryPrepare { 36 | /// Contains the QueryId of the MSQueryES that this `Prepare` has to take over 37 | pub query_id: QueryId, 38 | } 39 | 40 | // FinishQueryPayloadTypes 41 | 42 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 43 | pub struct FinishQueryPayloadTypes {} 44 | 45 | impl PayloadTypes for FinishQueryPayloadTypes { 46 | // Master 47 | type RMPLm = TabletPLm; 48 | type RMPath = TNodePath; 49 | type TMPath = CNodePath; 50 | type RMMessage = msg::TabletMessage; 51 | type TMMessage = msg::CoordMessage; 52 | type NetworkMessageT = msg::NetworkMessage; 53 | type RMContext = TabletContext; 54 | type RMExtraData = BTreeMap; 55 | type TMContext = CoordContext; 56 | 57 | // RM PLm 58 | type RMPreparedPLm = FinishQueryRMPrepared; 59 | type RMCommittedPLm = FinishQueryRMCommitted; 60 | type RMAbortedPLm = FinishQueryRMAborted; 61 | 62 | fn rm_plm(plm: RMPLm) -> Self::RMPLm { 63 | TabletPLm::FinishQuery(plm) 64 | } 65 | 66 | type Prepare = FinishQueryPrepare; 67 | 68 | fn rm_msg(msg: RMMessage) -> Self::RMMessage { 69 | msg::TabletMessage::FinishQuery(msg) 70 | } 71 | 72 | fn tm_msg(msg: TMMessage) -> Self::TMMessage { 73 | 
msg::CoordMessage::FinishQuery(msg) 74 | } 75 | } 76 | 77 | // ----------------------------------------------------------------------------------------------- 78 | // FinishQueryTMES 79 | // ----------------------------------------------------------------------------------------------- 80 | 81 | pub type FinishQueryTMES = Paxos2PCTMOuter; 82 | 83 | #[derive(Debug)] 84 | pub struct ResponseData { 85 | // Request values (values send in the original request) 86 | pub request_id: RequestId, 87 | pub sender_eid: EndpointId, 88 | /// We hold onto the original `Query` in case of an Abort so that we can restart. 89 | pub sql_query: iast::Query, 90 | 91 | // Result values (values computed by the MSCoordES) 92 | pub result: QueryResult, 93 | pub timestamp: Timestamp, 94 | } 95 | 96 | #[derive(Debug)] 97 | pub struct FinishQueryTMInner { 98 | pub response_data: Option, 99 | pub committed: bool, 100 | } 101 | 102 | // ----------------------------------------------------------------------------------------------- 103 | // Implementation 104 | // ----------------------------------------------------------------------------------------------- 105 | 106 | impl Paxos2PCTMInner for FinishQueryTMInner { 107 | fn new_rec(_: &mut CoordContext, _: &mut IO) -> FinishQueryTMInner { 108 | FinishQueryTMInner { response_data: None, committed: false } 109 | } 110 | 111 | fn committed(&mut self, _: &mut CoordContext, _: &mut IO) { 112 | self.committed = true; 113 | } 114 | 115 | fn aborted(&mut self, _: &mut CoordContext, _: &mut IO) { 116 | self.committed = false; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/stm_simple_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::slave::{SlaveContext, SlavePLm}; 3 | use crate::stm_simple_tm_es::{ 4 | STMSimpleClosed, STMSimpleCommit, STMSimplePrepare, STMSimplePrepared, STMSimpleTMPayloadTypes, 5 | }; 6 | use runiversal::common::BasicIOCtx; 7 | use runiversal::common::SlaveGroupId; 8 | use runiversal::stmpaxos2pc_rm::{ 9 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 10 | STMPaxos2PCRMOuter, 11 | }; 12 | use runiversal::stmpaxos2pc_tm::TMMessage; 13 | use serde::{Deserialize, Serialize}; 14 | 15 | // ----------------------------------------------------------------------------------------------- 16 | // Payloads 17 | // ----------------------------------------------------------------------------------------------- 18 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 19 | pub struct STMSimpleRMPayloadTypes {} 20 | 21 | impl RMPayloadTypes for STMSimpleRMPayloadTypes { 22 | type TM = STMSimpleTMPayloadTypes; 23 | type RMContext = SlaveContext; 24 | 25 | // Actions 26 | type RMCommitActionData = (); 27 | 28 | // RM PLm 29 | type RMPreparedPLm = STMSimpleRMPrepared; 30 | type RMCommittedPLm = STMSimpleRMCommitted; 31 | type RMAbortedPLm = STMSimpleRMAborted; 32 | } 33 | 34 | // RM PLm 35 | 36 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 37 | pub struct STMSimpleRMPrepared {} 38 | 39 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 40 | pub struct STMSimpleRMCommitted {} 41 | 42 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 43 | pub struct STMSimpleRMAborted {} 44 | 45 | // ----------------------------------------------------------------------------------------------- 46 | // RMServerContext 47 | // 
----------------------------------------------------------------------------------------------- 48 | 49 | impl RMServerContext for SlaveContext { 50 | fn push_plm(&mut self, plm: RMPLm) { 51 | self.slave_bundle.plms.push(SlavePLm::SimpleSTMRM(plm)); 52 | } 53 | 54 | fn send_to_tm>( 55 | &mut self, 56 | io_ctx: &mut IO, 57 | tm: &SlaveGroupId, 58 | msg: TMMessage, 59 | ) { 60 | self.send(io_ctx, tm, msg::SlaveRemotePayload::STMTMMessage(msg)); 61 | } 62 | 63 | fn mk_node_path(&self) -> SlaveGroupId { 64 | self.this_sid.clone() 65 | } 66 | 67 | fn is_leader(&self) -> bool { 68 | SlaveContext::is_leader(self) 69 | } 70 | } 71 | 72 | // ----------------------------------------------------------------------------------------------- 73 | // SimpleES Implementation 74 | // ----------------------------------------------------------------------------------------------- 75 | 76 | #[derive(Debug)] 77 | pub struct STMSimpleRMInner {} 78 | 79 | pub type STMSimpleRMES = STMPaxos2PCRMOuter; 80 | pub type STMSimpleRMAction = STMPaxos2PCRMAction; 81 | 82 | impl STMPaxos2PCRMInner for STMSimpleRMInner { 83 | fn new>( 84 | _: &mut SlaveContext, 85 | _: &mut IO, 86 | _: STMSimplePrepare, 87 | ) -> STMSimpleRMInner { 88 | STMSimpleRMInner {} 89 | } 90 | 91 | fn new_follower>( 92 | _: &mut SlaveContext, 93 | _: &mut IO, 94 | _: STMSimpleRMPrepared, 95 | ) -> STMSimpleRMInner { 96 | STMSimpleRMInner {} 97 | } 98 | 99 | fn mk_closed() -> STMSimpleClosed { 100 | STMSimpleClosed {} 101 | } 102 | 103 | fn mk_prepared_plm>( 104 | &mut self, 105 | _: &mut SlaveContext, 106 | _: &mut IO, 107 | ) -> Option { 108 | Some(STMSimpleRMPrepared {}) 109 | } 110 | 111 | fn prepared_plm_inserted>( 112 | &mut self, 113 | _: &mut SlaveContext, 114 | _: &mut IO, 115 | ) -> STMSimplePrepared { 116 | STMSimplePrepared {} 117 | } 118 | 119 | fn mk_committed_plm>( 120 | &mut self, 121 | _: &mut SlaveContext, 122 | _: &mut IO, 123 | _: &STMSimpleCommit, 124 | ) -> STMSimpleRMCommitted { 125 | STMSimpleRMCommitted {} 126 | } 127 | 128 | fn committed_plm_inserted>( 129 | &mut self, 130 | _: &mut SlaveContext, 131 | _: &mut IO, 132 | _: &RMCommittedPLm, 133 | ) { 134 | } 135 | 136 | fn mk_aborted_plm>( 137 | &mut self, 138 | _: &mut SlaveContext, 139 | _: &mut IO, 140 | ) -> STMSimpleRMAborted { 141 | STMSimpleRMAborted {} 142 | } 143 | 144 | fn aborted_plm_inserted>( 145 | &mut self, 146 | _: &mut SlaveContext, 147 | _: &mut IO, 148 | ) { 149 | } 150 | 151 | fn reconfig_snapshot(&self) -> Self { 152 | unimplemented!() 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/test/tablet_test.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{PrimaryKey, QueryId, TabletKeyRange}; 2 | use crate::finish_query_rm_es::FinishQueryRMES; 3 | use crate::tablet::{ShardingState, TabletState, DDLES}; 4 | use crate::test_utils::{cvb, cvi, cvs, CheckCtx}; 5 | use std::collections::BTreeMap; 6 | 7 | #[test] 8 | fn test_key_comparison() { 9 | assert_eq!( 10 | PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(false)] }, 11 | PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(false)] } 12 | ); 13 | 14 | assert!( 15 | PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(false)] } 16 | < PrimaryKey { cols: vec![cvi(3), cvs("a"), cvb(false)] } 17 | ); 18 | 19 | assert!( 20 | PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(false)] } 21 | < PrimaryKey { cols: vec![cvi(2), cvs("b"), cvb(false)] } 22 | ); 23 | 24 | assert!( 25 | PrimaryKey { cols: vec![cvi(2), cvs("a"), 
cvb(false)] } 26 | < PrimaryKey { cols: vec![cvi(2), cvs("a"), cvb(true)] } 27 | ); 28 | } 29 | 30 | // ----------------------------------------------------------------------------------------------- 31 | // Consistency Testing 32 | // ----------------------------------------------------------------------------------------------- 33 | 34 | /// Asserts various consistency properties in the `TabletState`. 35 | pub fn assert_tablet_consistency(tablet: &TabletState) { 36 | let statuses = &tablet.statuses; 37 | 38 | // Verify for every MSQueryES, every ES in `pending_queries` exist. 39 | for (query_id, ms_query_es) in &statuses.ms_query_ess { 40 | for child_qid in &ms_query_es.pending_queries { 41 | if let Some(es) = statuses.top.ms_table_read_ess.get(child_qid) { 42 | assert_eq!(&es.general.ms_query_id, query_id); 43 | } else if let Some(es) = statuses.top.ms_table_write_ess.get(child_qid) { 44 | assert_eq!(&es.general.ms_query_id, query_id); 45 | } else if let Some(es) = statuses.top.ms_table_insert_ess.get(child_qid) { 46 | assert_eq!(&es.general.ms_query_id, query_id); 47 | } else if let Some(es) = statuses.top.ms_table_delete_ess.get(child_qid) { 48 | assert_eq!(&es.general.ms_query_id, query_id); 49 | } else { 50 | panic!(); 51 | } 52 | } 53 | } 54 | 55 | // Verify that for every MSTable*ES, a valid MSQueryES exists. 56 | for (query_id, es) in &statuses.top.ms_table_read_ess { 57 | if let Some(ms_query_es) = statuses.ms_query_ess.get(&es.general.ms_query_id) { 58 | assert!(ms_query_es.pending_queries.contains(query_id)); 59 | } else { 60 | panic!() 61 | } 62 | } 63 | for (query_id, es) in &statuses.top.ms_table_write_ess { 64 | if let Some(ms_query_es) = statuses.ms_query_ess.get(&es.general.ms_query_id) { 65 | assert!(ms_query_es.pending_queries.contains(query_id)); 66 | } else { 67 | panic!() 68 | } 69 | } 70 | for (query_id, es) in &statuses.top.ms_table_insert_ess { 71 | if let Some(ms_query_es) = statuses.ms_query_ess.get(&es.general.ms_query_id) { 72 | assert!(ms_query_es.pending_queries.contains(query_id)); 73 | } else { 74 | panic!() 75 | } 76 | } 77 | for (query_id, es) in &statuses.top.ms_table_delete_ess { 78 | if let Some(ms_query_es) = statuses.ms_query_ess.get(&es.general.ms_query_id) { 79 | assert!(ms_query_es.pending_queries.contains(query_id)); 80 | } else { 81 | panic!() 82 | } 83 | } 84 | } 85 | 86 | pub fn check_tablet_clean(tablet: &TabletState, check_ctx: &mut CheckCtx) { 87 | let statuses = &tablet.statuses; 88 | let ctx = &tablet.ctx; 89 | 90 | // Check `Statuses` clean 91 | 92 | check_ctx.check(statuses.perform_query_buffer.is_empty()); 93 | 94 | check_ctx.check(statuses.gr_query_ess.is_empty()); 95 | check_ctx.check(statuses.join_query_ess.is_empty()); 96 | check_ctx.check(statuses.tm_statuss.is_empty()); 97 | check_ctx.check(statuses.ms_query_ess.is_empty()); 98 | check_ctx.check(statuses.top.table_read_ess.is_empty()); 99 | check_ctx.check(statuses.top.trans_table_read_ess.is_empty()); 100 | check_ctx.check(statuses.top.ms_table_read_ess.is_empty()); 101 | check_ctx.check(statuses.top.ms_table_write_ess.is_empty()); 102 | check_ctx.check(statuses.top.ms_table_insert_ess.is_empty()); 103 | check_ctx.check(statuses.top.ms_table_delete_ess.is_empty()); 104 | for (_, es) in &statuses.finish_query_ess { 105 | if let FinishQueryRMES::Paxos2PCRMExecOuter(_) = es { 106 | check_ctx.check(false); 107 | } 108 | } 109 | 110 | check_ctx.check(match &statuses.ddl_es { 111 | DDLES::None => true, 112 | DDLES::Alter(_) => false, 113 | DDLES::Drop(_) => false, 114 | 
DDLES::Dropped(_) => true, 115 | DDLES::ShardSplit(_) => false, 116 | }); 117 | 118 | check_ctx.check(match &statuses.sharding_state { 119 | ShardingState::None => true, 120 | ShardingState::ShardingSnapshotES(_) => false, 121 | }); 122 | 123 | // Check `Tablet` clean 124 | 125 | check_ctx.check(ctx.verifying_writes.is_empty()); 126 | check_ctx.check(ctx.inserting_prepared_writes.is_empty()); 127 | check_ctx.check(ctx.prepared_writes.is_empty()); 128 | 129 | check_ctx.check(ctx.waiting_read_protected.is_empty()); 130 | check_ctx.check(ctx.inserting_read_protected.is_empty()); 131 | 132 | check_ctx.check(ctx.waiting_locked_cols.is_empty()); 133 | check_ctx.check(ctx.inserting_locked_cols.is_empty()); 134 | 135 | check_ctx.check(ctx.ms_root_query_map.is_empty()); 136 | } 137 | -------------------------------------------------------------------------------- /src/finish_query_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{BasicIOCtx, GeneralTraceMessage, Timestamp}; 2 | use crate::common::{QueryId, ShardingGen}; 3 | use crate::finish_query_tm_es::{ 4 | FinishQueryPayloadTypes, FinishQueryPrepare, FinishQueryRMAborted, FinishQueryRMCommitted, 5 | FinishQueryRMPrepared, 6 | }; 7 | use crate::paxos2pc_rm::{Paxos2PCRMInner, Paxos2PCRMOuter}; 8 | use crate::paxos2pc_tm::PayloadTypes; 9 | use crate::sql_ast::proc; 10 | use crate::storage::{commit_to_storage, compress_updates_views, GenericTable}; 11 | use crate::tablet::{MSQueryES, ReadWriteRegion, TabletContext}; 12 | use serde::{Deserialize, Serialize}; 13 | use std::collections::BTreeMap; 14 | 15 | // ----------------------------------------------------------------------------------------------- 16 | // FinishQueryRMES 17 | // ----------------------------------------------------------------------------------------------- 18 | 19 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 20 | pub struct FinishQueryRMInner { 21 | pub sharding_gen: ShardingGen, 22 | pub region_lock: ReadWriteRegion, 23 | pub timestamp: Timestamp, 24 | pub update_view: GenericTable, 25 | } 26 | 27 | pub type FinishQueryRMES = Paxos2PCRMOuter; 28 | 29 | // ----------------------------------------------------------------------------------------------- 30 | // Implementation 31 | // ----------------------------------------------------------------------------------------------- 32 | 33 | impl Paxos2PCRMInner for FinishQueryRMInner { 34 | fn new( 35 | ctx: &mut TabletContext, 36 | _: &mut IO, 37 | payload: FinishQueryPrepare, 38 | extra_data: &mut BTreeMap, 39 | ) -> Option { 40 | if let Some(ms_query_es) = extra_data.remove(&payload.query_id) { 41 | ctx.ms_root_query_map.remove(&ms_query_es.root_query_path.query_id); 42 | debug_assert!(ms_query_es.pending_queries.is_empty()); 43 | 44 | let timestamp = ms_query_es.timestamp; 45 | 46 | // Move the VerifyingReadWrite to inserting. 
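// (Lifecycle, as implemented below: the region lock leaves `verifying_writes` here and is
// staged in `inserting_prepared_writes`; `prepared_plm_inserted` then moves it to
// `prepared_writes`, and `committed_plm_inserted` finally moves it to `committed_writes`.)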
47 | let verifying = ctx.verifying_writes.remove(×tamp).unwrap(); 48 | debug_assert!(verifying.m_waiting_read_protected.is_empty()); 49 | let region_lock = ReadWriteRegion { 50 | orig_p: verifying.orig_p, 51 | m_read_protected: verifying.m_read_protected, 52 | m_write_protected: verifying.m_write_protected, 53 | }; 54 | ctx.inserting_prepared_writes.insert(timestamp.clone(), region_lock.clone()); 55 | 56 | Some(FinishQueryRMInner { 57 | sharding_gen: ms_query_es.sharding_gen, 58 | region_lock, 59 | timestamp, 60 | update_view: compress_updates_views(ms_query_es.update_views), 61 | }) 62 | } else { 63 | // The MSQueryES might not be present because of a DeadlockSafetyWriteAbort. 64 | None 65 | } 66 | } 67 | 68 | fn new_follower( 69 | _: &mut TabletContext, 70 | _: &mut IO, 71 | payload: FinishQueryRMPrepared, 72 | ) -> FinishQueryRMInner { 73 | FinishQueryRMInner { 74 | sharding_gen: payload.sharding_gen, 75 | region_lock: payload.region_lock, 76 | timestamp: payload.timestamp, 77 | update_view: payload.update_view, 78 | } 79 | } 80 | 81 | fn early_aborted(&mut self, ctx: &mut TabletContext, _: &mut IO) { 82 | ctx.inserting_prepared_writes.remove(&self.timestamp); 83 | } 84 | 85 | fn mk_prepared_plm( 86 | &mut self, 87 | _: &mut TabletContext, 88 | _: &mut IO, 89 | ) -> FinishQueryRMPrepared { 90 | FinishQueryRMPrepared { 91 | sharding_gen: self.sharding_gen.clone(), 92 | region_lock: self.region_lock.clone(), 93 | timestamp: self.timestamp.clone(), 94 | update_view: self.update_view.clone(), 95 | } 96 | } 97 | 98 | fn prepared_plm_inserted(&mut self, ctx: &mut TabletContext, _: &mut IO) { 99 | ctx.inserting_prepared_writes.remove(&self.timestamp); 100 | ctx.prepared_writes.insert(self.timestamp.clone(), self.region_lock.clone()); 101 | } 102 | 103 | fn mk_committed_plm( 104 | &mut self, 105 | _: &mut TabletContext, 106 | _: &mut IO, 107 | ) -> FinishQueryRMCommitted { 108 | FinishQueryRMCommitted {} 109 | } 110 | 111 | fn committed_plm_inserted( 112 | &mut self, 113 | ctx: &mut TabletContext, 114 | io_ctx: &mut IO, 115 | query_id: &QueryId, 116 | ) { 117 | commit_to_storage(&mut ctx.storage, &self.timestamp, self.update_view.clone()); 118 | let region_lock = ctx.prepared_writes.remove(&self.timestamp).unwrap(); 119 | ctx.committed_writes.insert(self.timestamp.clone(), region_lock); 120 | 121 | // Trace the commit 122 | io_ctx.general_trace(GeneralTraceMessage::CommittedQueryId( 123 | query_id.clone(), 124 | self.timestamp.clone(), 125 | )); 126 | } 127 | 128 | fn mk_aborted_plm( 129 | &mut self, 130 | _: &mut TabletContext, 131 | _: &mut IO, 132 | ) -> FinishQueryRMAborted { 133 | FinishQueryRMAborted {} 134 | } 135 | 136 | fn aborted_plm_inserted(&mut self, ctx: &mut TabletContext, _: &mut IO) { 137 | ctx.prepared_writes.remove(&self.timestamp).unwrap(); 138 | } 139 | 140 | fn reconfig_snapshot(&self) -> FinishQueryRMInner { 141 | self.clone() 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/shard_split_slave_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{cur_timestamp, QueryId, Timestamp}; 2 | use crate::common::{mk_t, BasicIOCtx}; 3 | use crate::common::{TNodePath, TabletGroupId}; 4 | use crate::message as msg; 5 | use crate::server::ServerContextBase; 6 | use crate::shard_pending_es::ShardingSplitPLm; 7 | use crate::shard_split_tm_es::{ 8 | ShardNodePath, ShardSplitClosed, ShardSplitCommit, ShardSplitPrepare, ShardSplitPrepared, 9 | ShardSplitTMPayloadTypes, 10 
| }; 11 | use crate::slave::{SlaveContext, SlavePLm}; 12 | use crate::stmpaxos2pc_rm::{ 13 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 14 | STMPaxos2PCRMOuter, 15 | }; 16 | use crate::stmpaxos2pc_tm::TMMessage; 17 | use crate::tablet::ShardingSnapshot; 18 | use serde::{Deserialize, Serialize}; 19 | use std::cmp::max; 20 | 21 | // ----------------------------------------------------------------------------------------------- 22 | // Payloads 23 | // ----------------------------------------------------------------------------------------------- 24 | 25 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 26 | pub struct ShardSplitSlaveRMPayloadTypes {} 27 | 28 | impl RMPayloadTypes for ShardSplitSlaveRMPayloadTypes { 29 | type TM = ShardSplitTMPayloadTypes; 30 | type RMContext = SlaveContext; 31 | 32 | // Actions 33 | type RMCommitActionData = (TabletGroupId, QueryId); 34 | 35 | // RM PLm 36 | type RMPreparedPLm = ShardSplitSlaveRMPrepared; 37 | type RMCommittedPLm = ShardSplitSlaveRMCommitted; 38 | type RMAbortedPLm = ShardSplitSlaveRMAborted; 39 | } 40 | 41 | // RM PLm 42 | 43 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 44 | pub struct ShardSplitSlaveRMPrepared {} 45 | 46 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 47 | pub struct ShardSplitSlaveRMCommitted { 48 | /// The `TabletGroupId` for the new Tablet that will be created. 49 | pub tid: TabletGroupId, 50 | } 51 | 52 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 53 | pub struct ShardSplitSlaveRMAborted {} 54 | 55 | // ----------------------------------------------------------------------------------------------- 56 | // RMServerContext ShardSplitSlave 57 | // ----------------------------------------------------------------------------------------------- 58 | 59 | impl RMServerContext for SlaveContext { 60 | fn push_plm(&mut self, plm: RMPLm) { 61 | self.slave_bundle.plms.push(SlavePLm::ShardingSplitPLm(ShardingSplitPLm::ShardSplit(plm))); 62 | } 63 | 64 | fn send_to_tm( 65 | &mut self, 66 | io_ctx: &mut IO, 67 | _: &(), 68 | msg: TMMessage, 69 | ) { 70 | self.send_to_master(io_ctx, msg::MasterRemotePayload::ShardSplit(msg)); 71 | } 72 | 73 | fn mk_node_path(&self) -> ShardNodePath { 74 | ShardNodePath::Slave(self.this_sid.clone()) 75 | } 76 | 77 | fn is_leader(&self) -> bool { 78 | SlaveContext::is_leader(self) 79 | } 80 | } 81 | 82 | // ----------------------------------------------------------------------------------------------- 83 | // ShardSplitSlaveES Implementation 84 | // ----------------------------------------------------------------------------------------------- 85 | 86 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 87 | pub struct ShardSplitSlaveRMInner {} 88 | 89 | pub type ShardSplitSlaveRMES = 90 | STMPaxos2PCRMOuter; 91 | pub type ShardSplitSlaveRMAction = STMPaxos2PCRMAction; 92 | 93 | impl STMPaxos2PCRMInner for ShardSplitSlaveRMInner { 94 | fn new( 95 | _: &mut SlaveContext, 96 | _: &mut IO, 97 | _: ShardSplitPrepare, 98 | ) -> ShardSplitSlaveRMInner { 99 | ShardSplitSlaveRMInner {} 100 | } 101 | 102 | fn new_follower( 103 | _: &mut SlaveContext, 104 | _: &mut IO, 105 | _: ShardSplitSlaveRMPrepared, 106 | ) -> ShardSplitSlaveRMInner { 107 | ShardSplitSlaveRMInner {} 108 | } 109 | 110 | fn mk_closed() -> ShardSplitClosed { 111 | ShardSplitClosed {} 112 | } 113 | 114 | fn mk_prepared_plm( 115 | &mut self, 116 | _: &mut SlaveContext, 117 | _: &mut IO, 118 | ) -> Option { 119 
| Some(ShardSplitSlaveRMPrepared {}) 120 | } 121 | 122 | fn prepared_plm_inserted( 123 | &mut self, 124 | _: &mut SlaveContext, 125 | _: &mut IO, 126 | ) -> ShardSplitPrepared { 127 | ShardSplitPrepared {} 128 | } 129 | 130 | fn mk_committed_plm( 131 | &mut self, 132 | _: &mut SlaveContext, 133 | _: &mut IO, 134 | commit: &ShardSplitCommit, 135 | ) -> ShardSplitSlaveRMCommitted { 136 | ShardSplitSlaveRMCommitted { tid: commit.target_new.tid.clone() } 137 | } 138 | 139 | fn committed_plm_inserted( 140 | &mut self, 141 | _: &mut SlaveContext, 142 | _: &mut IO, 143 | commit: &RMCommittedPLm, 144 | ) -> (TabletGroupId, QueryId) { 145 | (commit.payload.tid.clone(), commit.query_id.clone()) 146 | } 147 | 148 | fn mk_aborted_plm( 149 | &mut self, 150 | _: &mut SlaveContext, 151 | _: &mut IO, 152 | ) -> ShardSplitSlaveRMAborted { 153 | ShardSplitSlaveRMAborted {} 154 | } 155 | 156 | fn aborted_plm_inserted(&mut self, _: &mut SlaveContext, _: &mut IO) {} 157 | 158 | fn reconfig_snapshot(&self) -> ShardSplitSlaveRMInner { 159 | self.clone() 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/test/expression_test.rs: -------------------------------------------------------------------------------- 1 | use crate::common::ColVal; 2 | use crate::common::{ColBound, SingleBound}; 3 | use crate::expression::{ 4 | col_bound_intersect_interval, construct_cexpr, construct_colvaln, does_col_regions_intersect, 5 | evaluate_binary_op, evaluate_c_expr, CExpr, EvalError, 6 | }; 7 | use crate::query_converter::flatten_val_expr_r; 8 | use crate::sql_ast::{iast, proc}; 9 | use crate::sql_parser::convert_expr; 10 | use crate::test_utils::cn; 11 | use sqlparser::ast; 12 | use sqlparser::dialect::GenericDialect; 13 | use sqlparser::parser::Parser; 14 | use sqlparser::tokenizer::Tokenizer; 15 | use std::collections::BTreeMap; 16 | 17 | // ----------------------------------------------------------------------------------------------- 18 | // Expression Evaluation 19 | // ----------------------------------------------------------------------------------------------- 20 | 21 | /// Utility for converting a raw SQL expression, not containing `Subquery`s or `ColumnRef`s. 
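/// (Every stage below `unwrap`s, so this helper is only suitable for well-formed test input.)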
22 | fn parse_expr(expr_str: &str) -> CExpr { 23 | let dialect = GenericDialect {}; 24 | let mut tokenizer = Tokenizer::new(&dialect, expr_str); 25 | let tokens = tokenizer.tokenize().unwrap(); 26 | let mut parser = Parser::new(tokens, &dialect); 27 | let sql_expr = parser.parse_expr().unwrap(); 28 | let internal_expr = convert_expr(sql_expr).unwrap(); 29 | let val_expr = flatten_val_expr_r(&internal_expr, &mut 0).unwrap(); 30 | construct_cexpr(&val_expr, &mut BTreeMap::new(), &mut Vec::new(), &mut 0).unwrap() 31 | } 32 | 33 | #[test] 34 | fn construct_colvaln_test() { 35 | // Number 36 | assert_eq!(construct_colvaln(iast::Value::Number("42".to_string())), Ok(Some(ColVal::Int(42)))); 37 | assert_eq!(construct_colvaln(iast::Value::Number("".to_string())), Err(EvalError::GenericError)); 38 | assert_eq!( 39 | construct_colvaln(iast::Value::Number("999999999999".to_string())), 40 | Err(EvalError::GenericError) 41 | ); 42 | assert_eq!( 43 | construct_colvaln(iast::Value::Number("1234hello".to_string())), 44 | Err(EvalError::GenericError) 45 | ); 46 | 47 | // String, Boolean, Null 48 | assert_eq!( 49 | construct_colvaln(iast::Value::QuotedString("hello".to_string())), 50 | Ok(Some(ColVal::String("hello".to_string()))) 51 | ); 52 | assert_eq!(construct_colvaln(iast::Value::Boolean(true)), Ok(Some(ColVal::Bool(true)))); 53 | assert_eq!(construct_colvaln(iast::Value::Null), Ok(None)); 54 | } 55 | 56 | #[test] 57 | fn evaluate_unary_op_test() { 58 | // Plus 59 | assert_eq!(evaluate_c_expr(&parse_expr("+10")), Ok(Some(ColVal::Int(10)))); 60 | assert_eq!(evaluate_c_expr(&parse_expr("-10")), Ok(Some(ColVal::Int(-10)))); 61 | // Not 62 | assert_eq!(evaluate_c_expr(&parse_expr("NOT true")), Ok(Some(ColVal::Bool(false)))); 63 | assert_eq!(evaluate_c_expr(&parse_expr("NOT (NULL)")), Ok(None)); 64 | } 65 | 66 | #[test] 67 | fn evaluate_binary_op_test() { 68 | // Divide 69 | assert_eq!(evaluate_c_expr(&parse_expr("20/10")), Ok(Some(ColVal::Int(2)))); 70 | assert_eq!(evaluate_c_expr(&parse_expr("20/15")), Ok(Some(ColVal::Int(1)))); 71 | assert_eq!(evaluate_c_expr(&parse_expr("20/25")), Ok(Some(ColVal::Int(0)))); 72 | assert_eq!(evaluate_c_expr(&parse_expr("-30/20")), Ok(Some(ColVal::Int(-1)))); 73 | assert_eq!(evaluate_c_expr(&parse_expr("10/0")), Err(EvalError::InvalidBinaryOp)); 74 | // OR 75 | assert_eq!(evaluate_c_expr(&parse_expr("true OR NULL")), Ok(Some(ColVal::Bool(true)))); 76 | assert_eq!(evaluate_c_expr(&parse_expr("NULL OR true")), Ok(Some(ColVal::Bool(true)))); 77 | assert_eq!(evaluate_c_expr(&parse_expr("false OR NULL")), Ok(None)); 78 | assert_eq!(evaluate_c_expr(&parse_expr("NULL OR false")), Ok(None)); 79 | assert_eq!(evaluate_c_expr(&parse_expr("NULL OR NULL")), Ok(None)); 80 | assert_eq!(evaluate_c_expr(&parse_expr("false OR false")), Ok(Some(ColVal::Bool(false)))); 81 | assert_eq!(evaluate_c_expr(&parse_expr("false OR 3")), Err(EvalError::InvalidBinaryOp)); 82 | } 83 | 84 | // ----------------------------------------------------------------------------------------------- 85 | // Region Isolation Property Utilities 86 | // ----------------------------------------------------------------------------------------------- 87 | 88 | fn unb() -> SingleBound { 89 | SingleBound::Unbounded 90 | } 91 | 92 | fn inc(val: T) -> SingleBound { 93 | SingleBound::Included(val) 94 | } 95 | 96 | fn exl(val: T) -> SingleBound { 97 | SingleBound::Excluded(val) 98 | } 99 | 100 | /// `ColBound` of `Int` 101 | fn cb(start: SingleBound, end: SingleBound) -> ColBound { 102 | ColBound { start, end } 103 | } 104 | 105 
| #[test] 106 | fn col_bound_intersect_interval_test() { 107 | assert_eq!( 108 | col_bound_intersect_interval(&cb(inc(3), inc(5)), &cb(inc(4), inc(6))), 109 | (&inc(4), &inc(5)) 110 | ); 111 | assert_eq!( 112 | col_bound_intersect_interval(&cb(unb(), exl(5)), &cb(unb(), inc(4))), 113 | (&unb(), &inc(4)) 114 | ); 115 | assert_eq!( 116 | col_bound_intersect_interval(&cb(exl(3), exl(5)), &cb(unb(), inc(3))), 117 | (&exl(3), &inc(3)) 118 | ); 119 | } 120 | 121 | #[test] 122 | fn does_col_regions_intersect_test() { 123 | let cols1 = vec![cn("c1"), cn("c2")]; 124 | let cols2 = vec![cn("c2"), cn("c3")]; 125 | let cols3 = vec![cn("c4")]; 126 | let cols4 = vec![]; 127 | assert!(does_col_regions_intersect(&cols1, &cols2)); 128 | assert!(does_col_regions_intersect(&cols2, &cols1)); 129 | assert!(!does_col_regions_intersect(&cols1, &cols3)); 130 | assert!(!does_col_regions_intersect(&cols3, &cols1)); 131 | assert!(!does_col_regions_intersect(&cols3, &cols4)); 132 | assert!(!does_col_regions_intersect(&cols4, &cols3)); 133 | } 134 | -------------------------------------------------------------------------------- /notes.md: -------------------------------------------------------------------------------- 1 | # Build & Run 2 | 3 | cargo build 4 | 5 | # Docker 6 | 7 | ## Setup 8 | docker network create --subnet=172.20.0.0/16 runiversal-net 9 | 10 | ## Build 11 | docker build -t runiversal -f Dockerfile.init . 12 | docker build -t runiversal . 13 | 14 | ## Local Build and Test 15 | cargo build 16 | 17 | cargo run --release --bin paxos && 18 | cargo run --release --bin paxos2pc_sim && 19 | cargo run --release --bin simtest 20 | cargo run --release --bin simtest -- -i 9 21 | 22 | cargo run --bin client 2>/dev/null 23 | docker run -it runiversal 24 | 25 | ## Run & Stop 26 | docker run --cap-add=NET_ADMIN -it --name=rclient4 --ip 172.20.0.4 --network=runiversal-net runiversal scripts/client -i 172.20.0.4 -e 172.20.0.10 27 | docker run --cap-add=NET_ADMIN -it --name=runiversal10 --ip 172.20.0.10 --network=runiversal-net runiversal scripts/transact -i 172.20.0.10 -t masterbootup 28 | docker run --cap-add=NET_ADMIN -d --name=runiversal15 --ip 172.20.0.15 --network=runiversal-net runiversal scripts/transact -i 172.20.0.15 -t freenode -f newslave -e 172.20.0.10 29 | 30 | docker kill rclient4; docker container rm rclient4; 31 | docker kill runiversal10; docker container rm runiversal10; 32 | docker kill runiversal15; docker container rm runiversal15; 33 | 34 | ## Run Script Setup 35 | To build: 36 | 37 | ./run build 38 | 39 | To start up the system and create an initial client, do: 40 | 41 | ./run start 42 | 43 | To create extra clients and nodes, do: 44 | 45 | ./run new_client 3 10 46 | ./run new_node 25 reconfig 10 47 | ./run new_node 26 newslave 10 48 | ./run new_node 27 newslave 10 49 | ./run new_node 28 newslave 10 50 | ./run new_node 29 newslave 10 51 | ./run new_node 30 newslave 10 52 | ./run new_node 31 reconfig 10 53 | ./run new_node 32 reconfig 10 54 | ./run new_node 33 reconfig 10 55 | ./run new_node 34 reconfig 10 56 | ./run new_node 35 reconfig 10 57 | 58 | master_target 172.20.0.1 59 | slave_target 172.20.0.16 60 | 61 | To clean up everything, do: 62 | 63 | ./run clean 64 | ./run cclean 2 65 | ./run cclean 3 66 | ./run nclean 25 67 | ./run nclean 26 68 | ./run dclean 69 | 70 | ## Demo 71 | 1. Run `./run start` in a terminal pane. This will start the MasterGroup, 2 SlaveGroups, and a client. Run `live` in that view. 72 | 2. Run `./run new_client 3 10` to start a new client. 73 | 3.
Explicitly connect to `172.20.0.15` with `slave_target 172.20.0.15` (the Leadership for the first SlaveGroup). (This is useful for showcasing node deletion later.) 74 | 4. Run the Basic/Advanced Queries. 75 | 5. Kill `172.20.0.15` with `./run nclean 15` (or similar). (This shows reconfiguration.) 76 | 6. Create a free node so that it can replace the one that was just killed: `./run new_node 25 reconfig 10` 77 | 7. Explicitly connect to `172.20.0.17` with `slave_target 172.20.0.17` and then fire some queries (just to show that the new leaders are actually usable). 78 | 8. Create 5 free nodes as `newslave`, e.g. `./run new_node 26 newslave 10`, to show how new SlaveGroups are formed automatically. 79 | 9. Explicitly connect to `172.20.0.26` with `slave_target 172.20.0.26` and then fire some queries (just to show that the new Groups are actually used). 80 | 10. Run the `./run new_node 31 reconfig 10` commands (nodes 31 through 35) to create lots of free nodes. 81 | 11. Kill `172.20.0.26` with `./run nclean 26` (or similar). (This shows reconfiguration, immediately followed by the consumption of a free node.) 82 | 12. Explicitly connect to `172.20.0.29` with `slave_target 172.20.0.29` and then fire some queries (just to show that the new Groups are actually used). 83 | 13. Quit the live system with `q`, and call `./run dclean` to clean up. 84 | 14. Run the simulation tests and describe them. 85 | 86 | ## Basic Queries 87 | ```sql 88 | CREATE TABLE user(id INT PRIMARY KEY); 89 | INSERT INTO user(id) VALUES (1), (2), (3); 90 | SELECT * FROM user; 91 | 92 | ALTER TABLE user ADD name STRING; 93 | UPDATE user SET name = 'henry' WHERE id = 2; 94 | SELECT * FROM user; 95 | 96 | CREATE TABLE inventory(id INT PRIMARY KEY, name VARCHAR); 97 | INSERT INTO inventory(id, name) VALUES (1, 'pasindu'), (2, 'hello'); 98 | SELECT id, name FROM inventory; 99 | 100 | DROP TABLE user; 101 | DROP TABLE inventory; 102 | ``` 103 | 104 | ## Advanced Queries 105 | 106 | ### DDL 107 | ```sql 108 | CREATE TABLE inventory ( 109 | product_id INT PRIMARY KEY, email VARCHAR, 110 | count INT 111 | ); 112 | -- Separate 113 | INSERT INTO inventory (product_id, email, count) 114 | VALUES 115 | (0, 'my_email_0', 15), 116 | (1, 'my_email_1', 25); 117 | -- Separate 118 | CREATE TABLE user ( 119 | email VARCHAR PRIMARY KEY, balance INT, 120 | ); 121 | -- Separate 122 | INSERT INTO user (email, balance) 123 | VALUES 124 | ('my_email_0', 50), 125 | ('my_email_1', 60), 126 | ('my_email_2', 70); 127 | -- Separate 128 | CREATE TABLE product_stock (id INT PRIMARY KEY, product_id INT,); 129 | -- Separate 130 | INSERT INTO product_stock (id, product_id) 131 | VALUES 132 | (0, 0), 133 | (1, 1), 134 | (2, 1); 135 | ``` 136 | ### DQL 137 | 138 | ```sql 139 | -- Join 140 | SELECT U2.email, U1.balance, product_id 141 | FROM user AS U2 JOIN (user AS U1 LEFT JOIN inventory AS I) 142 | ON ((SELECT count(id) 143 | FROM product_stock 144 | WHERE product_id = I.product_id) = 2) 145 | AND U1.balance <= 60 146 | WHERE U2.email = 'my_email_0'; 147 | 148 | -- CTEs 149 | WITH 150 | v1 AS (SELECT email AS e, balance * 2 151 | FROM user 152 | WHERE email = 'my_email_0') 153 | SELECT * 154 | FROM v1; 155 | 156 | -- Multi Stage 157 | UPDATE user 158 | SET balance = balance + 20 159 | WHERE email = ( 160 | SELECT email 161 | FROM inventory 162 | WHERE product_id = 1); 163 | 164 | UPDATE inventory 165 | SET count = count + 5 166 | WHERE email = ( 167 | SELECT email 168 | FROM user 169 | WHERE balance >= 80); 170 | ```
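The `WHERE` predicates above go through the same sqlparser pipeline that `src/test/expression_test.rs` exercises. A minimal standalone sketch of that flow, assuming only the `sqlparser` 0.9 dependency from `Cargo.toml` (the predicate text is taken from the demo query above):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    // Tokenize and parse one of the demo predicates, mirroring `parse_expr`
    // in src/test/expression_test.rs.
    let dialect = GenericDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, "balance >= 80");
    let tokens = tokenizer.tokenize().unwrap();
    let mut parser = Parser::new(tokens, &dialect);
    let sql_expr = parser.parse_expr().unwrap();
    // Prints a BinaryOp-shaped AST: left = "balance", op = GtEq, right = 80.
    println!("{:?}", sql_expr);
}
```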
-------------------------------------------------------------------------------- /src/bin/transact/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | mod server; 4 | 5 | #[macro_use] 6 | extern crate runiversal; 7 | 8 | use crate::server::{ProdCoreIOCtx, ProdIOCtx, TIMER_INCREMENT}; 9 | use clap::{arg, App}; 10 | use env_logger::Builder; 11 | use log::LevelFilter; 12 | use rand::{RngCore, SeedableRng}; 13 | use rand_xorshift::XorShiftRng; 14 | use runiversal::common::{ 15 | mk_t, BasicIOCtx, FreeNodeIOCtx, GossipData, InternalMode, MasterIOCtx, NodeIOCtx, SlaveIOCtx, 16 | }; 17 | use runiversal::common::{CoordGroupId, EndpointId, Gen, LeadershipId, PaxosGroupId, SlaveGroupId}; 18 | use runiversal::coord::{CoordConfig, CoordContext, CoordForwardMsg, CoordState}; 19 | use runiversal::free_node_manager::FreeNodeType; 20 | use runiversal::master::{ 21 | FullMasterInput, MasterConfig, MasterContext, MasterState, MasterTimerInput, 22 | }; 23 | use runiversal::message as msg; 24 | use runiversal::message::FreeNodeMessage; 25 | use runiversal::net::{handle_self_conn, send_msg, start_acceptor_thread, SendAction}; 26 | use runiversal::node::{get_prod_configs, GenericInput, NodeConfig, NodeState}; 27 | use runiversal::paxos::PaxosConfig; 28 | use runiversal::slave::{ 29 | FullSlaveInput, SlaveBackMessage, SlaveConfig, SlaveContext, SlaveState, SlaveTimerInput, 30 | }; 31 | use runiversal::tablet::TabletConfig; 32 | use runiversal::test_utils as tu; 33 | use runiversal::test_utils::mk_seed; 34 | use std::collections::{BTreeMap, LinkedList}; 35 | use std::env; 36 | use std::net::{TcpListener, TcpStream}; 37 | use std::sync::mpsc::Sender; 38 | use std::sync::{mpsc, Arc, Mutex}; 39 | use std::thread; 40 | use std::time::{SystemTime, UNIX_EPOCH}; 41 | 42 | // ----------------------------------------------------------------------------------------------- 43 | // Main 44 | // ----------------------------------------------------------------------------------------------- 45 | 46 | fn main() { 47 | // Setup CLI parsing 48 | let matches = App::new("rUniversalDB") 49 | .version("1.0") 50 | .author("Pasindu M. ") 51 | .arg( 52 | arg!(-t --startup_type ) 53 | .required(true) 54 | .help("Indicates if this is an initial Master node ('masterbootup') or not ('freenode').'") 55 | .possible_values(["masterbootup", "freenode"]), 56 | ) 57 | .arg(arg!(-i --ip ).required(true).help("The IP address of the current host.")) 58 | .arg( 59 | arg!(-f --freenode_type ) 60 | .required(false) 61 | .help("The type of freenode this is.") 62 | .possible_values(["newslave", "reconfig"]), 63 | ) 64 | .arg(arg!(-e --entry_ip ).required(false).help( 65 | "The IP address of the current Master \ 66 | Leader. (This is unused if the startup_type is 'masterbootup').", 67 | )) 68 | .get_matches(); 69 | 70 | // Setup logging 71 | Builder::new().filter_level(LevelFilter::max()).init(); 72 | 73 | // Get required arguments 74 | let startup_type = matches.value_of("startup_type").unwrap().to_string(); 75 | let this_ip = matches.value_of("ip").unwrap().to_string(); 76 | 77 | // The mpsc channel for passing data to the Server Thread from all FromNetwork Threads. 78 | let (to_server_sender, to_server_receiver) = mpsc::channel::(); 79 | // Maps the IP addresses to a FromServer Queue, used to send data to Outgoing Connections. 
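// (`send_msg` below looks up the destination `EndpointId` in this map and enqueues a
// `SendAction` on the corresponding queue.)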
80 | let out_conn_map = Arc::new(Mutex::new(BTreeMap::>::new())); 81 | 82 | // Start the Accepting Thread 83 | start_acceptor_thread(&to_server_sender, this_ip.clone()); 84 | 85 | // Create the self-connection 86 | let this_internal_mode = InternalMode::Internal; 87 | let this_eid = EndpointId::new(this_ip, this_internal_mode.clone()); 88 | handle_self_conn(&this_eid, &out_conn_map, &to_server_sender); 89 | 90 | // Run startup_type specific code. 91 | match &startup_type[..] { 92 | "masterbootup" => {} 93 | "freenode" => { 94 | // Parse entry_ip 95 | let master_ip = matches 96 | .value_of("entry_ip") 97 | .expect("entry_ip is required if startup_type is 'freenode'") 98 | .to_string(); 99 | let master_eid = EndpointId::new(master_ip, InternalMode::Internal); 100 | 101 | // Parse freenode_type 102 | let freenode_type = matches 103 | .value_of("freenode_type") 104 | .expect("freenode_type is required if startup_type is 'freenode'"); 105 | 106 | let node_type = match freenode_type { 107 | "newslave" => FreeNodeType::NewSlaveFreeNode, 108 | "reconfig" => FreeNodeType::ReconfigFreeNode, 109 | _ => unreachable!(), 110 | }; 111 | 112 | // Send RegisterFreeNode 113 | send_msg( 114 | &out_conn_map, 115 | &master_eid, 116 | SendAction::new( 117 | msg::NetworkMessage::Master(msg::MasterMessage::FreeNodeAssoc( 118 | msg::FreeNodeAssoc::RegisterFreeNode(msg::RegisterFreeNode { 119 | sender_eid: this_eid.clone(), 120 | node_type, 121 | }), 122 | )), 123 | None, 124 | ), 125 | &this_internal_mode, 126 | ); 127 | } 128 | _ => unreachable!(), 129 | } 130 | 131 | let mut io_ctx = ProdIOCtx { 132 | rand: XorShiftRng::from_entropy(), 133 | out_conn_map, 134 | exited: false, 135 | to_top: to_server_sender, 136 | tablet_map: Default::default(), 137 | coord_map: Default::default(), 138 | tasks: Arc::new(Mutex::new(Default::default())), 139 | }; 140 | io_ctx.start(); 141 | 142 | let mut node = NodeState::new(this_eid, get_prod_configs()); 143 | node.bootstrap(&mut io_ctx); 144 | 145 | // Enter the main loop forever.
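// Each iteration blocks on `to_server_receiver` until a `GenericInput` arrives (from a
// FromNetwork thread, the self-connection, or a `ProdIOCtx` task), then hands it to
// `NodeState::process_input`.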
146 | loop { 147 | let generic_input = to_server_receiver.recv().unwrap(); 148 | node.process_input(&mut io_ctx, generic_input); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/shard_split_tablet_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{cur_timestamp, QueryId, Timestamp}; 2 | use crate::common::{mk_t, BasicIOCtx}; 3 | use crate::common::{ 4 | ShardingGen, SlaveGroupId, TNodePath, TablePath, TabletGroupId, TabletKeyRange, 5 | }; 6 | use crate::message as msg; 7 | use crate::server::ServerContextBase; 8 | use crate::shard_split_tm_es::{ 9 | STRange, ShardNodePath, ShardSplitClosed, ShardSplitCommit, ShardSplitPrepare, 10 | ShardSplitPrepared, ShardSplitTMPayloadTypes, 11 | }; 12 | use crate::stmpaxos2pc_rm::{ 13 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 14 | STMPaxos2PCRMOuter, 15 | }; 16 | use crate::stmpaxos2pc_tm::TMMessage; 17 | use crate::tablet::{TabletContext, TabletPLm}; 18 | use serde::{Deserialize, Serialize}; 19 | use std::cmp::max; 20 | 21 | // ----------------------------------------------------------------------------------------------- 22 | // Payloads 23 | // ----------------------------------------------------------------------------------------------- 24 | 25 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 26 | pub struct ShardSplitTabletRMPayloadTypes {} 27 | 28 | impl RMPayloadTypes for ShardSplitTabletRMPayloadTypes { 29 | type TM = ShardSplitTMPayloadTypes; 30 | type RMContext = TabletContext; 31 | 32 | // Actions. This contains the target Tablet to create. 33 | type RMCommitActionData = STRange; 34 | 35 | // RM PLm 36 | type RMPreparedPLm = ShardSplitTabletRMPrepared; 37 | type RMCommittedPLm = ShardSplitTabletRMCommitted; 38 | type RMAbortedPLm = ShardSplitTabletRMAborted; 39 | } 40 | 41 | // RM PLm 42 | 43 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 44 | pub struct ShardSplitTabletRMPrepared {} 45 | 46 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 47 | pub struct ShardSplitTabletRMCommitted { 48 | pub sharding_gen: ShardingGen, 49 | pub target_old: STRange, 50 | pub target_new: STRange, 51 | } 52 | 53 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 54 | pub struct ShardSplitTabletRMAborted {} 55 | 56 | // ----------------------------------------------------------------------------------------------- 57 | // RMServerContext ShardSplitTablet 58 | // ----------------------------------------------------------------------------------------------- 59 | 60 | impl RMServerContext for TabletContext { 61 | fn push_plm(&mut self, plm: RMPLm) { 62 | self.tablet_bundle.push(TabletPLm::ShardSplit(plm)); 63 | } 64 | 65 | fn send_to_tm( 66 | &mut self, 67 | io_ctx: &mut IO, 68 | _: &(), 69 | msg: TMMessage, 70 | ) { 71 | self.send_to_master(io_ctx, msg::MasterRemotePayload::ShardSplit(msg)); 72 | } 73 | 74 | fn mk_node_path(&self) -> ShardNodePath { 75 | ShardNodePath::Tablet(TabletContext::mk_node_path(self)) 76 | } 77 | 78 | fn is_leader(&self) -> bool { 79 | TabletContext::is_leader(self) 80 | } 81 | } 82 | 83 | // ----------------------------------------------------------------------------------------------- 84 | // ShardSplitTabletRMES Implementation 85 | // ----------------------------------------------------------------------------------------------- 86 | 87 | #[derive(Serialize, Deserialize, Debug, Clone, 
PartialEq, Eq)] 88 | pub struct ShardSplitTabletRMInner {} 89 | 90 | pub type ShardSplitTabletRMES = 91 | STMPaxos2PCRMOuter; 92 | pub type ShardSplitTabletRMAction = STMPaxos2PCRMAction; 93 | 94 | impl STMPaxos2PCRMInner for ShardSplitTabletRMInner { 95 | fn new( 96 | _: &mut TabletContext, 97 | _: &mut IO, 98 | _: ShardSplitPrepare, 99 | ) -> ShardSplitTabletRMInner { 100 | ShardSplitTabletRMInner {} 101 | } 102 | 103 | fn new_follower( 104 | _: &mut TabletContext, 105 | _: &mut IO, 106 | _: ShardSplitTabletRMPrepared, 107 | ) -> ShardSplitTabletRMInner { 108 | ShardSplitTabletRMInner {} 109 | } 110 | 111 | fn mk_closed() -> ShardSplitClosed { 112 | ShardSplitClosed {} 113 | } 114 | 115 | fn mk_prepared_plm( 116 | &mut self, 117 | ctx: &mut TabletContext, 118 | _: &mut IO, 119 | ) -> Option { 120 | if ctx.pause_ddl() { 121 | None 122 | } else { 123 | Some(ShardSplitTabletRMPrepared {}) 124 | } 125 | } 126 | 127 | fn prepared_plm_inserted( 128 | &mut self, 129 | _: &mut TabletContext, 130 | _: &mut IO, 131 | ) -> ShardSplitPrepared { 132 | ShardSplitPrepared {} 133 | } 134 | 135 | fn mk_committed_plm( 136 | &mut self, 137 | _: &mut TabletContext, 138 | _: &mut IO, 139 | commit: &ShardSplitCommit, 140 | ) -> ShardSplitTabletRMCommitted { 141 | ShardSplitTabletRMCommitted { 142 | sharding_gen: commit.sharding_gen.clone(), 143 | target_old: commit.target_old.clone(), 144 | target_new: commit.target_new.clone(), 145 | } 146 | } 147 | 148 | fn committed_plm_inserted( 149 | &mut self, 150 | ctx: &mut TabletContext, 151 | _: &mut IO, 152 | plm: &RMCommittedPLm, 153 | ) -> STRange { 154 | // Update Sharding data. 155 | ctx.this_sharding_gen = plm.payload.sharding_gen.clone(); 156 | ctx.this_tablet_key_range = plm.payload.target_old.range.clone(); 157 | plm.payload.target_new.clone() 158 | } 159 | 160 | fn mk_aborted_plm( 161 | &mut self, 162 | _: &mut TabletContext, 163 | _: &mut IO, 164 | ) -> ShardSplitTabletRMAborted { 165 | ShardSplitTabletRMAborted {} 166 | } 167 | 168 | fn aborted_plm_inserted(&mut self, _: &mut TabletContext, _: &mut IO) {} 169 | 170 | fn reconfig_snapshot(&self) -> ShardSplitTabletRMInner { 171 | self.clone() 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/drop_table_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::TNodePath; 2 | use crate::common::{cur_timestamp, Timestamp}; 3 | use crate::common::{mk_t, BasicIOCtx}; 4 | use crate::drop_table_tm_es::{ 5 | DropTableClosed, DropTableCommit, DropTablePrepare, DropTablePrepared, DropTableTMPayloadTypes, 6 | }; 7 | use crate::message as msg; 8 | use crate::server::ServerContextBase; 9 | use crate::stmpaxos2pc_rm::{ 10 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 11 | STMPaxos2PCRMOuter, 12 | }; 13 | use crate::stmpaxos2pc_tm::TMMessage; 14 | use crate::tablet::{TabletContext, TabletPLm}; 15 | use serde::{Deserialize, Serialize}; 16 | use std::cmp::max; 17 | 18 | // ----------------------------------------------------------------------------------------------- 19 | // Payloads 20 | // ----------------------------------------------------------------------------------------------- 21 | 22 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 23 | pub struct DropTableRMPayloadTypes {} 24 | 25 | impl RMPayloadTypes for DropTableRMPayloadTypes { 26 | type TM = DropTableTMPayloadTypes; 27 | type RMContext = TabletContext; 28 | 29 | // Actions 30 
| type RMCommitActionData = Timestamp; 31 | 32 | // RM PLm 33 | type RMPreparedPLm = DropTableRMPrepared; 34 | type RMCommittedPLm = DropTableRMCommitted; 35 | type RMAbortedPLm = DropTableRMAborted; 36 | } 37 | 38 | // RM PLm 39 | 40 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 41 | pub struct DropTableRMPrepared { 42 | pub timestamp: Timestamp, 43 | } 44 | 45 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 46 | pub struct DropTableRMCommitted { 47 | pub timestamp: Timestamp, 48 | } 49 | 50 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 51 | pub struct DropTableRMAborted {} 52 | 53 | // ----------------------------------------------------------------------------------------------- 54 | // RMServerContext DropTable 55 | // ----------------------------------------------------------------------------------------------- 56 | 57 | impl RMServerContext for TabletContext { 58 | fn push_plm(&mut self, plm: RMPLm) { 59 | self.tablet_bundle.push(TabletPLm::DropTable(plm)); 60 | } 61 | 62 | fn send_to_tm( 63 | &mut self, 64 | io_ctx: &mut IO, 65 | _: &(), 66 | msg: TMMessage, 67 | ) { 68 | self.send_to_master(io_ctx, msg::MasterRemotePayload::DropTable(msg)); 69 | } 70 | 71 | fn mk_node_path(&self) -> TNodePath { 72 | TabletContext::mk_node_path(self) 73 | } 74 | 75 | fn is_leader(&self) -> bool { 76 | TabletContext::is_leader(self) 77 | } 78 | } 79 | 80 | // ----------------------------------------------------------------------------------------------- 81 | // DropTableES Implementation 82 | // ----------------------------------------------------------------------------------------------- 83 | 84 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 85 | pub struct DropTableRMInner { 86 | pub prepared_timestamp: Timestamp, 87 | } 88 | 89 | pub type DropTableRMES = STMPaxos2PCRMOuter; 90 | pub type DropTableRMAction = STMPaxos2PCRMAction; 91 | 92 | impl STMPaxos2PCRMInner for DropTableRMInner { 93 | fn new( 94 | ctx: &mut TabletContext, 95 | io_ctx: &mut IO, 96 | _: DropTablePrepare, 97 | ) -> DropTableRMInner { 98 | // Construct the `preparing_timestamp` 99 | let mut timestamp = cur_timestamp(io_ctx, ctx.tablet_config.timestamp_suffix_divisor); 100 | timestamp = max(timestamp, ctx.table_schema.val_cols.get_latest_lat()); 101 | timestamp = max(timestamp, ctx.presence_timestamp.clone()); 102 | for (_, req) in ctx.waiting_locked_cols.iter().chain(ctx.inserting_locked_cols.iter()) { 103 | timestamp = max(timestamp, req.timestamp.clone()); 104 | } 105 | timestamp = timestamp.add(mk_t(1)); 106 | 107 | DropTableRMInner { prepared_timestamp: timestamp } 108 | } 109 | 110 | fn new_follower( 111 | _: &mut TabletContext, 112 | _: &mut IO, 113 | payload: DropTableRMPrepared, 114 | ) -> DropTableRMInner { 115 | DropTableRMInner { prepared_timestamp: payload.timestamp } 116 | } 117 | 118 | fn mk_closed() -> DropTableClosed { 119 | DropTableClosed {} 120 | } 121 | 122 | fn mk_prepared_plm( 123 | &mut self, 124 | ctx: &mut TabletContext, 125 | _: &mut IO, 126 | ) -> Option { 127 | if ctx.pause_ddl() { 128 | None 129 | } else { 130 | Some(DropTableRMPrepared { timestamp: self.prepared_timestamp.clone() }) 131 | } 132 | } 133 | 134 | fn prepared_plm_inserted( 135 | &mut self, 136 | _: &mut TabletContext, 137 | _: &mut IO, 138 | ) -> DropTablePrepared { 139 | DropTablePrepared { timestamp: self.prepared_timestamp.clone() } 140 | } 141 | 142 | fn mk_committed_plm( 143 | &mut self, 144 | _: &mut TabletContext, 145 | _: &mut IO, 146 | commit: 
&DropTableCommit, 147 | ) -> DropTableRMCommitted { 148 | DropTableRMCommitted { timestamp: commit.timestamp.clone() } 149 | } 150 | 151 | /// Return the commit `Timestamp`, which serves as this Tablet's `RMCommitActionData`. 152 | fn committed_plm_inserted( 153 | &mut self, 154 | _: &mut TabletContext, 155 | _: &mut IO, 156 | committed_plm: &RMCommittedPLm, 157 | ) -> Timestamp { 158 | committed_plm.payload.timestamp.clone() 159 | } 160 | 161 | fn mk_aborted_plm( 162 | &mut self, 163 | _: &mut TabletContext, 164 | _: &mut IO, 165 | ) -> DropTableRMAborted { 166 | DropTableRMAborted {} 167 | } 168 | 169 | fn aborted_plm_inserted(&mut self, _: &mut TabletContext, _: &mut IO) {} 170 | 171 | fn reconfig_snapshot(&self) -> DropTableRMInner { 172 | self.clone() 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/ms_table_read_es.rs: -------------------------------------------------------------------------------- 1 | use crate::col_usage::{col_collecting_cb, QueryIterator}; 2 | use crate::common::{mk_qid, ColName, CoreIOCtx, OrigP, QueryESResult, WriteRegion}; 3 | use crate::common::{ 4 | ColType, ColVal, ColValN, ContextRow, PrimaryKey, QueryId, TablePath, TableView, TransTableName, 5 | }; 6 | use crate::expression::{is_true, EvalError}; 7 | use crate::gr_query_es::{GRQueryConstructorView, GRQueryES}; 8 | use crate::message as msg; 9 | use crate::ms_table_es::{GeneralQueryES, MSTableES, SqlQueryInner}; 10 | use crate::server::{mk_eval_error, ContextConstructor}; 11 | use crate::sql_ast::proc; 12 | 13 | use crate::storage::{GenericTable, MSStorageView}; 14 | use crate::table_read_es::{compute_read_region, fully_evaluate_select}; 15 | use crate::tablet::{ 16 | compute_subqueries, MSQueryES, RequestedReadProtected, StorageLocalTable, TPESAction, 17 | TabletContext, 18 | }; 19 | use std::collections::BTreeSet; 20 | use std::iter::FromIterator; 21 | use std::ops::Deref; 22 | 23 | // ----------------------------------------------------------------------------------------------- 24 | // MSTableReadES 25 | // ----------------------------------------------------------------------------------------------- 26 | 27 | pub type MSTableReadES = MSTableES; 28 | 29 | #[derive(Debug)] 30 | pub struct SelectInner { 31 | sql_query: proc::TableSelect, 32 | } 33 | 34 | impl SelectInner { 35 | pub fn new(sql_query: proc::TableSelect) -> Self { 36 | SelectInner { sql_query } 37 | } 38 | } 39 | 40 | impl SqlQueryInner for SelectInner { 41 | /// This function should only be called if we know `from` is not a `JoinNode`. 42 | fn table_path(&self) -> &TablePath { 43 | &self.sql_query.from.table_path 44 | } 45 | 46 | fn request_region_locks( 47 | &mut self, 48 | ctx: &mut TabletContext, 49 | io_ctx: &mut IO, 50 | es: &GeneralQueryES, 51 | ) -> Result { 52 | // Get extra columns that must be in the region due to `SELECT *`. 53 | let mut extra_cols = Vec::::new(); 54 | for item in &self.sql_query.projection { 55 | match item { 56 | proc::SelectItem::ExprWithAlias { .. } => {} 57 | proc::SelectItem::Wildcard { .. } => { 58 | // Choose all columns in the Table, and break out early 59 | // since there is no reason to continue. 60 | extra_cols = ctx.table_schema.get_schema_val_cols_static(&es.timestamp); 61 | break; 62 | } 63 | } 64 | } 65 | 66 | // Collect all `ColNames` of this table that all `ColumnRefs` refer to.
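// (`QueryIterator` walks the `TableSelect` AST, and `col_collecting_cb` records every column
// referenced under this table's alias, so the ReadRegion computed below covers them all.)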
67 | let mut safe_present_cols = Vec::::new(); 68 | QueryIterator::new().iterate_table_select( 69 | &mut col_collecting_cb(&self.sql_query.from.alias, &mut safe_present_cols), 70 | &self.sql_query, 71 | ); 72 | 73 | // Compute the ReadRegion 74 | let read_region = compute_read_region( 75 | &ctx.table_schema.key_cols, 76 | &ctx.this_tablet_key_range, 77 | &es.context, 78 | &self.sql_query.selection, 79 | &self.sql_query.from.alias, 80 | safe_present_cols, 81 | extra_cols, 82 | ); 83 | 84 | // Move the MSTableReadES to the Pending state with the given ReadRegion. 85 | let protect_qid = mk_qid(io_ctx.rand()); 86 | 87 | // Add a ReadRegion to the m_waiting_read_protected. 88 | let verifying = ctx.verifying_writes.get_mut(&es.timestamp).unwrap(); 89 | verifying.m_waiting_read_protected.insert(RequestedReadProtected { 90 | orig_p: OrigP::new(es.query_id.clone()), 91 | query_id: protect_qid.clone(), 92 | read_region, 93 | }); 94 | 95 | Ok(protect_qid) 96 | } 97 | 98 | fn compute_subqueries( 99 | &mut self, 100 | ctx: &mut TabletContext, 101 | io_ctx: &mut IO, 102 | es: &GeneralQueryES, 103 | ms_query_es: &mut MSQueryES, 104 | ) -> Vec { 105 | compute_subqueries( 106 | GRQueryConstructorView { 107 | root_query_path: &es.root_query_path, 108 | timestamp: &es.timestamp, 109 | sql_query: &self.sql_query, 110 | query_plan: &es.query_plan, 111 | query_id: &es.query_id, 112 | context: &es.context, 113 | }, 114 | io_ctx.rand(), 115 | StorageLocalTable::new( 116 | &ctx.table_schema, 117 | &es.timestamp, 118 | &self.sql_query.from, 119 | &ctx.this_tablet_key_range, 120 | &self.sql_query.selection, 121 | MSStorageView::new( 122 | &ctx.storage, 123 | &ctx.table_schema, 124 | &ms_query_es.update_views, 125 | es.tier.clone(), 126 | ), 127 | ), 128 | ) 129 | } 130 | 131 | fn finish( 132 | &mut self, 133 | ctx: &mut TabletContext, 134 | _: &mut IO, 135 | es: &GeneralQueryES, 136 | (children, subquery_results): ( 137 | Vec<(Vec, Vec)>, 138 | Vec>, 139 | ), 140 | ms_query_es: &mut MSQueryES, 141 | ) -> Option { 142 | // Create the ContextConstructor. 143 | let context_constructor = ContextConstructor::new( 144 | es.context.context_schema.clone(), 145 | StorageLocalTable::new( 146 | &ctx.table_schema, 147 | &es.timestamp, 148 | &self.sql_query.from, 149 | &ctx.this_tablet_key_range, 150 | &self.sql_query.selection, 151 | MSStorageView::new( 152 | &ctx.storage, 153 | &ctx.table_schema, 154 | &ms_query_es.update_views, 155 | es.tier.clone(), 156 | ), 157 | ), 158 | children, 159 | ); 160 | 161 | // Evaluate 162 | let eval_res = fully_evaluate_select( 163 | context_constructor, 164 | &es.context.deref(), 165 | subquery_results, 166 | &self.sql_query, 167 | ); 168 | 169 | match eval_res { 170 | Ok(res_table_views) => { 171 | // Signal Success and return the data. 
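// (The returned `QueryESResult` carries the evaluated TableViews together with the
// accumulated `new_rms`.)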
172 | Some(TPESAction::Success(QueryESResult { 173 | result: res_table_views, 174 | new_rms: es.new_rms.iter().cloned().collect(), 175 | })) 176 | } 177 | Err(eval_error) => Some(TPESAction::QueryError(mk_eval_error(eval_error))), 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /src/alter_table_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::alter_table_tm_es::{ 2 | AlterTableClosed, AlterTableCommit, AlterTablePrepare, AlterTablePrepared, 3 | AlterTableTMPayloadTypes, 4 | }; 5 | use crate::common::TNodePath; 6 | use crate::common::{cur_timestamp, mk_t, BasicIOCtx, Timestamp}; 7 | use crate::message as msg; 8 | use crate::server::ServerContextBase; 9 | use crate::sql_ast::proc; 10 | use crate::stmpaxos2pc_rm::{ 11 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 12 | STMPaxos2PCRMOuter, 13 | }; 14 | use crate::stmpaxos2pc_tm::TMMessage; 15 | use crate::tablet::{TabletContext, TabletPLm}; 16 | use serde::{Deserialize, Serialize}; 17 | use std::cmp::max; 18 | 19 | // ----------------------------------------------------------------------------------------------- 20 | // Payloads 21 | // ----------------------------------------------------------------------------------------------- 22 | 23 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 24 | pub struct AlterTableRMPayloadTypes {} 25 | 26 | impl RMPayloadTypes for AlterTableRMPayloadTypes { 27 | type TM = AlterTableTMPayloadTypes; 28 | type RMContext = TabletContext; 29 | 30 | // Actions 31 | type RMCommitActionData = (); 32 | 33 | // RM PLm 34 | type RMPreparedPLm = AlterTableRMPrepared; 35 | type RMCommittedPLm = AlterTableRMCommitted; 36 | type RMAbortedPLm = AlterTableRMAborted; 37 | } 38 | 39 | // RM PLm 40 | 41 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 42 | pub struct AlterTableRMPrepared { 43 | pub alter_op: proc::AlterOp, 44 | pub timestamp: Timestamp, 45 | } 46 | 47 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 48 | pub struct AlterTableRMCommitted { 49 | pub timestamp: Timestamp, 50 | } 51 | 52 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 53 | pub struct AlterTableRMAborted {} 54 | 55 | // ----------------------------------------------------------------------------------------------- 56 | // RMServerContext AlterTable 57 | // ----------------------------------------------------------------------------------------------- 58 | 59 | impl RMServerContext for TabletContext { 60 | fn push_plm(&mut self, plm: RMPLm) { 61 | self.tablet_bundle.push(TabletPLm::AlterTable(plm)); 62 | } 63 | 64 | fn send_to_tm( 65 | &mut self, 66 | io_ctx: &mut IO, 67 | _: &(), 68 | msg: TMMessage, 69 | ) { 70 | self.send_to_master(io_ctx, msg::MasterRemotePayload::AlterTable(msg)); 71 | } 72 | 73 | fn mk_node_path(&self) -> TNodePath { 74 | TabletContext::mk_node_path(self) 75 | } 76 | 77 | fn is_leader(&self) -> bool { 78 | TabletContext::is_leader(self) 79 | } 80 | } 81 | 82 | // ----------------------------------------------------------------------------------------------- 83 | // AlterTableES Implementation 84 | // ----------------------------------------------------------------------------------------------- 85 | 86 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 87 | pub struct AlterTableRMInner { 88 | pub alter_op: proc::AlterOp, 89 | pub prepared_timestamp: Timestamp, 90 | } 91 | 92 | pub 
type AlterTableRMES = STMPaxos2PCRMOuter; 93 | pub type AlterTableRMAction = STMPaxos2PCRMAction; 94 | 95 | impl STMPaxos2PCRMInner for AlterTableRMInner { 96 | fn new( 97 | ctx: &mut TabletContext, 98 | io_ctx: &mut IO, 99 | payload: AlterTablePrepare, 100 | ) -> AlterTableRMInner { 101 | // Construct the `preparing_timestamp` 102 | let mut timestamp = cur_timestamp(io_ctx, ctx.tablet_config.timestamp_suffix_divisor); 103 | let col_name = &payload.alter_op.col_name; 104 | timestamp = max(timestamp, ctx.table_schema.val_cols.get_lat(col_name)); 105 | for (_, req) in ctx.waiting_locked_cols.iter().chain(ctx.inserting_locked_cols.iter()) { 106 | if req.cols.contains(col_name) { 107 | timestamp = max(timestamp, req.timestamp.clone()); 108 | } 109 | } 110 | timestamp = timestamp.add(mk_t(1)); 111 | 112 | AlterTableRMInner { alter_op: payload.alter_op, prepared_timestamp: timestamp } 113 | } 114 | 115 | fn new_follower( 116 | _: &mut TabletContext, 117 | _: &mut IO, 118 | payload: AlterTableRMPrepared, 119 | ) -> AlterTableRMInner { 120 | AlterTableRMInner { alter_op: payload.alter_op, prepared_timestamp: payload.timestamp } 121 | } 122 | 123 | fn mk_closed() -> AlterTableClosed { 124 | AlterTableClosed {} 125 | } 126 | 127 | fn mk_prepared_plm( 128 | &mut self, 129 | ctx: &mut TabletContext, 130 | _: &mut IO, 131 | ) -> Option { 132 | if ctx.pause_ddl() { 133 | None 134 | } else { 135 | Some(AlterTableRMPrepared { 136 | alter_op: self.alter_op.clone(), 137 | timestamp: self.prepared_timestamp.clone(), 138 | }) 139 | } 140 | } 141 | 142 | fn prepared_plm_inserted( 143 | &mut self, 144 | _: &mut TabletContext, 145 | _: &mut IO, 146 | ) -> AlterTablePrepared { 147 | AlterTablePrepared { timestamp: self.prepared_timestamp.clone() } 148 | } 149 | 150 | fn mk_committed_plm( 151 | &mut self, 152 | _: &mut TabletContext, 153 | _: &mut IO, 154 | commit: &AlterTableCommit, 155 | ) -> AlterTableRMCommitted { 156 | AlterTableRMCommitted { timestamp: commit.timestamp.clone() } 157 | } 158 | 159 | /// Apply the `alter_op` to this Tablet's `table_schema`. 
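/// The write lands in the multiversioned `val_cols` map at the commit `timestamp`
/// (see `multiversion_map.rs`).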
160 | fn committed_plm_inserted( 161 | &mut self, 162 | ctx: &mut TabletContext, 163 | _: &mut IO, 164 | committed_plm: &RMCommittedPLm, 165 | ) { 166 | ctx.table_schema.val_cols.write( 167 | &self.alter_op.col_name, 168 | self.alter_op.maybe_col_type.clone(), 169 | committed_plm.payload.timestamp.clone(), 170 | ); 171 | } 172 | 173 | fn mk_aborted_plm( 174 | &mut self, 175 | _: &mut TabletContext, 176 | _: &mut IO, 177 | ) -> AlterTableRMAborted { 178 | AlterTableRMAborted {} 179 | } 180 | 181 | fn aborted_plm_inserted(&mut self, _: &mut TabletContext, _: &mut IO) {} 182 | 183 | fn reconfig_snapshot(&self) -> AlterTableRMInner { 184 | self.clone() 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/tests_paxos2pc.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::simple_tm_es::SimplePayloadTypes; 3 | use crate::simulation::Simulation; 4 | use crate::slave::SlavePLm; 5 | use rand::{RngCore, SeedableRng}; 6 | use rand_xorshift::XorShiftRng; 7 | use runiversal::common::mk_qid; 8 | use runiversal::common::{EndpointId, SlaveGroupId}; 9 | use runiversal::paxos2pc_tm::RMPLm; 10 | use runiversal::simulation_utils::{mk_client_eid, mk_slave_eid}; 11 | use runiversal::slave::SlaveConfig; 12 | use runiversal::test_utils::mk_sid; 13 | use std::collections::BTreeMap; 14 | 15 | enum CompletionResult { 16 | Invalid, 17 | SuccessfullyCommitted, 18 | SuccessfullyAborted, 19 | SuccessfullyTrivial, 20 | } 21 | 22 | /// This checks for 2PC Completion. Recall that 2PC Completion is where every 23 | /// RM either Commits or Aborts. 24 | fn check_completion(sim: &Simulation, rms: &Vec) -> CompletionResult { 25 | let mut rms_plms = BTreeMap::>>::new(); 26 | 27 | // Add RMPLms 28 | for rm in rms { 29 | rms_plms.insert(rm.clone(), vec![]); 30 | for pl_entry in sim.global_pls.get(rm).unwrap() { 31 | if let msg::PLEntry::Bundle(bundle) = pl_entry { 32 | for plm in &bundle.plms { 33 | if let SlavePLm::SimpleRM(rm_plm) = plm { 34 | rms_plms.get_mut(rm).unwrap().push(rm_plm.clone()); 35 | } 36 | } 37 | } 38 | } 39 | } 40 | 41 | // Check if the execution was trivial 42 | let mut is_trivial = true; 43 | for (_, rm_plms) in &rms_plms { 44 | match rm_plms[..] { 45 | [] => continue, 46 | _ => { 47 | is_trivial = false; 48 | break; 49 | } 50 | } 51 | } 52 | if is_trivial { 53 | return CompletionResult::SuccessfullyTrivial; 54 | } 55 | 56 | // Check if the execution committed 57 | let mut did_commit = true; 58 | for (_, rm_plms) in &rms_plms { 59 | match rm_plms[..] { 60 | [RMPLm::Prepared(_), RMPLm::Committed(_)] => continue, 61 | _ => { 62 | did_commit = false; 63 | break; 64 | } 65 | } 66 | } 67 | if did_commit { 68 | return CompletionResult::SuccessfullyCommitted; 69 | } 70 | 71 | // Check if the execution aborted 72 | let mut did_abort = true; 73 | for (_, rm_plms) in &rms_plms { 74 | match rm_plms[..] { 75 | [] => continue, 76 | [RMPLm::Prepared(_), RMPLm::Aborted(_)] => continue, 77 | _ => { 78 | did_abort = false; 79 | break; 80 | } 81 | } 82 | } 83 | if did_abort { 84 | return CompletionResult::SuccessfullyAborted; 85 | } 86 | 87 | // Otherwise, this was an invalid execution. 88 | return CompletionResult::Invalid; 89 | } 90 | 91 | pub fn test_single(test_num: u32, seed: [u8; 16]) { 92 | // Setup Simulation 93 | 94 | // Create 5 SlaveGroups, each with 3 nodes. 
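// (15 `EndpointId`s total; `mk_slave_eid` assigns them contiguously, 3 per group.)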
95 | const NUM_PAXOS_GROUPS: u32 = 5; 96 | const NUM_PAXOS_NODES: u32 = 3; 97 | let mut slave_address_config = BTreeMap::>::new(); 98 | for i in 0..NUM_PAXOS_GROUPS { 99 | let mut eids = Vec::::new(); 100 | for j in 0..NUM_PAXOS_NODES { 101 | eids.push(mk_slave_eid(i * NUM_PAXOS_NODES + j)); 102 | } 103 | slave_address_config.insert(SlaveGroupId(format!("s{}", i)), eids); 104 | } 105 | 106 | let client_eid = mk_client_eid(0); 107 | 108 | let slave_config = SlaveConfig { 109 | timestamp_suffix_divisor: 1, 110 | remote_leader_changed_period_ms: 5, 111 | // The below are not needed 112 | failure_detector_period_ms: 0, 113 | check_unconfirmed_eids_period_ms: 0, 114 | }; 115 | let mut sim = Simulation::new(seed, 1, slave_config, slave_address_config.clone()); 116 | 117 | // Run the simulation to warm it up. Activity here consists of Leadership changes, 118 | // Gossip, Paxos Insertions, etc. 119 | sim.simulate_n_ms(100); 120 | 121 | // Randomly construct a SimpleRequest and send it to a random Slave 122 | // to perform Simple Paxos2PC. 123 | 124 | // Take s0 to be the TM. 125 | let tm = mk_sid("s0"); 126 | let tm_eid = sim.leader_map.get(&tm).unwrap().eid.clone(); 127 | 128 | // Randomly choose RMs, where none of them are the TM. 129 | // Recall that Paxos2PC requires at least one. 130 | let num_rms = (sim.rand.next_u32() % (NUM_PAXOS_GROUPS - 1)) + 1; 131 | let mut all_slaves: Vec = slave_address_config.keys().cloned().collect(); 132 | all_slaves.remove(all_slaves.iter().position(|i| i == &tm).unwrap()); 133 | let mut rms = Vec::::new(); 134 | for _ in 0..num_rms { 135 | let r = sim.rand.next_u32() % all_slaves.len() as u32; 136 | rms.push(all_slaves.remove(r as usize)); 137 | } 138 | 139 | let request = msg::SimpleRequest { query_id: mk_qid(&mut sim.rand), rms: rms.clone() }; 140 | sim.add_msg( 141 | msg::NetworkMessage::Slave(msg::SlaveMessage::ExternalMessage( 142 | msg::ExternalMessage::SimpleRequest(request), 143 | )), 144 | &client_eid, 145 | &tm_eid, 146 | ); 147 | 148 | /// The number of iterations we simulate for, where we check 2PC 149 | /// consistency after each iteration. 150 | const NUM_CONSISTENCY_ITERATIONS: u32 = 5; 151 | /// Number of milliseconds per iteration. 152 | const MS_PER_ITERATION: u32 = 5; 153 | 154 | // Continue simulating, checking 2PC Consistency after each round 155 | sim.simulate_n_ms(NUM_CONSISTENCY_ITERATIONS * MS_PER_ITERATION); 156 | 157 | // Finally, run the Simulation in Cooldown Mode and test for Paxos2PC 158 | // completion at end. "Cooldown Mode" is defined to be where no Leadership changes occur. 159 | sim.sim_params.pl_entry_delivery_prob = 70; 160 | sim.sim_params.global_pl_insertion_prob = 30; 161 | 162 | /// Here, "cooldown ms" is the number of milliseconds within which we expect the Paxos2PC to finish, 163 | /// given that no leadership changes happen during this time. Although this can be calculated, 164 | /// we simply guess a sensible number for expedience. 165 | const EXPECTED_COOLDOWN_MS: u32 = 500; 166 | 167 | sim.simulate_n_ms(EXPECTED_COOLDOWN_MS); 168 | 169 | match check_completion(&mut sim, &rms) { 170 | CompletionResult::Invalid => { 171 | println!( 172 | "{:?}. Paxos2PC Test Failed: Invalid PLs after cooldown. Seed: {:?}", 173 | test_num, seed 174 | ); 175 | panic!() 176 | } 177 | CompletionResult::SuccessfullyCommitted => { 178 | println!("{:?}. Paxos2PC SuccessfullyCommitted!", test_num); 179 | } 180 | CompletionResult::SuccessfullyAborted => { 181 | println!("{:?}. 
Paxos2PC SuccessfullyAborted!", test_num); 182 | } 183 | CompletionResult::SuccessfullyTrivial => { 184 | println!("{:?}. Paxos2PC SuccessfullyTrivial!", test_num); 185 | } 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/network_driver.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{EndpointId, Gen, LeadershipId, PaxosGroupId}; 2 | use crate::common::{LeaderMap, RemoteLeaderChangedPLm, VersionedValue}; 3 | use crate::message as msg; 4 | use std::collections::BTreeMap; 5 | 6 | pub struct NetworkDriverContext<'a> { 7 | pub this_gid: &'a PaxosGroupId, 8 | pub this_eid: &'a EndpointId, 9 | pub leader_map: &'a VersionedValue, 10 | pub remote_leader_changes: &'a mut Vec, 11 | } 12 | 13 | // TODO: amend the proof for when PaxosGroupIds get removed. It can go something like: 14 | // "Safety: a `remote_message` with PaxosGroupId outside of `network_buffer` will never 15 | // come in unless `leader_map` comes in containing the new PaxosGroupIds. Liveness: a 16 | // buffered messages, either `deliver_blocked_messages` is called with high enough `lid`, 17 | // or the `gid` is removed. 18 | 19 | #[derive(Debug)] 20 | pub struct NetworkDriver { 21 | /// This buffers the NetworkMessages until the corresponding `LeadershipId` in the 22 | /// `LeaderMap` is sufficiently high enough. Some properties: 23 | /// 1. All `RemoteMessage`s for a given `PaxosGroupId` have the same `from_lid`. 24 | network_buffer: BTreeMap>>, 25 | /// The `Gen` of the `ctx.leader_map` that `network_buffer` corresponds to. 26 | gen: Gen, 27 | } 28 | 29 | impl NetworkDriver { 30 | pub fn new(leader_map: &VersionedValue) -> NetworkDriver { 31 | let mut network_buffer = BTreeMap::>>::new(); 32 | for (gid, _) in leader_map.value() { 33 | network_buffer.insert(gid.clone(), Vec::new()); 34 | } 35 | 36 | NetworkDriver { network_buffer, gen: leader_map.gen().clone() } 37 | } 38 | 39 | /// The precondition is that the `remote_message` is always from a `PaxosGroupId` that 40 | /// is in the `leader_map` in `ctx`. 41 | pub fn receive( 42 | &mut self, 43 | ctx: NetworkDriverContext, 44 | remote_message: msg::RemoteMessage, 45 | ) -> Option { 46 | // Update `network_buffer` if the LeaderMap has since been updated. 47 | if &self.gen < ctx.leader_map.gen() { 48 | self.gen = ctx.leader_map.gen().clone(); 49 | 50 | // Add new PaxosGroupIds 51 | for (gid, _) in ctx.leader_map.value() { 52 | if !self.network_buffer.contains_key(gid) { 53 | self.network_buffer.insert(gid.clone(), vec![]); 54 | } 55 | } 56 | 57 | // Remove old PaxosGroupIds 58 | let mut removed_gids = Vec::::new(); 59 | for (gid, _) in &self.network_buffer { 60 | if !ctx.leader_map.value().contains_key(gid) { 61 | removed_gids.push(gid.clone()); 62 | } 63 | } 64 | for gid in removed_gids { 65 | self.network_buffer.remove(&gid); 66 | } 67 | } 68 | 69 | let this_gid = &ctx.this_gid; 70 | let this_lid = ctx.leader_map.value().get(&this_gid).unwrap(); 71 | 72 | // A node only gets to this code if it is the Leader. 73 | debug_assert!(&this_lid.eid == ctx.this_eid); 74 | // Messages should not misrouted. 75 | debug_assert!(remote_message.to_lid.eid == this_lid.eid); 76 | // Messages should not be routed here ahead of this node knowing it is the Leader. 77 | debug_assert!(remote_message.to_lid.gen <= this_lid.gen); 78 | 79 | // Drop the RemoteMessage if it was destined to an older generation. 
80 | if remote_message.to_lid.gen < this_lid.gen { 81 | return None; 82 | } 83 | 84 | // This assertion follows immediately from the above. 85 | debug_assert!(remote_message.to_lid.gen == this_lid.gen); 86 | 87 | let from_gid = remote_message.from_gid.clone(); 88 | let from_lid = remote_message.from_lid.clone(); 89 | let buffer = self.network_buffer.get_mut(&from_gid).unwrap(); 90 | if !buffer.is_empty() { 91 | // This means there are already messages from a new remote Leader. 92 | let new_from_lid = &buffer.get(0).unwrap().from_lid; 93 | if from_lid.gen < new_from_lid.gen { 94 | // The Leadership of the new message is too old, so we drop it. 95 | None 96 | } else if from_lid.gen == new_from_lid.gen { 97 | // The Leadership of the new message is the same as the other new messages, so we push. 98 | buffer.push(remote_message); 99 | None 100 | } else { 101 | // The Leadership of the new message is even newer, so we replace. 102 | buffer.clear(); 103 | buffer.push(remote_message); 104 | // We also add a new RemoteLeaderChanged PLm to be inserted. 105 | ctx.remote_leader_changes.push(RemoteLeaderChangedPLm { gid: from_gid, lid: from_lid }); 106 | None 107 | } 108 | } else { 109 | let cur_from_lid = ctx.leader_map.value().get(&from_gid).unwrap(); 110 | if from_lid.gen < cur_from_lid.gen { 111 | // The Leadership of the new message is old, so we drop it. 112 | None 113 | } else if from_lid.gen == cur_from_lid.gen { 114 | // The Leadership of the new message is current, so we deliver the message. 115 | Some(remote_message.payload) 116 | } else { 117 | // The Leadership of the new message is new, so we buffer it. 118 | buffer.push(remote_message); 119 | // We also add a new RemoteLeaderChanged PLm to be inserted. 120 | ctx.remote_leader_changes.push(RemoteLeaderChangedPLm { gid: from_gid, lid: from_lid }); 121 | None 122 | } 123 | } 124 | } 125 | 126 | /// This is called when a `RemoteLeaderChangedPLm` is inserted. 127 | pub fn deliver_blocked_messages( 128 | &mut self, 129 | from_gid: PaxosGroupId, 130 | from_lid: LeadershipId, 131 | ) -> Vec { 132 | if let Some(buffer) = self.network_buffer.get_mut(&from_gid) { 133 | if !buffer.is_empty() { 134 | // Recall that the `from_lid.gen` of all buffered messages should be the same. 135 | let new_from_lid = &buffer.get(0).unwrap().from_lid; 136 | if from_lid.gen > new_from_lid.gen { 137 | // Here, the new RemoteLeaderChangedPLm is beyond all buffered messages, so we drop them. 138 | buffer.clear(); 139 | Vec::new() 140 | } else if from_lid.gen == new_from_lid.gen { 141 | // Deliver all messages from the buffer. 142 | let remote_messages = std::mem::replace(buffer, Vec::new()); 143 | remote_messages.into_iter().map(|m| m.payload).collect() 144 | } else { 145 | // Here, the newly inserted RemoteLeaderChangedPLm will have no effect. Note that from 146 | // `receive`, an appropriate one is still scheduled for insertion. 147 | Vec::new() 148 | } 149 | } else { 150 | Vec::new() 151 | } 152 | } else { 153 | Vec::new() 154 | } 155 | } 156 | 157 | // Here, we just clear the NetworkBuffer.
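// (After a local Leadership change, delivering messages that were buffered for the old
// Leader would violate the Leadership checks in `receive`, so they are dropped.)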
158 | pub fn leader_changed(&mut self) { 159 | for (_, buffer) in &mut self.network_buffer { 160 | buffer.clear(); 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/tests_stmpaxos2pc.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::simulation::Simulation; 3 | use crate::slave::SlavePLm; 4 | use crate::stm_simple_rm_es::STMSimpleRMPayloadTypes; 5 | use crate::stm_simple_tm_es::STMSimpleTMPayloadTypes; 6 | use rand::{RngCore, SeedableRng}; 7 | use rand_xorshift::XorShiftRng; 8 | use runiversal::common::mk_qid; 9 | use runiversal::common::{EndpointId, SlaveGroupId}; 10 | use runiversal::simulation_utils::{mk_client_eid, mk_slave_eid}; 11 | use runiversal::slave::SlaveConfig; 12 | use runiversal::stmpaxos2pc_rm::RMPLm; 13 | use runiversal::stmpaxos2pc_tm::TMPLm; 14 | use runiversal::test_utils::mk_sid; 15 | use std::collections::BTreeMap; 16 | 17 | enum CompletionResult { 18 | Invalid, 19 | SuccessfullyCommitted, 20 | SuccessfullyAborted, 21 | SuccessfullyTrivial, 22 | } 23 | 24 | /// This checks for 2PC Completion. Recall that 2PC Completion is where every 25 | /// RM either Commits or Aborts. 26 | fn check_completion( 27 | sim: &Simulation, 28 | rms: &Vec, 29 | tm: &SlaveGroupId, 30 | ) -> CompletionResult { 31 | let mut tm_plms = Vec::>::new(); 32 | let mut rms_plms = BTreeMap::>>::new(); 33 | 34 | // Add TMPLms 35 | for pl_entry in sim.global_pls.get(tm).unwrap() { 36 | if let msg::PLEntry::Bundle(bundle) = pl_entry { 37 | for plm in &bundle.plms { 38 | if let SlavePLm::SimpleSTMTM(tm_plm) = plm { 39 | tm_plms.push(tm_plm.clone()); 40 | } 41 | } 42 | } 43 | } 44 | 45 | // Add RMPLms 46 | for rm in rms { 47 | rms_plms.insert(rm.clone(), vec![]); 48 | for pl_entry in sim.global_pls.get(rm).unwrap() { 49 | if let msg::PLEntry::Bundle(bundle) = pl_entry { 50 | for plm in &bundle.plms { 51 | if let SlavePLm::SimpleSTMRM(rm_plm) = plm { 52 | rms_plms.get_mut(rm).unwrap().push(rm_plm.clone()); 53 | } 54 | } 55 | } 56 | } 57 | } 58 | 59 | // For every valid value of `tm_plms`, we verify that all `rms_plms` are as expected. 60 | match tm_plms[..] { 61 | [TMPLm::Prepared(_), TMPLm::Committed(_), TMPLm::Closed(_)] => { 62 | for (_, rm_plms) in &rms_plms { 63 | match rm_plms[..] { 64 | [RMPLm::Prepared(_), RMPLm::Committed(_)] => continue, 65 | _ => return CompletionResult::Invalid, 66 | } 67 | } 68 | CompletionResult::SuccessfullyCommitted 69 | } 70 | [TMPLm::Prepared(_), TMPLm::Aborted(_), TMPLm::Closed(_)] => { 71 | for (_, rm_plms) in &rms_plms { 72 | match rm_plms[..] { 73 | [] | [RMPLm::Prepared(_), RMPLm::Aborted(_)] => continue, 74 | _ => return CompletionResult::Invalid, 75 | } 76 | } 77 | CompletionResult::SuccessfullyAborted 78 | } 79 | [] => { 80 | for (_, rm_plms) in &rms_plms { 81 | match rm_plms[..] { 82 | [] => continue, 83 | _ => return CompletionResult::Invalid, 84 | } 85 | } 86 | CompletionResult::SuccessfullyTrivial 87 | } 88 | _ => CompletionResult::Invalid, 89 | } 90 | } 91 | 92 | pub fn test_single(test_num: u32, seed: [u8; 16]) { 93 | // Setup Simulation 94 | 95 | // Create 5 SlaveGroups, each with 3 nodes. 
96 |   const NUM_PAXOS_GROUPS: u32 = 5;
97 |   const NUM_PAXOS_NODES: u32 = 3;
98 |   let mut slave_address_config = BTreeMap::>::new();
99 |   for i in 0..NUM_PAXOS_GROUPS {
100 |     let mut eids = Vec::::new();
101 |     for j in 0..NUM_PAXOS_NODES {
102 |       eids.push(mk_slave_eid(i * NUM_PAXOS_NODES + j));
103 |     }
104 |     slave_address_config.insert(SlaveGroupId(format!("s{}", i)), eids);
105 |   }
106 | 
107 |   let client_eid = mk_client_eid(0);
108 | 
109 |   let slave_config = SlaveConfig {
110 |     timestamp_suffix_divisor: 1,
111 |     remote_leader_changed_period_ms: 5,
112 |     // The below are not needed
113 |     failure_detector_period_ms: 0,
114 |     check_unconfirmed_eids_period_ms: 0,
115 |   };
116 |   let mut sim = Simulation::new(seed, 1, slave_config, slave_address_config.clone());
117 | 
118 |   // Run the simulation to warm it up. Activity here consists of Leadership changes,
119 |   // Gossip, Paxos Insertions, etc.
120 |   sim.simulate_n_ms(100);
121 | 
122 |   // Randomly construct a STMSimpleRequest and send it to a random Slave
123 |   // to perform Simple STMPaxos2PC.
124 | 
125 |   // Take s0 to be the TM.
126 |   let tm = mk_sid("s0");
127 |   let tm_eid = sim.leader_map.get(&tm).unwrap().eid.clone();
128 | 
129 |   // Randomly choose RMs, where none of them are the TM.
130 |   // Recall that STMPaxos2PC requires at least one.
131 |   let num_rms = (sim.rand.next_u32() % (NUM_PAXOS_GROUPS - 1)) + 1;
132 |   let mut all_slaves: Vec = slave_address_config.keys().cloned().collect();
133 |   all_slaves.remove(all_slaves.iter().position(|i| i == &tm).unwrap());
134 |   let mut rms = Vec::::new();
135 |   for _ in 0..num_rms {
136 |     let r = sim.rand.next_u32() % all_slaves.len() as u32;
137 |     rms.push(all_slaves.remove(r as usize));
138 |   }
139 | 
140 |   let request = msg::STMSimpleRequest { query_id: mk_qid(&mut sim.rand), rms: rms.clone() };
141 |   sim.add_msg(
142 |     msg::NetworkMessage::Slave(msg::SlaveMessage::ExternalMessage(
143 |       msg::ExternalMessage::STMSimpleRequest(request),
144 |     )),
145 |     &client_eid,
146 |     &tm_eid,
147 |   );
148 | 
149 |   /// The number of iterations we simulate for, where we check 2PC
150 |   /// consistency after each iteration.
151 |   const NUM_CONSISTENCY_ITERATIONS: u32 = 5;
152 |   /// The number of milliseconds to simulate per iteration.
153 |   const MS_PER_ITERATION: u32 = 5;
154 | 
155 |   // Continue simulating, checking 2PC Consistency after each round.
156 |   sim.simulate_n_ms(NUM_CONSISTENCY_ITERATIONS * MS_PER_ITERATION);
157 | 
158 |   // Finally, run the Simulation in Cooldown Mode and test for STMPaxos2PC
159 |   // completion at the end. "Cooldown Mode" is defined to be a period where no Leadership changes occur.
160 |   sim.sim_params.pl_entry_delivery_prob = 70;
161 |   sim.sim_params.global_pl_insertion_prob = 30;
162 | 
163 |   /// Here, the "cooldown ms" is the number of milliseconds within which we expect the STMPaxos2PC
164 |   /// to finish, given that no leadership changes happen during this time. Although this can be
165 |   /// calculated, we simply guess a sensible number for expedience.
166 |   const EXPECTED_COOLDOWN_MS: u32 = 500;
167 | 
168 |   sim.simulate_n_ms(EXPECTED_COOLDOWN_MS);
169 | 
170 |   match check_completion(&sim, &rms, &tm) {
171 |     CompletionResult::Invalid => {
172 |       println!(
173 |         "{:?}. STMPaxos2PC Test Failed: Invalid PLs after cooldown. Seed: {:?}",
174 |         test_num, seed
175 |       );
176 |       panic!()
177 |     }
178 |     CompletionResult::SuccessfullyCommitted => {
179 |       println!("{:?}. STMPaxos2PC SuccessfullyCommitted!", test_num);
180 |     }
181 |     CompletionResult::SuccessfullyAborted => {
182 |       println!("{:?}. STMPaxos2PC SuccessfullyAborted!", test_num);
183 |     }
184 |     CompletionResult::SuccessfullyTrivial => {
185 |       println!("{:?}. STMPaxos2PC SuccessfullyTrivial!", test_num);
186 |     }
187 |   }
188 | }
189 | 
--------------------------------------------------------------------------------
/src/test/query_converter_test.rs:
--------------------------------------------------------------------------------
1 | use crate::common::{TablePath, TransTableName};
2 | use crate::message as msg;
3 | use crate::query_converter::{rename_under_query, ConversionContext, RenameContext};
4 | use crate::sql_ast::{iast, proc};
5 | 
6 | // -----------------------------------------------------------------------------------------------
7 | // Common
8 | // -----------------------------------------------------------------------------------------------
9 | 
10 | fn basic_join_node(name: String, alias: Option) -> iast::JoinNode {
11 |   iast::JoinNode::JoinLeaf(iast::JoinLeaf { alias, source: iast::JoinNodeSource::Table(name) })
12 | }
13 | 
14 | fn basic_select(table_ref: &str) -> iast::Select {
15 |   iast::Select {
16 |     distinct: false,
17 |     projection: iast::SelectClause::SelectList(vec![]),
18 |     from: basic_join_node(table_ref.to_string(), None),
19 |     selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) },
20 |   }
21 | }
22 | 
23 | fn basic_select_query(ctes: Vec<(&str, iast::Query)>, table_ref: &str) -> iast::Query {
24 |   iast::Query {
25 |     ctes: ctes.iter().map(|(name, query)| (name.to_string(), query.clone())).collect(),
26 |     body: iast::QueryBody::Select(basic_select(table_ref)),
27 |   }
28 | }
29 | 
30 | // -----------------------------------------------------------------------------------------------
31 | // Renaming
32 | // -----------------------------------------------------------------------------------------------
33 | 
34 | // This test simply checks that TransTables that are shadowed in the
35 | // original Query are still renamed properly, and that references to those
36 | // TransTables are also renamed to match.
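// -----------------------------------------------------------------------------------------------
// Aside: an illustrative sketch (not part of this file; `fresh_name` is hypothetical). The
// renaming exercised by the test below gives every CTE binding a globally unique name of the
// form `tt\<counter>\<original>`, so shadowed TransTable names stay distinct after flattening.
// -----------------------------------------------------------------------------------------------

fn fresh_name(counter: &mut u32, orig: &str) -> String {
  let name = format!("tt\\{}\\{}", counter, orig);
  *counter += 1;
  name
}

#[test]
fn fresh_name_disambiguates_shadowed_trans_tables() {
  let mut counter = 0;
  assert_eq!(fresh_name(&mut counter, "tt1"), "tt\\0\\tt1");
  assert_eq!(fresh_name(&mut counter, "tt1"), "tt\\1\\tt1"); // same name, new binding
  assert_eq!(fresh_name(&mut counter, "tt2"), "tt\\2\\tt2");
}
// --------------------------------------- end of aside -----------------------------------------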
37 | #[test] 38 | fn test_basic_rename() { 39 | let mut in_query = basic_select_query( 40 | vec![ 41 | ("tt1", basic_select_query(vec![], "t2")), 42 | ("tt2", basic_select_query(vec![("tt1", basic_select_query(vec![], "tt1"))], "tt1")), 43 | ], 44 | "tt2", 45 | ); 46 | 47 | // Rename TransTables 48 | let mut ctx = RenameContext { trans_table_map: Default::default(), counter: 0 }; 49 | rename_under_query(&mut ctx, &mut in_query); 50 | 51 | let expected = iast::Query { 52 | ctes: vec![ 53 | ( 54 | "tt\\0\\tt1".to_string(), 55 | iast::Query { 56 | ctes: vec![], 57 | body: iast::QueryBody::Select(iast::Select { 58 | distinct: false, 59 | projection: iast::SelectClause::SelectList(vec![]), 60 | from: basic_join_node("t2".to_string(), None), 61 | selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) }, 62 | }), 63 | }, 64 | ), 65 | ( 66 | "tt\\2\\tt2".to_string(), 67 | iast::Query { 68 | ctes: vec![( 69 | "tt\\1\\tt1".to_string(), 70 | iast::Query { 71 | ctes: vec![], 72 | body: iast::QueryBody::Select(iast::Select { 73 | distinct: false, 74 | projection: iast::SelectClause::SelectList(vec![]), 75 | from: basic_join_node("tt\\0\\tt1".to_string(), Some("tt1".to_string())), 76 | selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) }, 77 | }), 78 | }, 79 | )], 80 | body: iast::QueryBody::Select(iast::Select { 81 | distinct: false, 82 | projection: iast::SelectClause::SelectList(vec![]), 83 | from: basic_join_node("tt\\1\\tt1".to_string(), Some("tt1".to_string())), 84 | selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) }, 85 | }), 86 | }, 87 | ), 88 | ], 89 | body: iast::QueryBody::Select(iast::Select { 90 | distinct: false, 91 | projection: iast::SelectClause::SelectList(vec![]), 92 | from: basic_join_node("tt\\2\\tt2".to_string(), Some("tt2".to_string())), 93 | selection: iast::ValExpr::Value { val: iast::Value::Boolean(true) }, 94 | }), 95 | }; 96 | 97 | // Verify the result. 98 | assert_eq!(in_query, expected); 99 | } 100 | 101 | // ----------------------------------------------------------------------------------------------- 102 | // Flattening 103 | // ----------------------------------------------------------------------------------------------- 104 | 105 | // This tests for a basic flattening of the Query. 
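// -----------------------------------------------------------------------------------------------
// Aside: an illustrative summary (not from this codebase) of what the flattener should produce
// for the input in the test below: one stage per renamed CTE, innermost-first, plus a final
// synthetic "returning" stage.
// -----------------------------------------------------------------------------------------------

#[test]
fn expected_stage_order() {
  // (TransTable name, the source it reads from), in stage order.
  let stages = vec![
    ("tt\\0\\tt1", "t2"),
    ("tt\\1\\tt1", "tt\\0\\tt1"),
    ("tt\\2\\tt2", "tt\\1\\tt1"),
    ("tt\\3\\", "tt\\2\\tt2"), // synthetic stage returning the top-level body
  ];
  // Every stage only reads from a base table or from a stage defined before it.
  for (i, (_, source)) in stages.iter().enumerate() {
    assert!(stages[..i].iter().any(|(name, _)| name == source) || *source == "t2");
  }
}
// --------------------------------------- end of aside -----------------------------------------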
106 | #[test] 107 | fn test_basic_flatten() { 108 | let query = basic_select_query( 109 | vec![ 110 | ("tt\\0\\tt1", basic_select_query(vec![], "t2")), 111 | ( 112 | "tt\\2\\tt2", 113 | basic_select_query( 114 | vec![("tt\\1\\tt1", basic_select_query(vec![], "tt\\0\\tt1"))], 115 | "tt\\1\\tt1", 116 | ), 117 | ), 118 | ], 119 | "tt\\2\\tt2", 120 | ); 121 | 122 | let expected: Result = Ok(proc::MSQuery { 123 | trans_tables: vec![ 124 | ( 125 | TransTableName("tt\\0\\tt1".to_string()), 126 | proc::MSQueryStage::TableSelect(proc::TableSelect { 127 | distinct: false, 128 | projection: proc::SelectClause::SelectList(vec![]), 129 | from: proc::GeneralSource { 130 | source_ref: proc::GeneralSourceRef::TablePath(TablePath("t2".to_string())), 131 | alias: None, 132 | }, 133 | selection: proc::ValExpr::Value { val: iast::Value::Boolean(true) }, 134 | }), 135 | ), 136 | ( 137 | TransTableName("tt\\1\\tt1".to_string()), 138 | proc::MSQueryStage::TableSelect(proc::TableSelect { 139 | distinct: false, 140 | projection: proc::SelectClause::SelectList(vec![]), 141 | from: proc::GeneralSource { 142 | source_ref: proc::GeneralSourceRef::TransTableName(TransTableName( 143 | "tt\\0\\tt1".to_string(), 144 | )), 145 | alias: None, 146 | }, 147 | selection: proc::ValExpr::Value { val: iast::Value::Boolean(true) }, 148 | }), 149 | ), 150 | ( 151 | TransTableName("tt\\2\\tt2".to_string()), 152 | proc::MSQueryStage::TableSelect(proc::TableSelect { 153 | distinct: false, 154 | projection: proc::SelectClause::SelectList(vec![]), 155 | from: proc::GeneralSource { 156 | source_ref: proc::GeneralSourceRef::TransTableName(TransTableName( 157 | "tt\\1\\tt1".to_string(), 158 | )), 159 | alias: None, 160 | }, 161 | selection: proc::ValExpr::Value { val: iast::Value::Boolean(true) }, 162 | }), 163 | ), 164 | ( 165 | TransTableName("tt\\3\\".to_string()), 166 | proc::MSQueryStage::TableSelect(proc::TableSelect { 167 | distinct: false, 168 | projection: proc::SelectClause::SelectList(vec![]), 169 | from: proc::GeneralSource { 170 | source_ref: proc::GeneralSourceRef::TransTableName(TransTableName( 171 | "tt\\2\\tt2".to_string(), 172 | )), 173 | alias: None, 174 | }, 175 | selection: proc::ValExpr::Value { val: iast::Value::Boolean(true) }, 176 | }), 177 | ), 178 | ] 179 | .into_iter() 180 | .collect(), 181 | returning: TransTableName("tt\\3\\".to_string()), 182 | }); 183 | 184 | let mut ctx = ConversionContext { col_usage_map: Default::default(), counter: 3 }; 185 | assert_eq!(ctx.flatten_top_level_query(&query).unwrap(), expected); 186 | } 187 | -------------------------------------------------------------------------------- /src/bin/paxos2pc_sim/stm_simple_tm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::message as msg; 2 | use crate::slave::{SlaveContext, SlavePLm}; 3 | use runiversal::common::BasicIOCtx; 4 | use runiversal::common::{EndpointId, RequestId, SlaveGroupId}; 5 | use runiversal::stmpaxos2pc_tm::{ 6 | RMMessage, STMPaxos2PCTMInner, STMPaxos2PCTMOuter, TMClosedPLm, TMCommittedPLm, TMMessage, TMPLm, 7 | TMPayloadTypes, TMServerContext, 8 | }; 9 | use serde::{Deserialize, Serialize}; 10 | use std::collections::BTreeMap; 11 | 12 | // ----------------------------------------------------------------------------------------------- 13 | // Payloads 14 | // ----------------------------------------------------------------------------------------------- 15 | 16 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 17 | pub struct STMSimpleTMPayloadTypes {} 
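// -----------------------------------------------------------------------------------------------
// Aside: an illustrative sketch (not part of this file). `STMSimpleTMPayloadTypes` carries no
// data; it only instantiates the associated types of `TMPayloadTypes` below. The same
// zero-sized "type bundle" pattern in miniature, with hypothetical names:
// -----------------------------------------------------------------------------------------------

trait Payloads {
  type Prepare;
  type Commit;
}

struct Simple;

impl Payloads for Simple {
  type Prepare = ();
  type Commit = ();
}

// Generic protocol machinery is then written once against `P: Payloads` and reused by every
// concrete instantiation:
fn send_prepare<P: Payloads>(_prepare: P::Prepare) { /* ... */ }

fn demo() {
  send_prepare::<Simple>(());
}
// --------------------------------------- end of aside -----------------------------------------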
18 | 19 | impl TMPayloadTypes for STMSimpleTMPayloadTypes { 20 | // Master 21 | type RMPath = SlaveGroupId; 22 | type TMPath = SlaveGroupId; 23 | type NetworkMessageT = msg::NetworkMessage; 24 | type TMContext = SlaveContext; 25 | 26 | // TM PLm 27 | type TMPreparedPLm = STMSimpleTMPrepared; 28 | type TMCommittedPLm = STMSimpleTMCommitted; 29 | type TMAbortedPLm = STMSimpleTMAborted; 30 | type TMClosedPLm = STMSimpleTMClosed; 31 | 32 | // TM-to-RM Messages 33 | type Prepare = STMSimplePrepare; 34 | type Abort = STMSimpleAbort; 35 | type Commit = STMSimpleCommit; 36 | 37 | // RM-to-TM Messages 38 | type Prepared = STMSimplePrepared; 39 | type Aborted = STMSimpleAborted; 40 | type Closed = STMSimpleClosed; 41 | } 42 | 43 | // TM PLm 44 | 45 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 46 | pub struct STMSimpleTMPrepared { 47 | pub rms: Vec, 48 | } 49 | 50 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 51 | pub struct STMSimpleTMCommitted {} 52 | 53 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 54 | pub struct STMSimpleTMAborted {} 55 | 56 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 57 | pub struct STMSimpleTMClosed {} 58 | 59 | // TM-to-RM 60 | 61 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 62 | pub struct STMSimplePrepare {} 63 | 64 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 65 | pub struct STMSimpleAbort {} 66 | 67 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 68 | pub struct STMSimpleCommit {} 69 | 70 | // RM-to-TM 71 | 72 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 73 | pub struct STMSimplePrepared {} 74 | 75 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 76 | pub struct STMSimpleAborted {} 77 | 78 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 79 | pub struct STMSimpleClosed {} 80 | 81 | // ----------------------------------------------------------------------------------------------- 82 | // STMTMServerContext 83 | // ----------------------------------------------------------------------------------------------- 84 | 85 | impl TMServerContext for SlaveContext { 86 | fn push_plm(&mut self, plm: TMPLm) { 87 | self.slave_bundle.plms.push(SlavePLm::SimpleSTMTM(plm)); 88 | } 89 | 90 | fn send_to_rm>( 91 | &mut self, 92 | io_ctx: &mut IO, 93 | rm: &SlaveGroupId, 94 | msg: RMMessage, 95 | ) { 96 | self.send(io_ctx, rm, msg::SlaveRemotePayload::STMRMMessage(msg)); 97 | } 98 | 99 | fn mk_node_path(&self) -> SlaveGroupId { 100 | self.this_sid.clone() 101 | } 102 | 103 | fn is_leader(&self) -> bool { 104 | SlaveContext::is_leader(self) 105 | } 106 | } 107 | 108 | // ----------------------------------------------------------------------------------------------- 109 | // General STMPaxos2PC TM Types 110 | // ----------------------------------------------------------------------------------------------- 111 | #[derive(Debug)] 112 | pub struct ResponseData { 113 | pub request_id: RequestId, 114 | pub sender_eid: EndpointId, 115 | } 116 | 117 | // ----------------------------------------------------------------------------------------------- 118 | // Simple Implementation 119 | // ----------------------------------------------------------------------------------------------- 120 | 121 | pub type STMSimpleTMES = STMPaxos2PCTMOuter; 122 | 123 | #[derive(Debug)] 124 | pub struct STMSimpleTMInner { 125 | // RMs to use 126 | pub rms: Vec, 127 | } 128 | 129 | impl STMPaxos2PCTMInner for STMSimpleTMInner { 130 | fn 
new_follower>( 131 | _: &mut SlaveContext, 132 | _: &mut IO, 133 | payload: STMSimpleTMPrepared, 134 | ) -> STMSimpleTMInner { 135 | STMSimpleTMInner { rms: payload.rms } 136 | } 137 | 138 | fn mk_prepared_plm>( 139 | &mut self, 140 | _: &mut SlaveContext, 141 | _: &mut IO, 142 | ) -> STMSimpleTMPrepared { 143 | STMSimpleTMPrepared { rms: self.rms.clone() } 144 | } 145 | 146 | fn prepared_plm_inserted>( 147 | &mut self, 148 | _: &mut SlaveContext, 149 | _: &mut IO, 150 | ) -> BTreeMap { 151 | let mut prepares = BTreeMap::::new(); 152 | for rm in &self.rms { 153 | prepares.insert(rm.clone(), STMSimplePrepare {}); 154 | } 155 | prepares 156 | } 157 | 158 | fn mk_committed_plm>( 159 | &mut self, 160 | _: &mut SlaveContext, 161 | _: &mut IO, 162 | _: &BTreeMap, 163 | ) -> STMSimpleTMCommitted { 164 | STMSimpleTMCommitted {} 165 | } 166 | 167 | fn committed_plm_inserted>( 168 | &mut self, 169 | _: &mut SlaveContext, 170 | _: &mut IO, 171 | _: &TMCommittedPLm, 172 | ) -> BTreeMap { 173 | let mut commits = BTreeMap::::new(); 174 | for rm in &self.rms { 175 | commits.insert(rm.clone(), STMSimpleCommit {}); 176 | } 177 | commits 178 | } 179 | 180 | fn mk_aborted_plm>( 181 | &mut self, 182 | _: &mut SlaveContext, 183 | _: &mut IO, 184 | ) -> STMSimpleTMAborted { 185 | STMSimpleTMAborted {} 186 | } 187 | 188 | fn aborted_plm_inserted>( 189 | &mut self, 190 | _: &mut SlaveContext, 191 | _: &mut IO, 192 | ) -> BTreeMap { 193 | let mut aborts = BTreeMap::::new(); 194 | for rm in &self.rms { 195 | aborts.insert(rm.clone(), STMSimpleAbort {}); 196 | } 197 | aborts 198 | } 199 | 200 | fn mk_closed_plm>( 201 | &mut self, 202 | _: &mut SlaveContext, 203 | _: &mut IO, 204 | ) -> STMSimpleTMClosed { 205 | STMSimpleTMClosed {} 206 | } 207 | 208 | fn closed_plm_inserted>( 209 | &mut self, 210 | _: &mut SlaveContext, 211 | _: &mut IO, 212 | _: &TMClosedPLm, 213 | ) { 214 | } 215 | 216 | fn leader_changed>( 217 | &mut self, 218 | _: &mut SlaveContext, 219 | _: &mut IO, 220 | ) { 221 | } 222 | 223 | fn reconfig_snapshot(&self) -> Self { 224 | unimplemented!() 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /src/query_planning.rs: -------------------------------------------------------------------------------- 1 | use crate::col_usage::{QueryElement, QueryIterator}; 2 | use crate::common::{ 3 | lookup, ColName, FullGen, Gen, TablePath, TableSchema, TierMap, Timestamp, TransTableName, 4 | }; 5 | use crate::master_query_planning_es::{DBSchemaView, ErrorTrait}; 6 | use crate::message as msg; 7 | use crate::multiversion_map::MVM; 8 | use crate::sql_ast::proc; 9 | use sqlparser::test_utils::table; 10 | use std::collections::{BTreeMap, BTreeSet}; 11 | 12 | /// Gather every reference to a `TablePath` found in the `query`. 
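// -----------------------------------------------------------------------------------------------
// Aside: an illustrative sketch (not part of this file; `Elem` and `visit_all` are hypothetical).
// `collect_table_paths` below follows the usual callback-visitor shape: an iterator walks every
// query element and a `FnMut` closure collects the names it cares about, ignoring the rest.
// -----------------------------------------------------------------------------------------------

use std::collections::BTreeSet;

enum Elem {
  Table(String),
  Other,
}

fn visit_all<F: FnMut(&Elem)>(elems: &[Elem], f: &mut F) {
  for e in elems {
    f(e);
  }
}

#[test]
fn collects_only_tables() {
  let elems = vec![Elem::Table("t1".into()), Elem::Other, Elem::Table("t2".into())];
  let mut tables = BTreeSet::<String>::new();
  visit_all(&elems, &mut |e| {
    if let Elem::Table(name) = e {
      tables.insert(name.clone());
    }
  });
  assert_eq!(tables.len(), 2);
}
// --------------------------------------- end of aside -----------------------------------------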
13 | pub fn collect_table_paths(query: &proc::MSQuery) -> BTreeSet {
14 |   let mut table_paths = BTreeSet::::new();
15 |   QueryIterator::new().iterate_ms_query(
16 |     &mut |stage: QueryElement| match stage {
17 |       QueryElement::TableSelect(query) => {
18 |         table_paths.insert(query.from.table_path.clone());
19 |       }
20 |       QueryElement::TransTableSelect(_) => {}
21 |       QueryElement::JoinSelect(_) => {}
22 |       QueryElement::JoinNode(_) => {}
23 |       QueryElement::JoinLeaf(_) => {}
27 |       QueryElement::Update(query) => {
28 |         table_paths.insert(query.table.table_path.clone());
29 |       }
30 |       QueryElement::Insert(query) => {
31 |         table_paths.insert(query.table.table_path.clone());
32 |       }
33 |       QueryElement::Delete(query) => {
34 |         table_paths.insert(query.table.table_path.clone());
35 |       }
36 |       QueryElement::ValExpr(_) => {}
37 |       QueryElement::MSQuery(_) => {}
38 |       QueryElement::GRQuery(_) => {}
39 |       QueryElement::GRQueryStage(_) => {}
40 |     },
41 |     query,
42 |   );
43 |   table_paths
44 | }
45 | 
46 | /// Compute the `TierMap` for every stage in the `MSQuery`. A `TablePath` should appear
47 | /// in a `TierMap` iff it is written to by the `MSQuery`.
48 | ///
49 | /// The `TierMap` for a stage contains the Tiers that should be used to read the `TablePath`s
50 | /// inside. Note that if a stage is a write (e.g. an Update), the Tier of the written `TablePath`
51 | /// in the `TierMap` is one behind (i.e. one more) the Tier that the write should commit at.
52 | pub fn compute_all_tier_maps(ms_query: &proc::MSQuery) -> BTreeMap {
53 |   let mut all_tier_maps = BTreeMap::::new();
54 |   let mut cur_tier_map = BTreeMap::::new();
55 |   for (_, stage) in &ms_query.trans_tables {
56 |     match stage {
57 |       proc::MSQueryStage::TableSelect(_) => {}
58 |       proc::MSQueryStage::TransTableSelect(_) => {}
59 |       proc::MSQueryStage::JoinSelect(_) => {}
60 |       proc::MSQueryStage::Update(update) => {
61 |         cur_tier_map.insert(update.table.table_path.clone(), 0);
62 |       }
63 |       proc::MSQueryStage::Insert(insert) => {
64 |         cur_tier_map.insert(insert.table.table_path.clone(), 0);
65 |       }
66 |       proc::MSQueryStage::Delete(delete) => {
67 |         cur_tier_map.insert(delete.table.table_path.clone(), 0);
68 |       }
69 |     }
70 |   }
71 |   for (trans_table_name, stage) in ms_query.trans_tables.iter().rev() {
72 |     match stage {
73 |       proc::MSQueryStage::TableSelect(_) => {}
74 |       proc::MSQueryStage::TransTableSelect(_) => {}
75 |       proc::MSQueryStage::JoinSelect(_) => {}
76 |       proc::MSQueryStage::Update(update) => {
77 |         *cur_tier_map.get_mut(&update.table.table_path).unwrap() += 1;
78 |       }
79 |       proc::MSQueryStage::Insert(insert) => {
80 |         *cur_tier_map.get_mut(&insert.table.table_path).unwrap() += 1;
81 |       }
82 |       proc::MSQueryStage::Delete(delete) => {
83 |         *cur_tier_map.get_mut(&delete.table.table_path).unwrap() += 1;
84 |       }
85 |     }
86 |     all_tier_maps.insert(trans_table_name.clone(), TierMap { map: cur_tier_map.clone() });
87 |   }
88 |   all_tier_maps
89 | }
90 | 
91 | /// Computes a map that maps all `TablePath`s used in the MSQuery to the `Gen`
92 | /// in the `table_generation` at `timestamp`.
93 | ///
94 | /// Precondition:
95 | /// 1. All `TablePath`s in the MSQuery must have a non-None `Gen` in `table_generation`.
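// -----------------------------------------------------------------------------------------------
// Aside: a worked example for `compute_all_tier_maps` above (the original file continues below).
// Consider an MSQuery with stages [S1: Update(T), S2: Select, S3: Update(T)]. Walking the stages
// in reverse and bumping the written table's tier *before* snapshotting gives:
//
//   S3 (Update T):  T -> 1   (S3's write commits at tier 0, so S3 reads one behind, at tier 1)
//   S2 (Select):    T -> 1   (reads the same snapshot as S3)
//   S1 (Update T):  T -> 2   (its write commits at tier 1; it reads one behind, at tier 2)
//
// That is, later writes commit at lower tiers, and every stage reads at a tier one behind the
// nearest later write.
// --------------------------------------- end of aside -----------------------------------------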
96 | pub fn compute_table_location_map(
97 |   view: &mut ViewT,
98 |   table_paths: &BTreeSet,
99 | ) -> Result, ViewT::ErrorT> {
100 |   let mut table_location_map = BTreeMap::::new();
101 |   for table_path in table_paths {
102 |     table_location_map.insert(table_path.clone(), view.get_gen(table_path)?);
103 |   }
104 |   Ok(table_location_map)
105 | }
106 | 
107 | /// Validates the `MSQuery` in various ways. In particular, this checks whether the
108 | /// columns that are written by an Insert or Update are valid and present in `view`.
109 | pub fn perform_validations>(
110 |   view: &mut ViewT,
111 |   ms_query: &proc::MSQuery,
112 | ) -> Result<(), ErrorT> {
113 |   for (_, stage) in &ms_query.trans_tables {
114 |     match stage {
115 |       proc::MSQueryStage::TableSelect(_) => {}
116 |       proc::MSQueryStage::TransTableSelect(_) => {}
117 |       proc::MSQueryStage::JoinSelect(_) => {}
118 |       proc::MSQueryStage::Update(query) => {
119 |         // Check that the `stage` is not trying to modify a KeyCol,
120 |         // all assigned columns are unique, and they are present.
121 |         let table_path = &query.table.table_path;
122 |         let key_cols = view.key_cols(table_path)?.clone();
123 |         let mut all_cols = BTreeSet::<&ColName>::new();
124 |         for (col_name, _) in &query.assignment {
125 |           if !all_cols.insert(col_name) || lookup(&key_cols, col_name).is_some() {
126 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::InvalidUpdate));
127 |           }
128 |           if !view.contains_col(table_path, col_name)? {
129 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::RequiredColumnDNE(
130 |               col_name.clone(),
131 |             )));
132 |           }
133 |         }
134 |       }
135 |       proc::MSQueryStage::Insert(query) => {
136 |         // Check that the `stage` is inserting to all KeyCols.
137 |         let table_path = &query.table.table_path;
138 |         let key_cols = view.key_cols(table_path)?;
139 |         for (col_name, _) in key_cols {
140 |           if !query.columns.contains(col_name) {
141 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::InvalidInsert));
142 |           }
143 |         }
144 | 
145 |         // Check that every inserted column is present.
146 |         for col_name in &query.columns {
147 |           if !view.contains_col(table_path, col_name)? {
148 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::RequiredColumnDNE(
149 |               col_name.clone(),
150 |             )));
151 |           }
152 |         }
153 | 
154 |         // Check that all inserted columns are unique.
155 |         let mut all_cols = BTreeSet::<&ColName>::new();
156 |         for col_name in &query.columns {
157 |           if !all_cols.insert(col_name) {
158 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::InvalidInsert));
159 |           }
160 |         }
161 | 
162 |         // Check that `values` has equal length to `columns`.
163 |         for row in &query.values {
164 |           if row.len() != query.columns.len() {
165 |             return Err(ErrorTrait::mk_error(msg::QueryPlanningError::InvalidInsert));
166 |           }
167 |         }
168 |       }
169 |       proc::MSQueryStage::Delete(_) => {}
170 |     }
171 |   }
172 | 
173 |   Ok(())
174 | }
175 | 
--------------------------------------------------------------------------------
/src/multiversion_map.rs:
--------------------------------------------------------------------------------
1 | use crate::common::Timestamp;
2 | use crate::common::{mk_t, update_all_eids};
3 | use serde::{Deserialize, Serialize};
4 | use std::cmp::max;
5 | use std::collections::BTreeMap;
6 | use std::hash::Hash;
7 | 
8 | /// Here, `min_lat` is used to increase the LATs of all Keys in existence (which is an
9 | /// infinite set). When it is incremented, the LATs of every key that is present in `map`
10 | /// are updated as well so that they are always >= `min_lat`.
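// -----------------------------------------------------------------------------------------------
// Aside: an illustrative sketch (not part of multiversion_map.rs; the struct follows below). The
// point of `min_lat` is that even keys never inserted into `map` have a well-defined LAT. A
// rough model of the LAT lookup, using hypothetical `u64` timestamps:
// -----------------------------------------------------------------------------------------------

use std::collections::BTreeMap;

fn get_lat<K: Ord>(map: &BTreeMap<K, (u64, Vec<(u64, Option<()>)>)>, min_lat: u64, key: &K) -> u64 {
  map.get(key).map(|(lat, _)| *lat).unwrap_or(min_lat)
}

#[test]
fn absent_keys_have_min_lat() {
  let mut map = BTreeMap::new();
  map.insert("a", (3u64, vec![]));
  // After an `update_all_lats(5)`, `min_lat` becomes 5 and "a" is raised to 5 as well, so
  // *every* key (including ones never inserted) now has a LAT >= 5.
  let min_lat = 5u64;
  if let Some((lat, _)) = map.get_mut("a") {
    *lat = (*lat).max(min_lat);
  }
  assert_eq!(get_lat(&map, min_lat, &"a"), 5);
  assert_eq!(get_lat(&map, min_lat, &"zzz"), 5);
}
// --------------------------------------- end of aside -----------------------------------------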
11 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
12 | pub struct MVM {
13 |   min_lat: Timestamp,
14 |   map: BTreeMap)>)>,
15 | }
16 | 
17 | impl MVM
18 | where
19 |   K: Eq + Ord + Clone,
20 |   V: Clone,
21 | {
22 |   pub fn new() -> MVM {
23 |     MVM { min_lat: mk_t(0), map: BTreeMap::new() }
24 |   }
25 | 
26 |   pub fn init(init_vals: BTreeMap) -> MVM {
27 |     let mut map = BTreeMap::)>)>::new();
28 |     for (key, value) in init_vals {
29 |       map.insert(key, (mk_t(0), vec![(mk_t(0), Some(value))]));
30 |     }
31 |     MVM { min_lat: mk_t(0), map }
32 |   }
33 | 
34 |   /// Performs an MVMWrite to the MVM. The user *must* be sure that `timestamp` is beyond the
35 |   /// `lat` of the key, otherwise we `assert`. They can verify this by doing static and weak reads.
36 |   pub fn write(&mut self, key: &K, value: Option, timestamp: Timestamp) {
37 |     if let Some((lat, versions)) = self.map.get_mut(key) {
38 |       assert!(*lat < timestamp);
39 |       *lat = timestamp.clone();
40 |       versions.push((timestamp, value));
41 |     } else {
42 |       // Here, the `key` has a LAT of `min_lat` and contains no versions.
43 |       assert!(self.min_lat < timestamp);
44 |       self.map.insert(key.clone(), (timestamp.clone(), vec![(timestamp, value)]));
45 |     }
46 |   }
47 | 
48 |   pub fn read(&mut self, key: &K, timestamp: &Timestamp) -> Option {
49 |     if let Some((lat, versions)) = self.map.get_mut(key) {
50 |       *lat = max(lat.clone(), timestamp.clone());
51 |       find_prior_value(versions, timestamp).cloned()
52 |     } else {
53 |       if timestamp > &self.min_lat {
54 |         self.map.insert(key.clone(), (timestamp.clone(), vec![]));
55 |       }
56 | 
57 |       None
58 |     }
59 |   }
60 | 
61 |   pub fn update_lat(&mut self, key: &K, timestamp: Timestamp) {
62 |     if let Some((lat, _)) = self.map.get_mut(key) {
63 |       *lat = max(lat.clone(), timestamp);
64 |     } else if timestamp > self.min_lat {
65 |       self.map.insert(key.clone(), (timestamp, vec![]));
66 |     }
67 |   }
68 | 
69 |   pub fn update_all_lats(&mut self, timestamp: Timestamp) {
70 |     if timestamp > self.min_lat {
71 |       for (_, (lat, _)) in &mut self.map {
72 |         *lat = max(lat.clone(), timestamp.clone())
73 |       }
74 |       self.min_lat = timestamp;
75 |     }
76 |   }
77 | 
78 |   /// Reads the version prior to the timestamp. This function asserts that the `lat` of
79 |   /// the `key` is `>= timestamp`. Recall that all keys in existence implicitly at
80 |   /// least have a `lat` of 0. Thus, the return value of this function is idempotent.
81 |   pub fn strong_static_read(&self, key: &K, timestamp: &Timestamp) -> Option<&V> {
82 |     if let Some((lat, versions)) = self.map.get(key) {
83 |       assert!(timestamp <= lat);
84 |       find_prior_value(versions, timestamp)
85 |     } else {
86 |       assert!(timestamp <= &self.min_lat);
87 |       None
88 |     }
89 |   }
90 | 
91 |   /// Get the value that would be read if we did a `read` at the `lat`.
92 |   pub fn get_last_version(&self, key: &K) -> Option<&V> {
93 |     if let Some((_, versions)) = self.map.get(key) {
94 |       if let Some((_, val)) = versions.iter().last() {
95 |         val.as_ref()
96 |       } else {
97 |         None
98 |       }
99 |     } else {
100 |       None
101 |     }
102 |   }
103 | 
104 |   /// Get the latest version of the `key` that was non-`None`.
105 |   pub fn get_last_present_version(&self, key: &K) -> Option<&V> {
106 |     if let Some((_, versions)) = self.map.get(key) {
107 |       for (_, val) in versions.iter().rev() {
108 |         if val.is_some() {
109 |           return val.as_ref();
110 |         }
111 |       }
112 |     }
113 |     None
114 |   }
115 | 
116 |   /// Reads the prior value at the timestamp. This does not mutate the `lat` if the read
117 |   /// happens with a future timestamp.
Thus, the values read are not idempotent. 118 | pub fn static_read(&self, key: &K, timestamp: &Timestamp) -> Option<&V> { 119 | let (_, value) = self.static_read_version(key, timestamp)?; 120 | value.as_ref() 121 | } 122 | 123 | /// Reads the prior version at the timestamp. This does not mutate the `lat` if the read 124 | /// happens with a future timestamp. Thus, the values read are not idempotent. 125 | pub fn static_read_version( 126 | &self, 127 | key: &K, 128 | timestamp: &Timestamp, 129 | ) -> Option<&(Timestamp, Option)> { 130 | if let Some((_, versions)) = self.map.get(key) { 131 | find_prior_version(versions, timestamp) 132 | } else { 133 | None 134 | } 135 | } 136 | 137 | /// Returns the values for all keys that are present at the given 138 | /// `timestamp`. This is done statically, so no lats are updated. 139 | pub fn static_snapshot_read(&self, timestamp: &Timestamp) -> BTreeMap { 140 | let mut snapshot = BTreeMap::new(); 141 | for (key, (_, versions)) in &self.map { 142 | if let Some(value) = find_prior_value(versions, timestamp) { 143 | snapshot.insert(key.clone(), value.clone()); 144 | } 145 | } 146 | return snapshot; 147 | } 148 | 149 | /// Recall that abstractly, all keys are mapped to `(0, [])` 150 | pub fn get_lat(&self, key: &K) -> Timestamp { 151 | if let Some((lat, _)) = self.map.get(key) { 152 | lat.clone() 153 | } else { 154 | self.min_lat.clone() 155 | } 156 | } 157 | 158 | /// Get the smallest LAT among all keys. There certainly exists a key with a LAT of 159 | /// `min_lat`, since there are infinite keys. Thus, we simply return `min_lat`. 160 | pub fn get_min_lat(&self) -> Timestamp { 161 | self.min_lat.clone() 162 | } 163 | 164 | /// Get the highest LAT of any key-value pair in the MVM. 165 | pub fn get_latest_lat(&self) -> Timestamp { 166 | let mut latest_lat = self.min_lat.clone(); 167 | for (_, (lat, _)) in &self.map { 168 | latest_lat = max(latest_lat, lat.clone()); 169 | } 170 | latest_lat 171 | } 172 | } 173 | 174 | fn find_prior_value<'a, V>( 175 | versions: &'a Vec<(Timestamp, Option)>, 176 | timestamp: &Timestamp, 177 | ) -> Option<&'a V> { 178 | let (_, value) = find_prior_version(versions, timestamp)?; 179 | value.as_ref() 180 | } 181 | 182 | fn find_prior_version<'a, V>( 183 | versions: &'a Vec<(Timestamp, Option)>, 184 | timestamp: &Timestamp, 185 | ) -> Option<&'a (Timestamp, Option)> { 186 | for version in versions.iter().rev() { 187 | let (t, _) = version; 188 | if t <= timestamp { 189 | return Some(version); 190 | } 191 | } 192 | return None; 193 | } 194 | 195 | #[cfg(test)] 196 | mod tests { 197 | use crate::common::mk_t; 198 | use crate::common::Timestamp; 199 | use crate::multiversion_map::MVM; 200 | 201 | #[test] 202 | fn single_key_test() { 203 | let mut mvm = MVM::new(); 204 | let k = String::from("k"); 205 | let v1 = String::from("v1"); 206 | let v2 = String::from("v2"); 207 | let v3 = String::from("v3"); 208 | assert_eq!(mvm.read(&k, &mk_t(1)), None); 209 | mvm.write(&k, Some(v1.clone()), mk_t(2)); 210 | mvm.write(&k, Some(v2.clone()), mk_t(4)); 211 | assert_eq!(mvm.read(&k, &mk_t(3)), Some(v1)); 212 | assert_eq!(mvm.read(&k, &mk_t(5)), Some(v2)); 213 | mvm.write(&k, Some(v3.clone()), mk_t(6)); 214 | assert_eq!(mvm.read(&k, &mk_t(6)), Some(v3)); 215 | mvm.write(&k, None, mk_t(7)); 216 | assert_eq!(mvm.read(&k, &mk_t(7)), None); 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /src/tm_status.rs: -------------------------------------------------------------------------------- 1 | use 
crate::common::{merge_table_views, mk_qid, CoreIOCtx, OrigP};
2 | use crate::common::{
3 |   CQueryPath, CTNodePath, ColName, LeadershipId, PaxosGroupIdTrait, QueryId, SlaveGroupId,
4 |   TQueryPath, TableView, TabletGroupId, TransTableLocationPrefix,
5 | };
6 | use crate::message as msg;
7 | use crate::server::{CTServerContext, CommonQuery};
8 | use std::collections::{BTreeMap, BTreeSet};
9 | 
10 | // -----------------------------------------------------------------------------------------------
11 | // TMStatus
12 | // -----------------------------------------------------------------------------------------------
13 | 
14 | pub enum SendHelper {
15 |   TableQuery(msg::GeneralQuery, Vec),
16 |   TransTableQuery(msg::GeneralQuery, TransTableLocationPrefix),
17 | }
18 | 
19 | // This is used to perform PCSA over the network for reads and writes.
20 | #[derive(Debug)]
21 | pub struct TMStatus {
22 |   root_query_path: CQueryPath,
23 |   /// The QueryId of the TMStatus.
24 |   pub query_id: QueryId,
25 |   /// This is the QueryId of the PerformQuery. We keep this distinct from the TMStatus'
26 |   /// QueryId, since one of the RMs might be this node.
27 |   child_query_id: QueryId,
28 |   /// Accumulates all transitively accessed Tablets where an `MSQueryES` was used.
29 |   new_rms: BTreeSet,
30 |   /// The current set of Leaderships that this TMStatus is waiting on. Thus, in order to
31 |   /// contact an RM, we just use the `LeadershipId` found here.
32 |   pub leaderships: BTreeMap,
33 |   /// Holds the number of nodes that responded (used to decide when this TM is done).
34 |   responded_count: usize,
35 |   /// Holds all child Querys, initially mapping to `None`. As results come in, we hold them here.
36 |   tm_state: BTreeMap>>,
37 |   pub orig_p: OrigP,
38 | }
39 | 
40 | impl TMStatus {
41 |   pub fn new(
42 |     io_ctx: &mut IO,
43 |     root_query_path: CQueryPath,
44 |     orig_p: OrigP,
45 |   ) -> TMStatus {
46 |     TMStatus {
47 |       root_query_path,
48 |       query_id: mk_qid(io_ctx.rand()),
49 |       child_query_id: mk_qid(io_ctx.rand()),
50 |       new_rms: Default::default(),
51 |       leaderships: Default::default(),
52 |       responded_count: 0,
53 |       tm_state: Default::default(),
54 |       orig_p,
55 |     }
56 |   }
57 | 
58 |   pub fn query_id(&self) -> &QueryId {
59 |     &self.query_id
60 |   }
61 | 
62 |   /// Perform the sending indicated by `helper`.
63 |   /// TODO: when sharding occurs, this query_leader_map.get might be invalid.
64 |   pub fn send_general(
65 |     &mut self,
66 |     ctx: &mut Ctx,
67 |     io_ctx: &mut IO,
68 |     query_leader_map: &BTreeMap,
69 |     helper: SendHelper,
70 |   ) -> bool {
71 |     match helper {
72 |       SendHelper::TableQuery(general_query, tids) => {
73 |         // Validate the LeadershipId of PaxosGroups that the PerformQuery will be sent to.
74 |         // We do this before sending any messages, in case it fails. Recall that the local
75 |         // `leader_map` is allowed to get ahead of the `query_leader_map` which we computed
76 |         // earlier, so this check is necessary.
77 |         for tid in &tids {
78 |           let sid = ctx.gossip().get().tablet_address_config.get(&tid).unwrap();
79 |           if let Some(lid) = query_leader_map.get(sid) {
80 |             if lid.gen < ctx.leader_map().get(&sid.to_gid()).unwrap().gen {
81 |               // The `lid` has since changed, so we cannot finish this MSQueryES.
82 |               return false;
83 |             }
84 |           }
85 |         }
86 | 
87 |         // Having non-empty `tids` solves the TMStatus deadlock and allows the child schema to be determined.
88 |         assert!(tids.len() > 0);
89 |         for tid in tids {
90 |           // Recall we already validated that `lid` in `query_leader_map` is no lower than
91 |           // the one at this node's LeaderMap, so it is safe to use.
92 |           let to_node_path = ctx.mk_tablet_node_path(tid).into_ct();
93 |           let sid = &to_node_path.sid;
94 |           let to_lid = query_leader_map.get(sid).or(ctx.leader_map().get(&sid.to_gid())).unwrap();
95 |           self.send_perform(ctx, io_ctx, general_query.clone(), to_node_path, to_lid.clone());
96 |         }
97 |       }
98 |       SendHelper::TransTableQuery(general_query, location_prefix) => {
99 |         // Validate the LeadershipId of PaxosGroups that the PerformQuery will be sent to.
100 |         // We do this before sending any messages, in case it fails.
101 |         let sid = &location_prefix.source.node_path.sid;
102 |         if let Some(lid) = query_leader_map.get(sid) {
103 |           if lid.gen < ctx.leader_map().get(&sid.to_gid()).unwrap().gen {
104 |             // The `lid` is too old, so we cannot finish this GRQueryES.
105 |             return false;
106 |           }
107 |         }
108 | 
109 |         // Recall we already validated that `lid` in `query_leader_map` is no lower than
110 |         // the one at this node's LeaderMap, so it is safe to use.
111 |         let to_lid = query_leader_map.get(&sid).or(ctx.leader_map().get(&sid.to_gid())).unwrap();
112 |         let to_node_path = location_prefix.source.node_path.clone();
113 |         self.send_perform(ctx, io_ctx, general_query, to_node_path, to_lid.clone());
114 |       }
115 |     }
116 | 
117 |     true
118 |   }
119 | 
120 |   /// Construct a `PerformQuery` and send it to `to_node_path` at `to_lid`, recording the
121 |   /// target in this TMStatus.
122 |   pub fn send_perform(
123 |     &mut self,
124 |     ctx: &mut Ctx,
125 |     io_ctx: &mut IO,
126 |     general_query: msg::GeneralQuery,
127 |     to_node_path: CTNodePath,
128 |     to_lid: LeadershipId,
129 |   ) {
130 |     let sender_path = ctx.mk_this_query_path(self.query_id.clone());
131 |     // Construct PerformQuery
132 |     let perform_query = msg::PerformQuery {
133 |       root_query_path: self.root_query_path.clone(),
134 |       sender_path,
135 |       query_id: self.child_query_id.clone(),
136 |       query: general_query,
137 |     };
138 | 
139 |     // Send out PerformQuery. Recall that this could only be a Tablet.
140 |     let common_query = CommonQuery::PerformQuery(perform_query);
141 |     ctx.send_to_ct_lid(io_ctx, to_node_path.clone(), common_query, to_lid.clone());
142 | 
143 |     // Add the TabletGroup into the TMStatus.
144 |     self.leaderships.insert(to_node_path.sid.clone(), to_lid);
145 |     self.tm_state.insert(to_node_path, None);
146 |   }
147 | 
148 |   /// We accumulate the results of the `query_success` here.
149 |   pub fn handle_query_success(&mut self, query_success: msg::QuerySuccess) {
150 |     let node_path = query_success.responder_path.node_path;
151 |     self.tm_state.insert(node_path, Some(query_success.result.clone()));
152 |     self.new_rms.extend(query_success.new_rms);
153 |     self.responded_count += 1;
154 |   }
155 | 
156 |   /// Merge these `TableView`s together. Note that this should only be called when
157 |   /// all child queries have responded.
158 |   pub fn get_results(self) -> (OrigP, Vec, BTreeSet) {
159 |     debug_assert!(self.is_complete());
160 |     let mut results = Vec::>::new();
161 |     for (_, rm_result) in self.tm_state {
162 |       results.push(rm_result.unwrap());
163 |     }
164 |     (self.orig_p, merge_table_views(results), self.new_rms)
165 |   }
166 | 
167 |   pub fn is_complete(&self) -> bool {
168 |     self.responded_count == self.tm_state.len()
169 |   }
170 | 
171 |   /// We ECU this `TMStatus` by sending `CancelQuery` to all remaining RMs.
171 | pub fn exit_and_clean_up( 172 | self, 173 | ctx: &mut Ctx, 174 | io_ctx: &mut IO, 175 | ) { 176 | for (rm_path, rm_result) in self.tm_state { 177 | if rm_result.is_none() { 178 | let orig_sid = &rm_path.sid; 179 | let orig_lid = self.leaderships.get(&orig_sid).unwrap().clone(); 180 | ctx.send_to_ct_lid( 181 | io_ctx, 182 | rm_path, 183 | CommonQuery::CancelQuery(msg::CancelQuery { query_id: self.child_query_id.clone() }), 184 | orig_lid, 185 | ); 186 | } 187 | } 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/create_table_rm_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{mk_t, BasicIOCtx, CTSubNodePath, PaxosGroupIdTrait, TableSchema}; 2 | use crate::common::{ 3 | ColName, ColType, Gen, SlaveGroupId, TablePath, TabletGroupId, TabletKeyRange, 4 | }; 5 | use crate::create_table_tm_es::{ 6 | CreateTableClosed, CreateTableCommit, CreateTablePrepare, CreateTablePrepared, 7 | CreateTableTMPayloadTypes, 8 | }; 9 | use crate::message as msg; 10 | use crate::multiversion_map::MVM; 11 | use crate::server::ServerContextBase; 12 | use crate::slave::{SlaveContext, SlavePLm}; 13 | use crate::stmpaxos2pc_rm::{ 14 | RMCommittedPLm, RMPLm, RMPayloadTypes, RMServerContext, STMPaxos2PCRMAction, STMPaxos2PCRMInner, 15 | STMPaxos2PCRMOuter, 16 | }; 17 | use crate::stmpaxos2pc_tm::TMMessage; 18 | use crate::storage::GenericMVTable; 19 | use crate::tablet::{TabletConfig, TabletContext}; 20 | use rand::RngCore; 21 | use serde::{Deserialize, Serialize}; 22 | 23 | // ----------------------------------------------------------------------------------------------- 24 | // Payloads 25 | // ----------------------------------------------------------------------------------------------- 26 | 27 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 28 | pub struct CreateTableRMPayloadTypes {} 29 | 30 | impl RMPayloadTypes for CreateTableRMPayloadTypes { 31 | type TM = CreateTableTMPayloadTypes; 32 | type RMContext = SlaveContext; 33 | 34 | // Actions 35 | type RMCommitActionData = TabletContext; 36 | 37 | // RM PLm 38 | type RMPreparedPLm = CreateTableRMPrepared; 39 | type RMCommittedPLm = CreateTableRMCommitted; 40 | type RMAbortedPLm = CreateTableRMAborted; 41 | } 42 | 43 | // RM PLm 44 | 45 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 46 | pub struct CreateTableRMPrepared { 47 | pub tablet_group_id: TabletGroupId, 48 | pub table_path: TablePath, 49 | pub gen: Gen, 50 | 51 | pub key_range: TabletKeyRange, 52 | pub key_cols: Vec<(ColName, ColType)>, 53 | pub val_cols: Vec<(ColName, ColType)>, 54 | } 55 | 56 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 57 | pub struct CreateTableRMCommitted {} 58 | 59 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 60 | pub struct CreateTableRMAborted {} 61 | 62 | // ----------------------------------------------------------------------------------------------- 63 | // RMServerContext 64 | // ----------------------------------------------------------------------------------------------- 65 | 66 | impl RMServerContext for SlaveContext { 67 | fn push_plm(&mut self, plm: RMPLm) { 68 | self.slave_bundle.plms.push(SlavePLm::CreateTable(plm)); 69 | } 70 | 71 | fn send_to_tm( 72 | &mut self, 73 | io_ctx: &mut IO, 74 | _: &(), 75 | msg: TMMessage, 76 | ) { 77 | self.send_to_master(io_ctx, msg::MasterRemotePayload::CreateTable(msg)); 78 | } 79 | 80 | fn mk_node_path(&self) -> SlaveGroupId { 81 | 
self.this_sid.clone()
82 |   }
83 | 
84 |   fn is_leader(&self) -> bool {
85 |     SlaveContext::is_leader(self)
86 |   }
87 | }
88 | 
89 | // -----------------------------------------------------------------------------------------------
90 | //  CreateTableES Implementation
91 | // -----------------------------------------------------------------------------------------------
92 | 
93 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
94 | pub struct CreateTableRMInner {
95 |   pub tablet_group_id: TabletGroupId,
96 |   pub table_path: TablePath,
97 |   pub gen: Gen,
98 | 
99 |   pub key_range: TabletKeyRange,
100 |   pub key_cols: Vec<(ColName, ColType)>,
101 |   pub val_cols: Vec<(ColName, ColType)>,
102 | }
103 | 
104 | pub type CreateTableRMES = STMPaxos2PCRMOuter;
105 | pub type CreateTableRMAction = STMPaxos2PCRMAction;
106 | 
107 | impl STMPaxos2PCRMInner for CreateTableRMInner {
108 |   fn new(
109 |     _: &mut SlaveContext,
110 |     _: &mut IO,
111 |     payload: CreateTablePrepare,
112 |   ) -> CreateTableRMInner {
113 |     CreateTableRMInner {
114 |       tablet_group_id: payload.tablet_group_id,
115 |       table_path: payload.table_path,
116 |       gen: payload.gen,
117 |       key_range: payload.key_range,
118 |       key_cols: payload.key_cols,
119 |       val_cols: payload.val_cols,
120 |     }
121 |   }
122 | 
123 |   fn new_follower(
124 |     _: &mut SlaveContext,
125 |     _: &mut IO,
126 |     payload: CreateTableRMPrepared,
127 |   ) -> CreateTableRMInner {
128 |     CreateTableRMInner {
129 |       tablet_group_id: payload.tablet_group_id,
130 |       table_path: payload.table_path,
131 |       gen: payload.gen,
132 |       key_range: payload.key_range,
133 |       key_cols: payload.key_cols,
134 |       val_cols: payload.val_cols,
135 |     }
136 |   }
137 | 
138 |   fn mk_closed() -> CreateTableClosed {
139 |     CreateTableClosed {}
140 |   }
141 | 
142 |   fn mk_prepared_plm(
143 |     &mut self,
144 |     _: &mut SlaveContext,
145 |     _: &mut IO,
146 |   ) -> Option {
147 |     Some(CreateTableRMPrepared {
148 |       tablet_group_id: self.tablet_group_id.clone(),
149 |       table_path: self.table_path.clone(),
150 |       gen: self.gen.clone(),
151 |       key_range: self.key_range.clone(),
152 |       key_cols: self.key_cols.clone(),
153 |       val_cols: self.val_cols.clone(),
154 |     })
155 |   }
156 | 
157 |   fn prepared_plm_inserted(
158 |     &mut self,
159 |     _: &mut SlaveContext,
160 |     _: &mut IO,
161 |   ) -> CreateTablePrepared {
162 |     CreateTablePrepared {}
163 |   }
164 | 
165 |   fn mk_committed_plm(
166 |     &mut self,
167 |     _: &mut SlaveContext,
168 |     _: &mut IO,
169 |     _: &CreateTableCommit,
170 |   ) -> CreateTableRMCommitted {
171 |     CreateTableRMCommitted {}
172 |   }
173 | 
174 |   /// Construct `TabletContext` so a Tablet can be constructed. We return the `TabletContext`
175 |   /// in the `RMCommitActionData` rather than construct the Tablet here, since we do not have
176 |   /// access to the `SlaveIOCtx`.
177 | fn committed_plm_inserted( 178 | &mut self, 179 | ctx: &mut SlaveContext, 180 | io_ctx: &mut IO, 181 | _: &RMCommittedPLm, 182 | ) -> TabletContext { 183 | let mut rand_seed = [0; 16]; 184 | io_ctx.rand().fill_bytes(&mut rand_seed); 185 | TabletContext { 186 | tablet_config: TabletConfig { 187 | timestamp_suffix_divisor: ctx.slave_config.timestamp_suffix_divisor, 188 | }, 189 | this_sid: ctx.this_sid.clone(), 190 | this_gid: ctx.this_sid.to_gid(), 191 | this_tid: self.tablet_group_id.clone(), 192 | sub_node_path: CTSubNodePath::Tablet(self.tablet_group_id.clone()), 193 | this_eid: ctx.this_eid.clone(), 194 | gossip: ctx.gossip.clone(), 195 | leader_map: ctx.leader_map.value().clone(), 196 | storage: GenericMVTable::new(), 197 | this_table_path: self.table_path.clone(), 198 | this_sharding_gen: Gen(0), 199 | this_tablet_key_range: self.key_range.clone(), 200 | sharding_done: true, 201 | table_schema: TableSchema { 202 | key_cols: self.key_cols.clone(), 203 | val_cols: MVM::init(self.val_cols.clone().into_iter().collect()), 204 | }, 205 | presence_timestamp: mk_t(0), 206 | verifying_writes: Default::default(), 207 | inserting_prepared_writes: Default::default(), 208 | prepared_writes: Default::default(), 209 | committed_writes: Default::default(), 210 | waiting_read_protected: Default::default(), 211 | inserting_read_protected: Default::default(), 212 | read_protected: Default::default(), 213 | waiting_locked_cols: Default::default(), 214 | inserting_locked_cols: Default::default(), 215 | ms_root_query_map: Default::default(), 216 | tablet_bundle: vec![], 217 | } 218 | } 219 | 220 | fn mk_aborted_plm( 221 | &mut self, 222 | _: &mut SlaveContext, 223 | _: &mut IO, 224 | ) -> CreateTableRMAborted { 225 | CreateTableRMAborted {} 226 | } 227 | 228 | fn aborted_plm_inserted(&mut self, _: &mut SlaveContext, _: &mut IO) {} 229 | 230 | fn reconfig_snapshot(&self) -> CreateTableRMInner { 231 | self.clone() 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /src/slave_group_create_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | mk_cid, mk_sid, update_all_eids, CoordGroupId, EndpointId, Gen, LeadershipId, MasterIOCtx, 3 | PaxosGroupIdTrait, SlaveGroupId, 4 | }; 5 | use crate::master::{MasterContext, MasterPLm}; 6 | use crate::message as msg; 7 | use serde::{Deserialize, Serialize}; 8 | use std::collections::{BTreeMap, BTreeSet}; 9 | 10 | // ----------------------------------------------------------------------------------------------- 11 | // PLms 12 | // ----------------------------------------------------------------------------------------------- 13 | 14 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 15 | pub struct ConfirmCreateGroup { 16 | sid: SlaveGroupId, 17 | } 18 | 19 | // ----------------------------------------------------------------------------------------------- 20 | // SlaveGroupCreateES 21 | // ----------------------------------------------------------------------------------------------- 22 | 23 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 24 | enum State { 25 | Follower, 26 | WaitingConfirmed(BTreeSet), 27 | InsertingConfirmed, 28 | } 29 | 30 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 31 | struct SlaveGroupCreateES { 32 | create_msg: msg::CreateSlaveGroup, 33 | paxos_nodes: Vec, 34 | state: State, 35 | } 36 | 37 | impl SlaveGroupCreateES { 38 | /// Constructs an `ES`, sending out `CreateSlaveGroup` 
if this is the Master node. 39 | fn create( 40 | ctx: &mut MasterContext, 41 | io_ctx: &mut IO, 42 | sid: SlaveGroupId, 43 | paxos_nodes: Vec, 44 | coord_ids: Vec, 45 | ) -> SlaveGroupCreateES { 46 | // Construct the `CreateSlaveGroup` message. 47 | let create_msg = msg::CreateSlaveGroup { 48 | gossip: ctx.gossip.clone(), 49 | leader_map: ctx.leader_map.value().clone(), 50 | sid, 51 | paxos_nodes: paxos_nodes.clone(), 52 | coord_ids, 53 | }; 54 | 55 | // If this is the Leader, start the new Slave Nodes 56 | let state = if ctx.is_leader() { 57 | for eid in &paxos_nodes { 58 | io_ctx.send( 59 | eid, 60 | msg::NetworkMessage::FreeNode(msg::FreeNodeMessage::CreateSlaveGroup(create_msg.clone())), 61 | ) 62 | } 63 | State::WaitingConfirmed(BTreeSet::new()) 64 | } else { 65 | // Otherwise, start in the `Follower` state. 66 | State::Follower 67 | }; 68 | 69 | SlaveGroupCreateES { create_msg, paxos_nodes, state } 70 | } 71 | 72 | /// Handles the `ConfirmSlaveCreation` sent back by a node that successfully constructed itself. 73 | fn handle_confirm_msg( 74 | &mut self, 75 | ctx: &mut MasterContext, 76 | _: &mut IO, 77 | msg: msg::ConfirmSlaveCreation, 78 | ) { 79 | match &mut self.state { 80 | State::WaitingConfirmed(eids) => { 81 | // Add in the incoming `EndpointId`. 82 | debug_assert!(self.paxos_nodes.contains(&msg.sender_eid)); 83 | eids.insert(msg.sender_eid.clone()); 84 | 85 | // If a majority of nodes have responded, we can finish. 86 | if 2 * eids.len() > self.paxos_nodes.len() { 87 | ctx.master_bundle.plms.push(MasterPLm::ConfirmCreateGroup(ConfirmCreateGroup { 88 | sid: self.create_msg.sid.clone(), 89 | })); 90 | self.state = State::InsertingConfirmed; 91 | } 92 | } 93 | _ => {} 94 | } 95 | } 96 | 97 | /// Handles the insertion of the `ConfirmCreateGroup` PLm. 98 | fn handle_confirm_plm(&mut self, ctx: &mut MasterContext, io_ctx: &mut IO) { 99 | match &self.state { 100 | State::Follower | State::InsertingConfirmed => { 101 | // Update the GossipData 102 | let sid = &self.create_msg.sid; 103 | let paxos_nodes = &self.create_msg.paxos_nodes; 104 | ctx.gossip.update(|gossip_data| { 105 | gossip_data.slave_address_config.insert(sid.clone(), paxos_nodes.clone()) 106 | }); 107 | 108 | // Update the LeaderMap 109 | let lid = LeadershipId { gen: Gen(0), eid: paxos_nodes.get(0).unwrap().clone() }; 110 | ctx.leader_map.update(move |leader_map| { 111 | leader_map.insert(sid.to_gid(), lid); 112 | }); 113 | 114 | // Update the `all_eids` 115 | update_all_eids(&mut ctx.all_eids, &vec![], self.create_msg.paxos_nodes.clone()); 116 | 117 | if ctx.is_leader() { 118 | // Broadcast the GossipData. 119 | ctx.broadcast_gossip(io_ctx); 120 | } 121 | } 122 | State::WaitingConfirmed(_) => {} 123 | } 124 | } 125 | 126 | /// Handle the current (Master) leader changing. 127 | fn leader_changed(&mut self, ctx: &mut MasterContext, io_ctx: &mut IO) { 128 | match &self.state { 129 | State::Follower => { 130 | if ctx.is_leader() { 131 | // Broadcast `CreateSlaveGroup` and then go to `WaitingConfirmed`. 132 | for eid in &self.paxos_nodes { 133 | io_ctx.send( 134 | eid, 135 | msg::NetworkMessage::FreeNode(msg::FreeNodeMessage::CreateSlaveGroup( 136 | self.create_msg.clone(), 137 | )), 138 | ) 139 | } 140 | self.state = State::WaitingConfirmed(BTreeSet::new()) 141 | } 142 | } 143 | State::WaitingConfirmed(_) | State::InsertingConfirmed => { 144 | self.state = State::Follower; 145 | } 146 | } 147 | } 148 | 149 | /// If this node is a Follower, a copy of this `SlaveGroupCreateES` is returned. 
If this 150 | /// node is a Leader, then the value of this `SlaveGroupCreateES` that would result from 151 | /// losing Leadership is returned (i.e. after calling `leader_changed`). 152 | fn reconfig_snapshot(&self) -> SlaveGroupCreateES { 153 | SlaveGroupCreateES { 154 | create_msg: self.create_msg.clone(), 155 | paxos_nodes: self.paxos_nodes.clone(), 156 | state: State::Follower, 157 | } 158 | } 159 | } 160 | 161 | // ----------------------------------------------------------------------------------------------- 162 | // ES Container Functions 163 | // ----------------------------------------------------------------------------------------------- 164 | 165 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 166 | pub struct SlaveGroupCreateESS { 167 | ess: BTreeMap, 168 | } 169 | 170 | impl SlaveGroupCreateESS { 171 | pub fn new() -> SlaveGroupCreateESS { 172 | SlaveGroupCreateESS { ess: Default::default() } 173 | } 174 | 175 | // Leader-only 176 | 177 | pub fn handle_msg( 178 | &mut self, 179 | ctx: &mut MasterContext, 180 | io_ctx: &mut IO, 181 | confirm_msg: msg::ConfirmSlaveCreation, 182 | ) { 183 | if let Some(es) = self.ess.get_mut(&confirm_msg.sid) { 184 | es.handle_confirm_msg(ctx, io_ctx, confirm_msg); 185 | } 186 | } 187 | 188 | pub fn handle_new_slaves( 189 | &mut self, 190 | ctx: &mut MasterContext, 191 | io_ctx: &mut IO, 192 | new_slave_groups: BTreeMap, Vec)>, 193 | ) { 194 | // Construct `SlaveGroupCreateES`s accordingly 195 | for (sid, (paxos_nodes, coord_ids)) in new_slave_groups { 196 | let es = SlaveGroupCreateES::create(ctx, io_ctx, sid.clone(), paxos_nodes, coord_ids); 197 | self.ess.insert(sid, es); 198 | } 199 | } 200 | 201 | // Leader and Follower 202 | 203 | pub fn handle_plm( 204 | &mut self, 205 | ctx: &mut MasterContext, 206 | io_ctx: &mut IO, 207 | confirm_create: ConfirmCreateGroup, 208 | ) { 209 | // Here, we remove the ES and then finish it off. 210 | let mut es = self.ess.remove(&confirm_create.sid).unwrap(); 211 | es.handle_confirm_plm(ctx, io_ctx); 212 | } 213 | 214 | pub fn handle_lc(&mut self, ctx: &mut MasterContext, io_ctx: &mut IO) { 215 | for (_, es) in &mut self.ess { 216 | es.leader_changed(ctx, io_ctx); 217 | } 218 | } 219 | 220 | /// Add in the `SlaveGroupCreateES` where at least `ReconfigSlaveGroup` PLm has been inserted. 221 | pub fn handle_reconfig_snapshot(&self) -> SlaveGroupCreateESS { 222 | let mut create_ess = SlaveGroupCreateESS::new(); 223 | for (qid, es) in &self.ess { 224 | let es = es.reconfig_snapshot(); 225 | create_ess.ess.insert(qid.clone(), es); 226 | } 227 | create_ess 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /src/bin/paxos/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | use crate::simulation::{SimConfig, SimpleBundle, Simulation}; 4 | use rand::RngCore; 5 | use rand_xorshift::XorShiftRng; 6 | use runiversal::common::{Gen, LeadershipId}; 7 | use runiversal::message as msg; 8 | use std::iter::FromIterator; 9 | 10 | mod simulation; 11 | 12 | fn main() { 13 | test(); 14 | } 15 | 16 | /** 17 | 18 | Next tests 19 | 1. We might not be exercising retries because of how long they take. We 20 | should reduce the timer event times when doing simulation tests a little 21 | so it is not as expensive. 
22 | 
23 | */
24 | 
25 | fn test() {
26 |   println!("test_basic");
27 |   test_basic();
28 | 
29 |   println!("test_leader_partition");
30 |   test_leader_partition();
31 | 
32 |   println!("test_general_partition");
33 |   test_general_partition();
34 | 
35 |   println!("Test Successful!");
36 | }
37 | 
38 | fn default_config() -> SimConfig {
39 |   SimConfig { target_temp_blocked_frac: 0.5, max_pause_time_ms: 2000 }
40 | }
41 | 
42 | fn print_stats(sim: &Simulation) {
43 |   for (_, paxos_data) in &sim.paxos_data {
44 |     println!("Size: {:#?}", sim.max_common_index + paxos_data.paxos_log.len());
45 |   }
46 | }
47 | 
48 | // -----------------------------------------------------------------------------------------------
49 | //  test_basic
50 | // -----------------------------------------------------------------------------------------------
51 | 
52 | /// This is a basic test where random queues are temporarily paused at random times.
53 | fn test_basic() {
54 |   let mut sim = Simulation::new([0; 16], 5, default_config());
55 |   sim.simulate_n_ms(1000);
56 |   assert!(sim.global_paxos_log.len() > 0, "Failed! No elements in Global Paxos Log.");
57 |   print_stats(&sim);
58 | }
59 | 
60 | // -----------------------------------------------------------------------------------------------
61 | //  test_leader_partition
62 | // -----------------------------------------------------------------------------------------------
63 | 
64 | /// Run the simulation for a bit, find the latest leader, partition it out, and then
65 | /// run the simulation some more. Verify that more `PLEntry`s were added.
66 | fn test_leader_partition() {
67 |   let mut sim = Simulation::new([0; 16], 5, default_config());
68 |   sim.simulate_n_ms(10000);
69 |   print_stats(&sim);
70 | 
71 |   // Find the latest Leader.
72 |   let lid = LeadershipId { gen: Gen(0), eid: sim.address_config[0].clone() };
73 |   let mut latest_leader_changed = msg::LeaderChanged { lid };
74 |   for entry in sim.global_paxos_log.iter().rev() {
75 |     if let msg::PLEntry::LeaderChanged(leader_changed) = entry {
76 |       latest_leader_changed = leader_changed.clone();
77 |       break;
78 |     }
79 |   }
80 | 
81 |   // Partition out this Leader.
82 |   let leader_eid = latest_leader_changed.lid.eid;
83 |   let eids = sim.address_config.clone();
84 |   for eid in eids {
85 |     sim.block_queue_permanently(leader_eid.clone(), eid.clone());
86 |     sim.block_queue_permanently(eid, leader_eid.clone());
87 |   }
88 | 
89 |   let old_log_len = sim.global_paxos_log.len();
90 |   sim.simulate_n_ms(20000);
91 | 
92 |   assert!(
93 |     old_log_len < sim.global_paxos_log.len(),
94 |     "Failed! No new log messages were added since the old Leader died.",
95 |   );
96 | 
97 |   print_stats(&sim);
98 | }
99 | 
100 | // -----------------------------------------------------------------------------------------------
101 | //  test_general_partition
102 | // -----------------------------------------------------------------------------------------------
103 | 
104 | /// Generates a partition out of `indices`, where at least one partition has the
105 | /// majority of nodes (as Paxos requires).
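// -----------------------------------------------------------------------------------------------
// Aside: an illustrative check (not part of this file) of the majority arithmetic used by
// `gen_partition` below. With n nodes, the first block is given n/2 + 1 members (integer
// division), which is a strict majority for both even and odd n, so Paxos can always make
// progress inside that block.
// -----------------------------------------------------------------------------------------------

#[test]
fn majority_block_is_a_strict_majority() {
  for n in 1..100usize {
    let m = n / 2 + 1; // mirrors `majority_partition_len` in `gen_partition`
    assert!(2 * m > n && m <= n);
  }
}
// --------------------------------------- end of aside -----------------------------------------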
106 | fn gen_partition(rand: &mut XorShiftRng, mut indices: Vec) -> Vec> {
107 |   assert!(indices.len() > 0);
108 | 
109 |   fn add_partition(
110 |     rand: &mut XorShiftRng,
111 |     partition: &mut Vec>,
112 |     rem_indices: &mut Vec,
113 |     new_partition_len: usize,
114 |   ) {
115 |     assert!(new_partition_len <= rem_indices.len());
116 |     let mut new_partition = Vec::::new();
117 |     while new_partition.len() < new_partition_len {
118 |       let r = rand.next_u32() as usize % rem_indices.len();
119 |       new_partition.push(rem_indices.remove(r));
120 |     }
121 |     partition.push(new_partition);
122 |   }
123 | 
124 |   let mut partition = Vec::>::new();
125 |   // Construct the majority partition
126 |   let majority_partition_len = indices.len() / 2 + 1;
127 |   add_partition(rand, &mut partition, &mut indices, majority_partition_len);
128 |   // Construct other partitions
129 |   while indices.len() > 0 {
130 |     let next_partition_len = (rand.next_u32() as usize % indices.len()) + 1;
131 |     add_partition(rand, &mut partition, &mut indices, next_partition_len);
132 |   }
133 | 
134 |   partition
135 | }
136 | 
137 | /// Here, `partition` is a partition of the indices of `sim.address_config`. This function
138 | /// permanently blocks queues between these partitions.
139 | fn block_partition(sim: &mut Simulation, partition: &Vec>) {
140 |   let eids = sim.address_config.clone();
141 |   for i in 0..partition.len() {
142 |     for j in 0..partition.len() {
143 |       if i != j {
144 |         for idx_i in partition.get(i).unwrap() {
145 |           for idx_j in partition.get(j).unwrap() {
146 |             let eid_i = eids.get(*idx_i).unwrap().clone();
147 |             let eid_j = eids.get(*idx_j).unwrap().clone();
148 |             sim.block_queue_permanently(eid_i, eid_j);
149 |           }
150 |         }
151 |       }
152 |     }
153 |   }
154 | }
155 | 
156 | /// Here, `partition` is a partition of the indices of `sim.address_config`. This function
157 | /// permanently unblocks queues between these partitions.
158 | fn unblock_partition(sim: &mut Simulation, partition: &Vec>) {
159 |   let eids = sim.address_config.clone();
160 |   for i in 0..partition.len() {
161 |     for j in 0..partition.len() {
162 |       if i != j {
163 |         for idx_i in partition.get(i).unwrap() {
164 |           for idx_j in partition.get(j).unwrap() {
165 |             let eid_i = eids.get(*idx_i).unwrap().clone();
166 |             let eid_j = eids.get(*idx_j).unwrap().clone();
167 |             sim.unblock_queue_permanently(eid_i, eid_j);
168 |           }
169 |         }
170 |       }
171 |     }
172 |   }
173 | }
174 | 
175 | fn verify_leadership_changes(sim: &Simulation, expected_changes: u32) {
176 |   let lid = LeadershipId { gen: Gen(0), eid: sim.address_config[0].clone() };
177 |   // Verify that there were Leadership changes.
178 |   let mut num_leader_changes = 0;
179 |   for entry in sim.global_paxos_log.iter() {
180 |     if let msg::PLEntry::LeaderChanged(leader_changed) = entry {
181 |       assert_ne!(lid, leader_changed.lid);
182 |       num_leader_changes += 1;
183 |     }
184 |   }
185 | 
186 |   assert!(
187 |     num_leader_changes >= expected_changes,
188 |     "Test Failed! Not enough LeaderChanges occurred: {:?} instead of {:?}.",
189 |     num_leader_changes,
190 |     expected_changes
191 |   );
192 | }
193 | 
194 | /// Loop around for some time, creating and changing network partitions. Verify that
195 | /// the algorithm is safe and that new `PLEntry`s constantly get added.
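/// Concretely, each window between partition changes in which no new `PLEntry`
/// lands counts as an "unlive period", and fewer than 30% of all windows may be
/// unlive; this threshold is a heuristic of the test, not a protocol guarantee.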
196 | fn test_general_partition() {
197 |   let sim_config = SimConfig { target_temp_blocked_frac: 0.0, max_pause_time_ms: 0 };
198 |   let mut sim = Simulation::new([0; 16], 5, sim_config);
199 |   let all_indices: Vec = (0..sim.address_config.len()).collect();
200 | 
201 |   // Verification metadata
202 |   let mut num_unlive_periods = 0;
203 |   let mut num_periods = 0;
204 |   let mut last_log_len = 0;
205 | 
206 |   // Simulation
207 |   let mut cur_time = 0;
208 |   let mut cur_partition = gen_partition(&mut sim.rand, all_indices.clone());
209 |   while cur_time < 200000 {
210 |     let time_for_partition = sim.rand.next_u32() as usize % 15000;
211 |     sim.simulate_n_ms(time_for_partition as u32);
212 |     cur_time += time_for_partition;
213 | 
214 |     // Update verification metadata
215 |     if sim.global_paxos_log.len() == last_log_len {
216 |       num_unlive_periods += 1;
217 |     }
218 |     num_periods += 1;
219 |     last_log_len = sim.global_paxos_log.len();
220 | 
221 |     // Change the partition
222 |     unblock_partition(&mut sim, &cur_partition);
223 |     cur_partition = gen_partition(&mut sim.rand, all_indices.clone());
224 |     block_partition(&mut sim, &cur_partition);
225 |   }
226 | 
227 |   // Make simple assertions about the Verification Metadata.
228 |   // Check that the fraction of unlive periods among all periods is low enough.
229 |   assert!(
230 |     (num_unlive_periods as f32) < 0.3 * num_periods as f32,
231 |     "Failed! There were too many unlive periods: {:?} of {:?}.",
232 |     num_unlive_periods,
233 |     num_periods
234 |   );
235 | 
236 |   // Verify that there were Leadership changes.
237 |   verify_leadership_changes(&sim, 5);
238 |   print_stats(&sim);
239 | }
240 | 
--------------------------------------------------------------------------------
/run:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Extract
4 | A1="${1:-}"
5 | A2="${2:-}"
6 | A3="${3:-}"
7 | A4="${4:-}"
8 | 
9 | # The number of Slave Groups to instantiate (e.g. from the `start` subcommand)
10 | readonly NUM_SLAVES_GROUPS=2
11 | readonly NUM_SLAVES=$((5 * NUM_SLAVES_GROUPS))
12 | 
13 | is_help_flag() {
14 |   [[ "$1" = "-h" || "$1" = "--help" ]]
15 | }
16 | 
17 | function masters() {
18 |   # Start the image by running transact, and have it be in the background.
19 |   for ((i = 10; i < 15; i++)); do
20 |     CONTAINER_NAME="runiversal${i}";
21 |     docker run --cap-add=NET_ADMIN -d --name="${CONTAINER_NAME}" --ip 172.20.0."${i}" --network=runiversal-net runiversal scripts/transact -i 172.20.0."${i}" -t masterbootup > /dev/null &
22 |   done
23 |   wait
24 | }
25 | 
26 | function slaves() {
27 |   # Start the image by running transact, and have it be in the background.
28 |   for ((i = 15; i < $((15 + NUM_SLAVES)); i++)); do
29 |     CONTAINER_NAME="runiversal${i}";
30 |     docker run --cap-add=NET_ADMIN -d --name="${CONTAINER_NAME}" --ip 172.20.0."${i}" --network=runiversal-net runiversal scripts/transact -i 172.20.0."${i}" -t freenode -f newslave -e 172.20.0.10 > /dev/null &
31 |   done
32 |   wait
33 | }
34 | 
35 | function client_init () {
36 |   # Start the image by running the client, but only to initialize the Master Group.
37 |   docker run --cap-add=NET_ADMIN -it --name=rclient --ip 172.20.0.2 --network=runiversal-net runiversal scripts/client -i 172.20.0.2 -m '172.20.0.10 172.20.0.11 172.20.0.12 172.20.0.13 172.20.0.14' > /dev/null;
38 |   # After we exit the terminal, clean up the container.
39 |   docker container rm rclient > /dev/null;
40 | }
41 | 
42 | function client() {
43 |   # Start the image by running the client, and start an interactive terminal.
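  # (Here, 172.20.0.10 is assumed to be the current leader of the Master group,
  # matching the Master addresses passed to `client_init` above.)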
44 |   docker run --cap-add=NET_ADMIN -it --name=rclient --ip 172.20.0.2 --network=runiversal-net runiversal scripts/client -i 172.20.0.2 -e 172.20.0.10;
45 |   # After we exit the terminal, clean up the container.
46 |   docker container rm rclient;
47 | }
48 | 
49 | if [[ "$A1" = "setup" ]]; then
50 |   if is_help_flag "$A2"; then
51 |     echo "Usage: ./run setup
52 | 
53 | Setup a bridge network and the initial Docker image."
54 |     exit 0
55 |   fi
56 | 
57 |   # This should be run once after installing docker to create the docker
58 |   # network, the base image, etc.
59 |   docker network create --subnet=172.20.0.0/16 runiversal-net
60 |   docker build -t runiversal -f Dockerfile.init .
61 |   docker build -t runiversal .
62 | 
63 | elif [[ "$A1" = "build" ]]; then
64 |   if is_help_flag "$A2"; then
65 |     echo "Usage: ./run build
66 | 
67 | Do an incremental build based on the prior Docker image."
68 |     exit 0
69 |   fi
70 | 
71 |   docker build -t runiversal .
72 | 
73 | # Spin up a client and turn the current shell into an interactive CLI.
74 | elif [[ "$A1" = "new_client" ]]; then
75 |   if is_help_flag "$A2"; then
76 |     echo "Usage: ./run new_client
77 | 
78 | Create a client container, turning the current shell into an interactive session.
79 | 
80 | E.g. \`./run new_client 3 10\` will create a docker container named \`rclient3\` with
81 | IP \`172.20.0.3\` who will register itself with the system by contacting \`172.20.0.10\`,
82 | which needs to be the current leader of the Master group."
83 |     exit 0
84 |   fi
85 | 
86 |   docker run --cap-add=NET_ADMIN -it --name=rclient"$A2" --ip 172.20.0."$A2" --network=runiversal-net runiversal scripts/client -i 172.20.0."$A2" -e 172.20.0."$A3"
87 | 
88 | # Spin up a slave in the background (starting as a FreeNode)
89 | elif [[ "$A1" = "new_node" ]]; then
90 |   if is_help_flag "$A2"; then
91 |     echo "Usage: ./run new_node
92 | 
93 | Create and register a new node with the system for use as either a replacement
94 | in case an existing node goes down, or to expand the system to scale it up.
95 | 
96 | Values for :
97 |   reconfig     Create a free node that is used to replace an existing node that died.
98 |   new_slave    Create a free node that is used to create a new SlaveGroup if enough
99 |                such nodes have been registered.
100 | 
101 | E.g. \`./run new_node 31 reconfig 10\` will create a docker container named \`runiversal31\` with
102 | IP \`172.20.0.31\` who will register itself with the system as a node meant for reconfiguration by
103 | contacting \`172.20.0.10\`, which needs to be the current leader of the Master group."
104 |     exit 0
105 |   fi
106 | 
107 |   docker run --cap-add=NET_ADMIN -d --name=runiversal"$A2" --ip 172.20.0."$A2" --network=runiversal-net runiversal scripts/transact -i 172.20.0."$A2" -t freenode -f "$A3" -e 172.20.0."$A4"
108 | 
109 | elif [[ "$A1" = "masters" ]]; then
110 |   masters
111 | 
112 | elif [[ "$A1" = "slaves" ]]; then
113 |   slaves
114 | 
115 | elif [[ "$A1" = "client_init" ]]; then
116 |   client_init
117 | 
118 | elif [[ "$A1" = "client" ]]; then
119 |   client
120 | 
121 | # Spin up the Master Group, `NUM_SLAVES_GROUPS` number of Slave Groups, and an interactive client.
122 | elif [[ "$A1" = "start" ]]; then
123 |   if is_help_flag "$A2"; then
124 |     echo "Usage: ./run start
125 | 
126 | Instantiate the system locally using Docker."
127 |     exit 0
128 |   fi
129 | 
130 |   masters
131 |   client_init
132 |   slaves
133 |   client
134 | 
135 | # Clean up only what is allocated by the `start` command.
136 | elif [[ "$A1" = "clean" ]]; then
137 |   CONTAINERS=("rclient");
138 |   for ((i = 10; i < $((15 + NUM_SLAVES)); i++)); do
139 |     CONTAINERS+=("runiversal${i}");
140 |   done
141 |   docker kill "${CONTAINERS[@]}" &> /dev/null;
142 |   docker container rm "${CONTAINERS[@]}" &> /dev/null;
143 | 
144 | # Do a deep clean, where we just kill all `runiversal` and `rclient` nodes that we might have spun up.
145 | elif [[ "$A1" = "dclean" ]]; then
146 |   if is_help_flag "$A2"; then
147 |     echo "Usage: ./run dclean
148 | 
149 | Stop and delete all Docker containers associated with rUniversalDB. 'd' stands for 'deep'."
150 |     exit 0
151 |   fi
152 | 
153 |   CONTAINERS=("rclient");
154 |   for ((i = 2; i < 10; i++)); do
155 |     CONTAINERS+=("rclient${i}");
156 |   done
157 |   for ((i = 10; i < 45; i++)); do
158 |     CONTAINERS+=("runiversal${i}");
159 |   done
160 |   docker kill "${CONTAINERS[@]}" &> /dev/null;
161 |   docker container rm "${CONTAINERS[@]}" &> /dev/null;
162 | 
163 | # Clean up a single `runiversal` node.
164 | elif [[ "$A1" = "nclean" ]]; then
165 |   if is_help_flag "$A2"; then
166 |     echo "Usage: ./run nclean
167 | 
168 | Stop and delete a specific system node.
169 | 
170 | E.g. \`./run nclean 20\` will stop and remove the docker container \`runiversal20\`."
171 |     exit 0
172 |   fi
173 | 
174 |   CONTAINER="runiversal${A2}"
175 |   docker kill "${CONTAINER}" &> /dev/null;
176 |   docker container rm "${CONTAINER}" &> /dev/null;
177 | 
178 | # Clean up a single `rclient` node.
179 | elif [[ "$A1" = "cclean" ]]; then
180 |   if is_help_flag "$A2"; then
181 |     echo "Usage: ./run cclean
182 | 
183 | Stop and delete a specific client node.
184 | 
185 | E.g. \`./run cclean 3\` will stop and remove the docker container \`rclient3\`."
186 |     exit 0
187 |   fi
188 | 
189 |   CONTAINER="rclient${A2}"
190 |   docker kill "${CONTAINER}" &> /dev/null;
191 |   docker container rm "${CONTAINER}" &> /dev/null;
192 | 
193 | elif [[ "$A1" = "test" ]]; then
194 |   if is_help_flag "$A2"; then
195 |     echo "Usage: ./run test
196 | 
197 | Run the main simulation tests."
198 |     exit 0
199 |   fi
200 | 
201 |   cargo run --release --bin simtest -- -i 8 -r 80
202 | 
203 | elif [[ "$A1" = "test_all" ]]; then
204 |   if is_help_flag "$A2"; then
205 |     echo "Usage: ./run test_all
206 | 
207 | Run all simulation tests."
208 |     exit 0
209 |   fi
210 | 
211 |   cargo run --release --bin paxos;
212 |   cargo run --release --bin paxos2pc_sim;
213 |   cargo run --release --bin simtest -- -i 8 -r 80;
214 | 
215 | elif [[ "$A1" = "bench" ]]; then
216 |   if is_help_flag "$A2"; then
217 |     echo "Usage: ./run bench
218 | 
219 | Do a small run of the main simulation tests to gauge performance regressions."
220 |     exit 0
221 |   fi
222 | 
223 |   cargo run --release --bin simtest -- -i 4 -r 8
224 | 
225 | else
226 |   if ! is_help_flag "$A1"; then
227 |     echo "unrecognized command: $A1
228 | "
229 |   fi
230 | 
231 |   echo "Usage: ./run [COMMAND]
232 | 
233 | A tool to help build, run, and test rUniversalDB locally.
234 | 
235 | Commands:
236 |   setup         Setup a bridge network and the initial Docker image.
237 |   build         Do an incremental build based on the prior Docker image.
238 |   new_client    Create a client container, turning the current shell into an interactive session.
239 |   new_node      Create a new node for the system to use if needed.
240 |   start         Instantiate the system locally using Docker.
241 |   dclean        Stop and delete all Docker containers associated with rUniversalDB.
242 |   nclean        Stop and delete a specific system node.
243 |   cclean        Stop and delete a specific client node.
244 |   test          Run the main simulation tests.
245 |   test_all      Run all simulation tests.
246 |   bench         Do a small run of the main simulation tests to gauge performance regressions."
247 | 
248 | fi
249 | 
--------------------------------------------------------------------------------
/src/ms_table_delete_es.rs:
--------------------------------------------------------------------------------
1 | use crate::col_usage::{col_collecting_cb, col_ref_collecting_cb, QueryIterator};
2 | use crate::common::{mk_qid, ColName, CoreIOCtx, OrigP, QueryESResult, WriteRegion};
3 | use crate::common::{
4 |   ColValN, ContextRow, PrimaryKey, QueryId, TablePath, TableView, TransTableName,
5 | };
6 | use crate::expression::is_true;
7 | use crate::gr_query_es::{GRQueryConstructorView, GRQueryES};
8 | use crate::message as msg;
9 | use crate::ms_table_es::{GeneralQueryES, MSTableES, SqlQueryInner};
10 | use crate::server::{evaluate_delete, mk_eval_error, ContextConstructor, GeneralColumnRef};
11 | use crate::sql_ast::proc;
12 | use crate::storage::{GenericTable, MSStorageView};
13 | use crate::table_read_es::compute_read_region;
14 | use crate::tablet::{
15 |   compute_subqueries, MSQueryES, RequestedReadProtected, StorageLocalTable, TPESAction,
16 |   TabletContext,
17 | };
18 | use std::collections::BTreeSet;
19 | use std::iter::FromIterator;
20 | 
21 | // -----------------------------------------------------------------------------------------------
22 | //  MSTableDeleteES
23 | // -----------------------------------------------------------------------------------------------
24 | 
25 | pub type MSTableDeleteES = MSTableES;
26 | 
27 | #[derive(Debug)]
28 | pub struct DeleteInner {
29 |   sql_query: proc::Delete,
30 | }
31 | 
32 | impl DeleteInner {
33 |   pub fn new(sql_query: proc::Delete) -> Self {
34 |     DeleteInner { sql_query }
35 |   }
36 | }
37 | 
38 | impl SqlQueryInner for DeleteInner {
39 |   fn table_path(&self) -> &TablePath {
40 |     &self.sql_query.table.table_path
41 |   }
42 | 
43 |   fn request_region_locks(
44 |     &mut self,
45 |     ctx: &mut TabletContext,
46 |     io_ctx: &mut IO,
47 |     es: &GeneralQueryES,
48 |   ) -> Result {
49 |     // Collect all `ColName`s of this table that the `ColumnRef`s refer to.
50 |     let mut safe_present_cols = Vec::::new();
51 |     QueryIterator::new().iterate_delete(
52 |       &mut col_collecting_cb(&self.sql_query.table.alias, &mut safe_present_cols),
53 |       &self.sql_query,
54 |     );
55 | 
56 |     // Compute the ReadRegion
57 |     let read_region = compute_read_region(
58 |       &ctx.table_schema.key_cols,
59 |       &ctx.this_tablet_key_range,
60 |       &es.context,
61 |       &self.sql_query.selection,
62 |       &self.sql_query.table.alias,
63 |       safe_present_cols,
64 |       vec![],
65 |     );
66 | 
67 |     // Compute the WriteRegion
68 |     let write_region = WriteRegion {
69 |       row_region: read_region.row_region.clone(),
70 |       presence: true,
71 |       val_col_region: vec![],
72 |     };
73 | 
74 |     // Verify that we have WriteRegion Isolation with Subsequent Reads. We abort
75 |     // if we don't, and we amend this MSQuery's VerifyingReadWriteRegions if we do.
76 |     if !ctx.check_write_region_isolation(&write_region, &es.timestamp) {
77 |       Err(msg::QueryError::WriteRegionConflictWithSubsequentRead)
78 |     } else {
79 |       // Move the MSTableDeleteES to the Pending state with the given ReadRegion.
80 |       let protect_qid = mk_qid(io_ctx.rand());
81 | 
82 |       // Add a ReadRegion to the `m_waiting_read_protected` and the
83 |       // WriteRegion into `m_write_protected`.
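      // (Since a Delete removes whole rows, the WriteRegion constructed above covers
      // the same row region as the ReadRegion with `presence: true` and no value
      // columns, rather than naming specific value columns.)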
84 | let verifying = ctx.verifying_writes.get_mut(&es.timestamp).unwrap(); 85 | verifying.m_waiting_read_protected.insert(RequestedReadProtected { 86 | orig_p: OrigP::new(es.query_id.clone()), 87 | query_id: protect_qid.clone(), 88 | read_region, 89 | }); 90 | verifying.m_write_protected.insert(write_region); 91 | 92 | Ok(protect_qid) 93 | } 94 | } 95 | 96 | fn compute_subqueries( 97 | &mut self, 98 | ctx: &mut TabletContext, 99 | io_ctx: &mut IO, 100 | es: &GeneralQueryES, 101 | ms_query_es: &mut MSQueryES, 102 | ) -> Vec { 103 | compute_subqueries( 104 | GRQueryConstructorView { 105 | root_query_path: &es.root_query_path, 106 | timestamp: &es.timestamp, 107 | sql_query: &self.sql_query, 108 | query_plan: &es.query_plan, 109 | query_id: &es.query_id, 110 | context: &es.context, 111 | }, 112 | io_ctx.rand(), 113 | StorageLocalTable::new( 114 | &ctx.table_schema, 115 | &es.timestamp, 116 | &self.sql_query.table, 117 | &ctx.this_tablet_key_range, 118 | &self.sql_query.selection, 119 | MSStorageView::new( 120 | &ctx.storage, 121 | &ctx.table_schema, 122 | &ms_query_es.update_views, 123 | es.tier.clone(), 124 | ), 125 | ), 126 | ) 127 | } 128 | 129 | fn finish( 130 | &mut self, 131 | ctx: &mut TabletContext, 132 | _: &mut IO, 133 | es: &GeneralQueryES, 134 | (children, subquery_results): ( 135 | Vec<(Vec, Vec)>, 136 | Vec>, 137 | ), 138 | ms_query_es: &mut MSQueryES, 139 | ) -> Option { 140 | // Create the ContextConstructor. 141 | let context_constructor = ContextConstructor::new( 142 | es.context.context_schema.clone(), 143 | StorageLocalTable::new( 144 | &ctx.table_schema, 145 | &es.timestamp, 146 | &self.sql_query.table, 147 | &ctx.this_tablet_key_range, 148 | &self.sql_query.selection, 149 | MSStorageView::new( 150 | &ctx.storage, 151 | &ctx.table_schema, 152 | &ms_query_es.update_views, 153 | es.tier.clone(), 154 | ), 155 | ), 156 | children, 157 | ); 158 | 159 | // These are all of the `ColNames` that we need in order to evaluate the Delete. 160 | // This consists of all Top-Level Columns for every expression, as well as all Key 161 | // Columns (since they are included in the resulting table). 162 | let mut top_level_cols_set = BTreeSet::::new(); 163 | let cur_alias = &self.sql_query.table.alias; 164 | top_level_cols_set.extend(ctx.table_schema.get_key_col_refs(cur_alias)); 165 | QueryIterator::new_top_level() 166 | .iterate_delete(&mut col_ref_collecting_cb(&mut top_level_cols_set), &self.sql_query); 167 | let top_level_col_names = Vec::from_iter(top_level_cols_set.into_iter()); 168 | let top_level_extra_col_refs = 169 | Vec::from_iter(top_level_col_names.iter().map(|c| GeneralColumnRef::Named(c.clone()))); 170 | 171 | // Setup the TableView that we are going to return and the UpdateView that we're going 172 | // to hold in the MSQueryES. 173 | let mut res_table_view = TableView::new(); 174 | let mut update_view = GenericTable::new(); 175 | 176 | // Finally, iterate over the Context Rows of the subqueries and compute the final values. 177 | let eval_res = context_constructor.run( 178 | &es.context.context_rows, 179 | top_level_extra_col_refs, 180 | &mut |context_row_idx: usize, 181 | top_level_col_vals: Vec, 182 | contexts: Vec<(ContextRow, usize)>, 183 | count: u64| { 184 | assert_eq!(context_row_idx, 0); // Recall there is only one ContextRow for Updates. 185 | 186 | // First, we extract the subquery values using the child Context indices. 
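        // (Each element of `contexts` pairs a child `ContextRow` with an index into
        // the corresponding subquery's results; the loop below only uses the index.)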
187 | let mut subquery_vals = Vec::::new(); 188 | for (subquery_idx, (_, child_context_idx)) in contexts.iter().enumerate() { 189 | let val = subquery_results.get(subquery_idx).unwrap().get(*child_context_idx).unwrap(); 190 | subquery_vals.push(val.clone()); 191 | } 192 | 193 | // Now, we evaluate all expressions in the SQL query and amend the 194 | // result to this TableView (if the WHERE clause evaluates to true). 195 | let evaluated_delete = evaluate_delete( 196 | &self.sql_query, 197 | &top_level_col_names, 198 | &top_level_col_vals, 199 | &subquery_vals, 200 | )?; 201 | if is_true(&evaluated_delete.selection)? { 202 | // This means that the current row should be selected for the result. 203 | let mut res_row = Vec::::new(); 204 | 205 | // We reconstruct the PrimaryKey 206 | let mut primary_key = PrimaryKey { cols: vec![] }; 207 | let cur_alias = &self.sql_query.table.alias; 208 | for key_col in &ctx.table_schema.get_key_col_refs(cur_alias) { 209 | let idx = top_level_col_names.iter().position(|col| key_col == col).unwrap(); 210 | let col_val = top_level_col_vals.get(idx).unwrap().clone(); 211 | res_row.push(col_val.clone()); 212 | primary_key.cols.push(col_val.unwrap()); 213 | } 214 | 215 | // Amend the UpdateView to delete the PrimaryKey 216 | update_view.insert((primary_key, None), None); 217 | }; 218 | Ok(()) 219 | }, 220 | ); 221 | 222 | match eval_res { 223 | Ok(()) => { 224 | // Amend the `update_view` in the MSQueryES. 225 | ms_query_es.update_views.insert(es.tier.clone() - 1, update_view); 226 | 227 | // Signal Success and return the data. 228 | Some(TPESAction::Success(QueryESResult { 229 | result: vec![res_table_view], 230 | new_rms: es.new_rms.iter().cloned().collect(), 231 | })) 232 | } 233 | Err(eval_error) => Some(TPESAction::QueryError(mk_eval_error(eval_error))), 234 | } 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /src/shard_snapshot_es.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | CTSubNodePath, CoreIOCtx, PaxosGroupId, PaxosGroupIdTrait, QueryId, RemoteLeaderChangedPLm, 3 | SlaveIOCtx, TNodePath, TabletGroupId, 4 | }; 5 | use crate::expression::range_might_intersect_row_region; 6 | use crate::finish_query_rm_es::FinishQueryRMES; 7 | use crate::message as msg; 8 | use crate::server::ServerContextBase; 9 | use crate::shard_split_tm_es::STRange; 10 | use crate::slave::{SlaveContext, SlavePLm}; 11 | use crate::storage::{compute_range_storage, remove_range, GenericMVTable}; 12 | use crate::tablet::{ShardingSnapshot, TabletConfig, TabletContext, TabletForwardMsg, TabletPLm}; 13 | use serde::{Deserialize, Serialize}; 14 | use std::collections::{BTreeMap, Bound}; 15 | 16 | // ----------------------------------------------------------------------------------------------- 17 | // PLms 18 | // ----------------------------------------------------------------------------------------------- 19 | 20 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 21 | pub struct ShardingConfirmedPLm { 22 | query_id: QueryId, 23 | } 24 | 25 | // ----------------------------------------------------------------------------------------------- 26 | // ShardingSnapshotES 27 | // ----------------------------------------------------------------------------------------------- 28 | 29 | pub enum ShardingSnapshotAction { 30 | Wait, 31 | Exit, 32 | } 33 | 34 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] 35 | enum State { 36 | Follower, 37 | 
WaitingPreparedWrites,
38 |   ShardingSnapshotSent,
39 |   InsertingShardingConfirmed,
40 | }
41 | 
42 | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
43 | pub struct ShardingSnapshotES {
44 |   pub query_id: QueryId,
45 |   /// The Target to send the `ShardingSnapshot` to.
46 |   target: STRange,
47 |   /// If this is `true`, the snapshot is sent to the Slave to create the `target`. Otherwise,
48 |   /// the `target` already exists.
49 |   is_new: bool,
50 |   state: State,
51 | }
52 | 
53 | impl ShardingSnapshotES {
54 |   pub fn create_split(
55 |     ctx: &mut TabletContext,
56 |     io_ctx: &mut IO,
57 |     finish_query_ess: &BTreeMap,
58 |     query_id: QueryId,
59 |     target: STRange,
60 |   ) -> ShardingSnapshotES {
61 |     let mut es = ShardingSnapshotES { query_id, target, is_new: false, state: State::Follower };
62 |     es.start(ctx, io_ctx, finish_query_ess);
63 |     es
64 |   }
65 | 
66 |   fn start(
67 |     &mut self,
68 |     ctx: &mut TabletContext,
69 |     io_ctx: &mut IO,
70 |     finish_query_ess: &BTreeMap,
71 |   ) -> ShardingSnapshotAction {
72 |     if ctx.is_leader() {
73 |       self.advance_prepared(ctx, io_ctx, finish_query_ess);
74 |     } else {
75 |       self.state = State::Follower;
76 |     }
77 | 
78 |     ShardingSnapshotAction::Wait
79 |   }
80 | 
81 |   fn send_sharding_snapshot(&mut self, ctx: &mut TabletContext, io_ctx: &mut IO) {
82 |     // Construct the ShardingSnapshot
83 |     let snapshot = ShardingSnapshot {
84 |       this_tid: self.target.tid.clone(),
85 |       this_table_path: ctx.this_table_path.clone(),
86 |       this_sharding_gen: ctx.this_sharding_gen.clone(),
87 |       this_table_key_range: self.target.range.clone(),
88 |       storage: compute_range_storage(&ctx.storage, &self.target.range),
89 |       table_schema: ctx.table_schema.clone(),
90 |       presence_timestamp: ctx.presence_timestamp.clone(),
91 |       committed_writes: ctx.committed_writes.clone(),
92 |       read_protected: ctx.read_protected.clone(),
93 |     };
94 | 
95 |     // Send the Snapshot
96 |     let node_path = ctx.mk_node_path();
97 |     ctx.send_to_slave_common(
98 |       io_ctx,
99 |       self.target.sid.clone(),
100 |       msg::SlaveRemotePayload::ShardingMessage(msg::ShardingMessage {
101 |         query_id: self.query_id.clone(),
102 |         node_path,
103 |         snapshot,
104 |       }),
105 |     );
106 |   }
107 | 
108 |   fn advance_prepared(
109 |     &mut self,
110 |     ctx: &mut TabletContext,
111 |     io_ctx: &mut IO,
112 |     finish_query_ess: &BTreeMap,
113 |   ) {
114 |     let ready_to_send = (|| -> bool {
115 |       // We compute if all `FinishQueryESs` with an old `ShardingGen` are done
116 |       for (_, es) in finish_query_ess {
117 |         match es {
118 |           FinishQueryRMES::Committed => {}
119 |           FinishQueryRMES::Aborted => {}
120 |           FinishQueryRMES::Paxos2PCRMExecOuter(es) => {
121 |             if es.inner.sharding_gen < ctx.this_sharding_gen {
122 |               return false;
123 |             }
124 |           }
125 |           _ => {}
126 |         }
127 |       }
128 | 
129 |       // Then, we check that all ReadRegions in `(waiting/inserting)_read_protected` are within
130 |       // the new TabletKeyRange here. To do this, we simply see if the ReadRegion
131 |       // intersects with the part of the TabletKeyRange that is being sent off.
132 |       let unpersisted_read_protected =
133 |         ctx.waiting_read_protected.iter().chain(ctx.inserting_read_protected.iter());
134 |       for (_, reqs) in unpersisted_read_protected {
135 |         for req in reqs {
136 |           if range_might_intersect_row_region(
137 |             &ctx.table_schema.key_cols,
138 |             &self.target.range,
139 |             &req.read_region.row_region,
140 |           ) {
141 |             return false;
142 |           }
143 |         }
144 |       }
145 | 
146 |       true
147 |     })();
148 | 
149 |     // If so, construct and send the snapshot. Either way, advance the state.
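    // (For orientation, the Leader-side states driven here: `WaitingPreparedWrites`
    // -> `ShardingSnapshotSent` -> `InsertingShardingConfirmed`, dropping back to
    // `Follower` whenever Leadership is lost; see `handle_lc` below.)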
150 |     if ready_to_send {
151 |       self.send_sharding_snapshot(ctx, io_ctx);
152 |       self.state = State::ShardingSnapshotSent;
153 |     } else {
154 |       self.state = State::WaitingPreparedWrites;
155 |     }
156 |   }
157 | 
158 |   /// To determine whether all `FinishQueryES`s and `(waiting/inserting)_read_protected` entries
159 |   /// that must finish have finished, we check every time a `TabletBundle` is inserted.
160 |   /// Note: A less wasteful scheme might be possible later.
161 |   pub fn handle_bundle_processed(
162 |     &mut self,
163 |     ctx: &mut TabletContext,
164 |     io_ctx: &mut IO,
165 |     finish_query_ess: &BTreeMap,
166 |   ) -> ShardingSnapshotAction {
167 |     match &self.state {
168 |       State::WaitingPreparedWrites => {
169 |         self.advance_prepared(ctx, io_ctx, finish_query_ess);
170 |       }
171 |       _ => {}
172 |     }
173 |     ShardingSnapshotAction::Wait
174 |   }
175 | 
176 |   pub fn handle_msg(
177 |     &mut self,
178 |     ctx: &mut TabletContext,
179 |     confirm: msg::ShardingConfirmed,
180 |   ) -> ShardingSnapshotAction {
181 |     match &self.state {
182 |       State::ShardingSnapshotSent => {
183 |         ctx
184 |           .tablet_bundle
185 |           .push(TabletPLm::ShardingConfirmedPLm(ShardingConfirmedPLm { query_id: confirm.qid }));
186 |         self.state = State::InsertingShardingConfirmed;
187 |       }
188 |       _ => {
189 |         // TODO: Figure out why this debug_assert can get hit (i.e. why it is okay for it
190 |         // to get hit). Removing it results in the simulation tests still passing, so there
191 |         // was definitely an execution path that caused this to get hit.
192 |         // debug_assert!(false);
193 |       }
194 |     }
195 |     ShardingSnapshotAction::Wait
196 |   }
197 | 
198 |   /// This function returns `ShardingSnapshotAction::Exit` iff this ES is finished.
199 |   pub fn handle_plm(
200 |     &mut self,
201 |     ctx: &mut TabletContext,
202 |     _: ShardingConfirmedPLm,
203 |   ) -> ShardingSnapshotAction {
204 |     match &self.state {
205 |       State::InsertingShardingConfirmed | State::Follower => {
206 |         // Remove all the storage data that this Tablet no longer manages.
207 |         let remaining = remove_range(&mut ctx.storage, &self.target.range);
208 |         debug_assert!(remaining.is_empty());
209 |         ShardingSnapshotAction::Exit
210 |       }
211 |       _ => {
212 |         debug_assert!(false);
213 |         ShardingSnapshotAction::Wait
214 |       }
215 |     }
216 |   }
217 | 
218 |   pub fn handle_lc(
219 |     &mut self,
220 |     ctx: &mut TabletContext,
221 |     io_ctx: &mut IO,
222 |     finish_query_ess: &BTreeMap,
223 |   ) -> ShardingSnapshotAction {
224 |     match &self.state {
225 |       State::Follower => {
226 |         if ctx.is_leader() {
227 |           self.advance_prepared(ctx, io_ctx, finish_query_ess);
228 |         }
229 |       }
230 |       State::WaitingPreparedWrites
231 |       | State::ShardingSnapshotSent
232 |       | State::InsertingShardingConfirmed => self.state = State::Follower,
233 |     }
234 |     ShardingSnapshotAction::Wait
235 |   }
236 | 
237 |   pub fn handle_rlc(
238 |     &mut self,
239 |     ctx: &mut TabletContext,
240 |     io_ctx: &mut IO,
241 |     remote_leader_changed: RemoteLeaderChangedPLm,
242 |   ) -> ShardingSnapshotAction {
243 |     match &self.state {
244 |       State::ShardingSnapshotSent => {
245 |         // If the Leader that changed was of the target SlaveGroupId, we resend the snapshot.
246 |         if remote_leader_changed.gid == self.target.sid.to_gid() {
247 |           self.send_sharding_snapshot(ctx, io_ctx);
248 |         }
249 |       }
250 |       _ => {}
251 |     }
252 |     ShardingSnapshotAction::Wait
253 |   }
254 | 
255 |   /// Construct the version of `ShardingSnapshotES` that would result from losing Leadership.
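  /// (Analogous to `SlaveGroupCreateES::reconfig_snapshot`: everything is cloned
  /// as-is, and only `state` is reset to `Follower`.)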
256 | pub fn reconfig_snapshot(&self) -> ShardingSnapshotES { 257 | let mut es = self.clone(); 258 | es.state = State::Follower; 259 | es 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /src/bin/simtest/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(map_first_last)] 2 | 3 | use crate::advanced_parallel_test::test_all_advanced_parallel; 4 | use crate::basic_serial_test::test_all_basic_serial; 5 | use crate::paxos_parallel_test::{ 6 | test_all_basic_parallel, test_all_paxos_parallel, ParallelTestStats, Writer, 7 | }; 8 | use crate::stats::{format_message_stats, process_stats, Stats}; 9 | use clap::{arg, App}; 10 | use rand::{RngCore, SeedableRng}; 11 | use rand_xorshift::XorShiftRng; 12 | use runiversal::test_utils::mk_seed; 13 | use std::cmp::max; 14 | use std::collections::BTreeMap; 15 | use std::panic::AssertUnwindSafe; 16 | use std::sync::mpsc; 17 | use std::sync::mpsc::Sender; 18 | 19 | #[macro_export] 20 | macro_rules! cast { 21 | ($enum:path, $expr:expr) => {{ 22 | if let $enum(item) = $expr { 23 | Ok(item) 24 | } else { 25 | Err("Could not cast the value to the desired Variant.") 26 | } 27 | }}; 28 | } 29 | 30 | mod advanced_parallel_test; 31 | mod basic_serial_test; 32 | mod paxos_parallel_test; 33 | mod serial_test_utils; 34 | mod simulation; 35 | mod stats; 36 | 37 | /** 38 | * Debugging Tips: 39 | * - We thread a global RNG through all test cases. However, in every test case, we try to 40 | * use it for nothing more than creating a new RNG by creating a random seed. The reason 41 | * for this is so that if a failure happens, we can just print the seed and then quickly 42 | * reproduce by using that seed directly to run the test case. 43 | */ 44 | 45 | fn main() { 46 | // Setup CLI parsing 47 | let matches = App::new("rUniversalDB Tests") 48 | .version("1.0") 49 | .author("Pasindu M. ") 50 | .arg( 51 | arg!(-i --instances ) 52 | .required(false) 53 | .help("Indicates if the simulation tests should be run in parallel."), 54 | ) 55 | .arg( 56 | arg!(-r --rounds ) 57 | .required(false) 58 | .help("The number of rounds to execute the parallel tests."), 59 | ) 60 | .get_matches(); 61 | 62 | // Run Serial tests in just one thread (since these are fast). 63 | let mut rand = XorShiftRng::from_seed([1; 16]); 64 | println!("Basic Serial Tests:"); 65 | test_all_basic_serial(&mut rand); 66 | println!("\n"); 67 | 68 | // Run parallel tests, potentially in multiple threads if requested. 69 | const DEFAULT_NUM_ROUNDS: u32 = 33; 70 | let rounds: u32 = if let Some(rounds) = matches.value_of("rounds") { 71 | rounds.parse().unwrap() 72 | } else { 73 | DEFAULT_NUM_ROUNDS 74 | }; 75 | 76 | if let Some(instances) = matches.value_of("instances") { 77 | let instances: u32 = instances.parse().unwrap(); 78 | execute_multi(instances, rounds); 79 | } else { 80 | execute_once(&mut rand, rounds); 81 | } 82 | } 83 | 84 | // ----------------------------------------------------------------------------------------------- 85 | // Print Utils 86 | // ----------------------------------------------------------------------------------------------- 87 | 88 | /// Trivial implementation just using `println!`. 89 | struct BasicPrintWriter {} 90 | 91 | impl Writer for BasicPrintWriter { 92 | fn println(&mut self, s: String) { 93 | println!("{}", s); 94 | } 95 | 96 | fn flush(&mut self) {} 97 | } 98 | 99 | /// Concurrent Writer for when we want multiple threads writing data. 
This struct allows multiple
100 | /// `println` calls to be batched together and then written to the console atomically with
101 | /// `flush`. We also have `flush_error` so that if the thread errors out before it would normally
102 | /// call `flush`, then we can catch the exception and then call this function explicitly.
103 | struct ConcurrentWriter<'a> {
104 |   sender: &'a Sender,
105 |   print_buffer: Vec,
106 | }
107 | 
108 | impl<'a> ConcurrentWriter<'a> {
109 |   fn create(sender: &Sender) -> ConcurrentWriter {
110 |     ConcurrentWriter { sender, print_buffer: vec![] }
111 |   }
112 | 
113 |   fn mk_text(&mut self) -> String {
114 |     let print_buffer = std::mem::take(&mut self.print_buffer);
115 |     print_buffer.join("")
116 |   }
117 | 
118 |   /// Flushes the currently buffered string as an error, indicating that the sending
119 |   /// thread encountered an error.
120 |   fn flush_error(&mut self) {
121 |     let text = self.mk_text();
122 |     self.sender.send(ParallelTestMessage::Error(text)).unwrap();
123 |   }
124 | }
125 | 
126 | impl<'a> Writer for ConcurrentWriter<'a> {
127 |   fn println(&mut self, s: String) {
128 |     self.print_buffer.push(format!("{}\n", s));
129 |   }
130 | 
131 |   /// Flushes the currently buffered string normally.
132 |   fn flush(&mut self) {
133 |     let text = self.mk_text();
134 |     self.sender.send(ParallelTestMessage::PrintMessage(text)).unwrap();
135 |   }
136 | }
137 | 
138 | // -----------------------------------------------------------------------------------------------
139 | //  Parallel Simulation Tests
140 | // -----------------------------------------------------------------------------------------------
141 | 
142 | /// The message sent from the test executor threads to the coordinator thread
143 | /// (i.e. the main thread).
144 | enum ParallelTestMessage {
145 |   PrintMessage(String),
146 |   Error(String),
147 |   Done((ParallelTestStats, Vec)),
148 | }
149 | 
150 | /// Execute parallel tests in a single thread.
151 | fn execute_once(rand: &mut XorShiftRng, rounds: u32) {
152 |   let mut writer = BasicPrintWriter {};
153 |   println!("Paxos Parallel Tests:");
154 |   test_all_paxos_parallel(rand, &mut writer, rounds);
155 |   println!("\n");
156 |   println!("Basic Parallel Tests:");
157 |   test_all_basic_parallel(rand, &mut writer, rounds);
158 |   println!("\n");
159 | }
160 | 
161 | /// Execute parallel tests in multiple threads.
162 | fn execute_multi(instances: u32, rounds: u32) {
163 |   let (sender, receiver) = mpsc::channel::();
164 | 
165 |   // Create `instances` number of threads to run the test in parallel.
166 |   for i in 0..instances {
167 |     let mut seed: [u8; 16] = [0; 16];
168 |     seed[0] = i as u8;
169 |     let sender = sender.clone();
170 |     std::thread::spawn(move || {
171 |       let mut writer = ConcurrentWriter::create(&sender);
172 |       let mut rand = XorShiftRng::from_seed(seed);
173 | 
174 |       // Catch any panics or errors that happen inside
175 |       let result = std::panic::catch_unwind(AssertUnwindSafe(|| {
176 |         println!("Paxos Parallel Tests:");
177 |         let parallel_stats = test_all_paxos_parallel(&mut rand, &mut writer, rounds);
178 |         println!("\n");
179 |         println!("Basic Parallel Tests:");
180 |         let stats_basic = test_all_basic_parallel(&mut rand, &mut writer, rounds);
181 |         println!("\n");
182 | 
183 |         (parallel_stats, stats_basic)
184 |       }));
185 | 
186 |       // If the above ended with an error, we flush the last of whatever was written
187 |       // as an error. Otherwise, we flush it normally and send off the results.
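      // (The `Err` arm below comes from `catch_unwind`: a panicking test thread never
      // reaches its own `flush`, so `flush_error` forwards the buffered output to the
      // coordinator as an `Error`, which makes the coordinator terminate the run.)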
188 |       match result {
189 |         Ok(done) => {
190 |           writer.flush();
191 |           sender.send(ParallelTestMessage::Done(done)).unwrap();
192 |         }
193 |         Err(_) => writer.flush_error(),
194 |       }
195 |     });
196 |   }
197 | 
198 |   // Drop the original sender to avoid blocking the following `recv` call forever.
199 |   drop(sender);
200 | 
201 |   let mut parallel_stats_acc = Vec::::new();
202 |   let mut basic_stats_acc = Vec::>::new();
203 | 
204 |   // Receive data until there are no more `senders` in existence; i.e. when all
205 |   // threads above have finished.
206 |   while let Ok(result) = receiver.recv() {
207 |     match result {
208 |       ParallelTestMessage::PrintMessage(string) => println!("{}", string),
209 |       ParallelTestMessage::Error(string) => {
210 |         println!("{}", string);
211 |         println!("Terminating...");
212 |         // Terminate all testing.
213 |         return;
214 |       }
215 |       ParallelTestMessage::Done((parallel_stats, basic_stats)) => {
216 |         parallel_stats_acc.push(parallel_stats);
217 |         basic_stats_acc.push(basic_stats);
218 |       }
219 |     }
220 |   }
221 | 
222 |   // Process the basic stats
223 |   {
224 |     let mut all_stats = Vec::::new();
225 | 
226 |     for basic_stats in basic_stats_acc {
227 |       all_stats.extend(basic_stats);
228 |     }
229 | 
230 |     let (avg_duration, avg_message_stats) = process_stats(all_stats);
231 | 
232 |     // Print the stats.
233 |     println!("Avg Basic Duration: {}", avg_duration);
234 |     println!("Avg Basic Statistics: {}", format_message_stats(&avg_message_stats));
235 |   }
236 | 
237 |   // Process the parallel stats
238 |   {
239 |     let mut all_stats = Vec::::new();
240 |     let mut all_reconfig_stats = Vec::::new();
241 |     let mut all_sharding_stats = Vec::::new();
242 | 
243 |     for parallel_stats in parallel_stats_acc {
244 |       all_stats.extend(parallel_stats.all_stats);
245 |       all_reconfig_stats.extend(parallel_stats.all_reconfig_stats);
246 |       all_sharding_stats.extend(parallel_stats.all_sharding_stats);
247 |     }
248 | 
249 |     let (avg_duration, avg_message_stats) = process_stats(all_stats);
250 |     let (avg_reconfig_duration, avg_reconfig_message_stats) = process_stats(all_reconfig_stats);
251 |     let (avg_sharding_duration, avg_sharding_message_stats) = process_stats(all_sharding_stats);
252 | 
253 |     // Print the stats.
254 |     println!("Avg Duration: {}", avg_duration);
255 |     println!("Avg Statistics: {}", format_message_stats(&avg_message_stats));
256 |     println!("Avg Reconfig Duration: {}", avg_reconfig_duration);
257 |     println!("Avg Reconfig Statistics: {}", format_message_stats(&avg_reconfig_message_stats));
258 |     println!("Avg Sharding Duration: {}", avg_sharding_duration);
259 |     println!("Avg Sharding Statistics: {}", format_message_stats(&avg_sharding_message_stats));
260 |   }
261 | }
262 | 
--------------------------------------------------------------------------------